From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mgamail.intel.com (mgamail.intel.com [192.55.52.120]) by sourceware.org (Postfix) with ESMTPS id 5B31D385F00A for ; Thu, 21 Sep 2023 07:25:04 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 5B31D385F00A Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=intel.com Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=intel.com DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1695281104; x=1726817104; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=+h7ejzRILbymkOAc+/sXHVhCvFXzh6GvpdnD6gvJtEY=; b=a+HToQxA7z2SAN5BI81mCAEiW5REZ5BxbghjcGuCIR+8vmen6CfcB/iN ZJHOuwJ1N9n00YO9oNTl/gmwzru4u6IcQWcRkKQBA350Yw6gl3LwAU4RJ azkkOp7Tfskx4HS9pfY1qGHpHEV5pxOCJ74GE+cAFWWmFxEZ3+snrhne3 U6w43rXqXDOEcB7wYURGUx8C4zfJMjtiaPgIAby1LlbOIyp5glp3fXq4C XiQfwcbPXHRzy0+hWCd8VVxz7DyroTo0+/avSDeazpKkz9Qvc/kcwo683 tbcQVm/RATjM4I7F35vx0gT+tgDRzqcR9SkYd8oDXPV4J/8BYsIKiV3T6 w==; X-IronPort-AV: E=McAfee;i="6600,9927,10839"; a="379326676" X-IronPort-AV: E=Sophos;i="6.03,164,1694761200"; d="scan'208";a="379326676" Received: from fmsmga004.fm.intel.com ([10.253.24.48]) by fmsmga104.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 21 Sep 2023 00:22:18 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=McAfee;i="6600,9927,10839"; a="817262183" X-IronPort-AV: E=Sophos;i="6.03,164,1694761200"; d="scan'208";a="817262183" Received: from shvmail03.sh.intel.com ([10.239.245.20]) by fmsmga004.fm.intel.com with ESMTP; 21 Sep 2023 00:22:15 -0700 Received: from shliclel4217.sh.intel.com (shliclel4217.sh.intel.com [10.239.240.127]) by shvmail03.sh.intel.com (Postfix) with ESMTP id 54C93100513A; Thu, 21 Sep 2023 15:22:14 +0800 (CST) From: "Hu, Lin1" To: gcc-patches@gcc.gnu.org Cc: hongtao.liu@intel.com, ubizjak@gmail.com, haochen.jiang@intel.com Subject: [PATCH 12/18] Disable zmm register and 512 bit 
libmvec call when !TARGET_EVEX512 Date: Thu, 21 Sep 2023 15:20:07 +0800 Message-Id: <20230921072013.2124750-13-lin1.hu@intel.com> X-Mailer: git-send-email 2.31.1 In-Reply-To: <20230921072013.2124750-1-lin1.hu@intel.com> References: <20230921072013.2124750-1-lin1.hu@intel.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Spam-Status: No, score=-11.3 required=5.0 tests=BAYES_00,DKIMWL_WL_HIGH,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,GIT_PATCH_0,KAM_NUMSUBJECT,SPF_HELO_NONE,SPF_NONE,TXREP autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: From: Haochen Jiang gcc/ChangeLog: * config/i386/i386-expand.cc (ix86_broadcast_from_constant): Disable zmm broadcast for !TARGET_EVEX512. * config/i386/i386-options.cc (ix86_option_override_internal): Do not use PVW_AVX512 when no-evex512. (ix86_simd_clone_adjust): Add evex512 target into string. * config/i386/i386.cc (type_natural_mode): Report ABI warning when using zmm register w/o evex512. (ix86_return_in_memory): Do not allow zmm when !TARGET_EVEX512. (ix86_hard_regno_mode_ok): Ditto. (ix86_set_reg_reg_cost): Ditto. (ix86_rtx_costs): Ditto. (ix86_vector_mode_supported_p): Ditto. (ix86_preferred_simd_mode): Ditto. (ix86_autovectorize_vector_modes): Ditto. (ix86_get_mask_mode): Ditto. (ix86_simd_clone_compute_vecsize_and_simdlen): Disable 512 bit libmvec call when !TARGET_EVEX512. (ix86_simd_clone_usable): Ditto. * config/i386/i386.h (BIGGEST_ALIGNMENT): Disable 512 alignment when !TARGET_EVEX512. (MOVE_MAX): Do not use PVW_AVX512 when !TARGET_EVEX512. (STORE_MAX_PIECES): Ditto.
--- gcc/config/i386/i386-expand.cc | 1 + gcc/config/i386/i386-options.cc | 14 +++++---- gcc/config/i386/i386.cc | 53 ++++++++++++++++++--------------- gcc/config/i386/i386.h | 7 +++-- 4 files changed, 42 insertions(+), 33 deletions(-) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index e42ff27c6ef..6eedcb384c0 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -611,6 +611,7 @@ ix86_broadcast_from_constant (machine_mode mode, rtx op) avx512 embed broadcast is available. */ if (GET_MODE_INNER (mode) == DImode && !TARGET_64BIT && (!TARGET_AVX512F + || (GET_MODE_SIZE (mode) == 64 && !TARGET_EVEX512) || (GET_MODE_SIZE (mode) < 64 && !TARGET_AVX512VL))) return nullptr; diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index a1a7a92da9f..e2a90d7d9e2 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -2845,7 +2845,8 @@ ix86_option_override_internal (bool main_args_p, opts->x_ix86_move_max = opts->x_prefer_vector_width_type; if (opts_set->x_ix86_move_max == PVW_NONE) { - if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)) + if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) + && TARGET_EVEX512_P (opts->x_ix86_isa_flags2)) opts->x_ix86_move_max = PVW_AVX512; else opts->x_ix86_move_max = PVW_AVX128; @@ -2866,7 +2867,8 @@ ix86_option_override_internal (bool main_args_p, opts->x_ix86_store_max = opts->x_prefer_vector_width_type; if (opts_set->x_ix86_store_max == PVW_NONE) { - if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)) + if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) + && TARGET_EVEX512_P (opts->x_ix86_isa_flags2)) opts->x_ix86_store_max = PVW_AVX512; else opts->x_ix86_store_max = PVW_AVX128; @@ -3145,13 +3147,13 @@ ix86_simd_clone_adjust (struct cgraph_node *node) case 'e': if (TARGET_PREFER_AVX256) { - if (!TARGET_AVX512F) - str = "avx512f,prefer-vector-width=512"; + if (!TARGET_AVX512F || !TARGET_EVEX512) + str = "avx512f,evex512,prefer-vector-width=512"; 
else str = "prefer-vector-width=512"; } - else if (!TARGET_AVX512F) - str = "avx512f"; + else if (!TARGET_AVX512F || !TARGET_EVEX512) + str = "avx512f,evex512"; break; default: gcc_unreachable (); diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 477e6cecc38..0df3bf10547 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -1924,7 +1924,8 @@ type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum, if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type) && GET_MODE_INNER (mode) == innermode) { - if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU) + if (size == 64 && (!TARGET_AVX512F || !TARGET_EVEX512) + && !TARGET_IAMCU) { static bool warnedavx512f; static bool warnedavx512f_ret; @@ -4347,7 +4348,7 @@ ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) /* AVX512F values are returned in ZMM0 if available. */ if (size == 64) - return !TARGET_AVX512F; + return !TARGET_AVX512F || !TARGET_EVEX512; } if (mode == XFmode) @@ -20286,7 +20287,7 @@ ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode) - any of 512-bit wide vector mode - any scalar mode. */ if (TARGET_AVX512F - && (VALID_AVX512F_REG_OR_XI_MODE (mode) + && ((VALID_AVX512F_REG_OR_XI_MODE (mode) && TARGET_EVEX512) || VALID_AVX512F_SCALAR_MODE (mode))) return true; @@ -20538,7 +20539,7 @@ ix86_set_reg_reg_cost (machine_mode mode) case MODE_VECTOR_INT: case MODE_VECTOR_FLOAT: - if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode)) + if ((TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode)) || (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) || (TARGET_SSE && VALID_SSE_REG_MODE (mode)) @@ -21267,7 +21268,8 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, { /* (ior (not ...) ...) can be a single insn in AVX512. 
*/ if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F - && (GET_MODE_SIZE (mode) == 64 + && ((TARGET_EVEX512 + && GET_MODE_SIZE (mode) == 64) || (TARGET_AVX512VL && (GET_MODE_SIZE (mode) == 32 || GET_MODE_SIZE (mode) == 16)))) @@ -21315,7 +21317,8 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, /* (and (not ...) (not ...)) can be a single insn in AVX512. */ if (GET_CODE (right) == NOT && TARGET_AVX512F - && (GET_MODE_SIZE (mode) == 64 + && ((TARGET_EVEX512 + && GET_MODE_SIZE (mode) == 64) || (TARGET_AVX512VL && (GET_MODE_SIZE (mode) == 32 || GET_MODE_SIZE (mode) == 16)))) @@ -21385,7 +21388,8 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, { /* (not (xor ...)) can be a single insn in AVX512. */ if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F - && (GET_MODE_SIZE (mode) == 64 + && ((TARGET_EVEX512 + && GET_MODE_SIZE (mode) == 64) || (TARGET_AVX512VL && (GET_MODE_SIZE (mode) == 32 || GET_MODE_SIZE (mode) == 16)))) @@ -23000,7 +23004,7 @@ ix86_vector_mode_supported_p (machine_mode mode) return true; if (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) return true; - if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode)) + if (TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode)) return true; if ((TARGET_MMX || TARGET_MMX_WITH_SSE) && VALID_MMX_REG_MODE (mode)) @@ -23690,7 +23694,7 @@ ix86_preferred_simd_mode (scalar_mode mode) switch (mode) { case E_QImode: - if (TARGET_AVX512BW && !TARGET_PREFER_AVX256) + if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256) return V64QImode; else if (TARGET_AVX && !TARGET_PREFER_AVX128) return V32QImode; @@ -23698,7 +23702,7 @@ ix86_preferred_simd_mode (scalar_mode mode) return V16QImode; case E_HImode: - if (TARGET_AVX512BW && !TARGET_PREFER_AVX256) + if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256) return V32HImode; else if (TARGET_AVX && !TARGET_PREFER_AVX128) return V16HImode; @@ -23706,7 +23710,7 @@ ix86_preferred_simd_mode (scalar_mode mode) 
return V8HImode; case E_SImode: - if (TARGET_AVX512F && !TARGET_PREFER_AVX256) + if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256) return V16SImode; else if (TARGET_AVX && !TARGET_PREFER_AVX128) return V8SImode; @@ -23714,7 +23718,7 @@ ix86_preferred_simd_mode (scalar_mode mode) return V4SImode; case E_DImode: - if (TARGET_AVX512F && !TARGET_PREFER_AVX256) + if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256) return V8DImode; else if (TARGET_AVX && !TARGET_PREFER_AVX128) return V4DImode; @@ -23728,15 +23732,16 @@ ix86_preferred_simd_mode (scalar_mode mode) { if (TARGET_PREFER_AVX128) return V8HFmode; - else if (TARGET_PREFER_AVX256) + else if (TARGET_PREFER_AVX256 || !TARGET_EVEX512) return V16HFmode; } - return V32HFmode; + if (TARGET_EVEX512) + return V32HFmode; } return word_mode; case E_SFmode: - if (TARGET_AVX512F && !TARGET_PREFER_AVX256) + if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256) return V16SFmode; else if (TARGET_AVX && !TARGET_PREFER_AVX128) return V8SFmode; @@ -23744,7 +23749,7 @@ ix86_preferred_simd_mode (scalar_mode mode) return V4SFmode; case E_DFmode: - if (TARGET_AVX512F && !TARGET_PREFER_AVX256) + if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256) return V8DFmode; else if (TARGET_AVX && !TARGET_PREFER_AVX128) return V4DFmode; @@ -23764,13 +23769,13 @@ ix86_preferred_simd_mode (scalar_mode mode) static unsigned int ix86_autovectorize_vector_modes (vector_modes *modes, bool all) { - if (TARGET_AVX512F && !TARGET_PREFER_AVX256) + if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256) { modes->safe_push (V64QImode); modes->safe_push (V32QImode); modes->safe_push (V16QImode); } - else if (TARGET_AVX512F && all) + else if (TARGET_AVX512F && TARGET_EVEX512 && all) { modes->safe_push (V32QImode); modes->safe_push (V16QImode); @@ -23808,7 +23813,7 @@ ix86_get_mask_mode (machine_mode data_mode) unsigned elem_size = vector_size / nunits; /* Scalar mask case. 
*/ - if ((TARGET_AVX512F && vector_size == 64) + if ((TARGET_AVX512F && TARGET_EVEX512 && vector_size == 64) || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))) { if (elem_size == 4 @@ -24306,7 +24311,7 @@ ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, { /* If the function isn't exported, we can pick up just one ISA for the clones. */ - if (TARGET_AVX512F) + if (TARGET_AVX512F && TARGET_EVEX512) clonei->vecsize_mangle = 'e'; else if (TARGET_AVX2) clonei->vecsize_mangle = 'd'; @@ -24398,17 +24403,17 @@ ix86_simd_clone_usable (struct cgraph_node *node) return -1; if (!TARGET_AVX) return 0; - return TARGET_AVX512F ? 3 : TARGET_AVX2 ? 2 : 1; + return (TARGET_AVX512F && TARGET_EVEX512) ? 3 : TARGET_AVX2 ? 2 : 1; case 'c': if (!TARGET_AVX) return -1; - return TARGET_AVX512F ? 2 : TARGET_AVX2 ? 1 : 0; + return (TARGET_AVX512F && TARGET_EVEX512) ? 2 : TARGET_AVX2 ? 1 : 0; case 'd': if (!TARGET_AVX2) return -1; - return TARGET_AVX512F ? 1 : 0; + return (TARGET_AVX512F && TARGET_EVEX512) ? 1 : 0; case 'e': - if (!TARGET_AVX512F) + if (!TARGET_AVX512F || !TARGET_EVEX512) return -1; return 0; default: diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 3e8488f2ae8..aac972f5caf 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -770,7 +770,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); TARGET_ABSOLUTE_BIGGEST_ALIGNMENT. */ #define BIGGEST_ALIGNMENT \ - (TARGET_IAMCU ? 32 : (TARGET_AVX512F ? 512 : (TARGET_AVX ? 256 : 128))) + (TARGET_IAMCU ? 32 : ((TARGET_AVX512F && TARGET_EVEX512) \ + ? 512 : (TARGET_AVX ? 256 : 128))) /* Maximum stack alignment. */ #define MAX_STACK_ALIGNMENT MAX_OFILE_ALIGNMENT @@ -1807,7 +1808,7 @@ typedef struct ix86_args { MOVE_MAX_PIECES defaults to MOVE_MAX. */ #define MOVE_MAX \ - ((TARGET_AVX512F \ + ((TARGET_AVX512F && TARGET_EVEX512\ && (ix86_move_max == PVW_AVX512 \ || ix86_store_max == PVW_AVX512)) \ ? 
64 \ @@ -1826,7 +1827,7 @@ typedef struct ix86_args { store_by_pieces of 16/32/64 bytes. */ #define STORE_MAX_PIECES \ (TARGET_INTER_UNIT_MOVES_TO_VEC \ - ? ((TARGET_AVX512F && ix86_store_max == PVW_AVX512) \ + ? ((TARGET_AVX512F && TARGET_EVEX512 && ix86_store_max == PVW_AVX512) \ ? 64 \ : ((TARGET_AVX \ && ix86_store_max >= PVW_AVX256) \ -- 2.31.1