From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mgamail.intel.com (mgamail.intel.com [192.198.163.13]) by sourceware.org (Postfix) with ESMTPS id 52DE23882046 for ; Wed, 12 Jun 2024 12:37:59 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 52DE23882046 Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=intel.com Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=intel.com ARC-Filter: OpenARC Filter v1.0.0 sourceware.org 52DE23882046 Authentication-Results: server2.sourceware.org; arc=none smtp.remote-ip=192.198.163.13 ARC-Seal: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1718195882; cv=none; b=r52vF/nV56P7+SqRDS7X3Eqfjf9EIBB+EeepDMAU10w6ohd1q+oO9YzHp6BcyKrwqeIqzYXd3+saHgaE8K2nH15kmlzC0L3PcZWGIDepXlw+NTXUAYCt2L4eQohrN5LNkV/8X8DOHzOYpIdmZPJhvPoI5+cmtAVQzPSXEkjOu/E= ARC-Message-Signature: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1718195882; c=relaxed/simple; bh=ixDQoSYt49MdMAnm0HhTIuxVxseYFrwiDq7Gl1KXqno=; h=DKIM-Signature:From:To:Subject:Date:Message-Id:MIME-Version; b=gGFiq3eNJsVu8st/ELj7gCcV1DyVmoLIhGdMFRIR946FkzBuZO1/JY+T9Mq4GVWo6DCeC51wb3t+7kvVxpmkorfFK/wRozCvILW+lU7EMNnyv+RzkDLCpNSAtsPI2mmoXjsbdqsThLcXjcVL0mfwhOkmCZpvPiVyXiShR0Ir/rk= ARC-Authentication-Results: i=1; server2.sourceware.org DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1718195879; x=1749731879; h=from:to:cc:subject:date:message-id:mime-version: content-transfer-encoding; bh=ixDQoSYt49MdMAnm0HhTIuxVxseYFrwiDq7Gl1KXqno=; b=FlIBG5fM9fRzUHHJhknJSZaM64BckW8mjsxDgojYXShFiVcxKka24FBO tR8wlnMoFprtl6ON3Ab5D+Ozuw+Yg+IXKBq2uqQd1McfHEbbbM8EnlKF5 RhE2VIO4jOxLFqyUu63wPyjKoDaPKb0piBN0JOIEFzrXEKnYzPZqs0LIl wLZ5tjX4LGCpWiwJNAAk/9kkqW3b9GrNXKerdr2OjSrwF/DHVXZ7e5XU8 IpqSe4WOfopZKCdLpaCU9UOpZaJsp4tOJTx671o4GhhXoggbSW453/4p7 SRBsuzpZMI6ee0XLS+cv38FP2HjW78X/hXtD4a68f+KAvmCTkA79MB8A+ g==; X-CSE-ConnectionGUID: 90ipPvK8QsSNFJNLkpT56Q== X-CSE-MsgGUID: hTZFwFdqSCy7dhwjcS9nkw== 
X-IronPort-AV: E=McAfee;i="6700,10204,11101"; a="17877926" X-IronPort-AV: E=Sophos;i="6.08,233,1712646000"; d="scan'208";a="17877926" Received: from orviesa010.jf.intel.com ([10.64.159.150]) by fmvoesa107.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 12 Jun 2024 05:37:58 -0700 X-CSE-ConnectionGUID: GUf9frjtRIKvTatKhhfpCg== X-CSE-MsgGUID: UrTJ2W7aTZisv1RvPlkdIQ== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="6.08,233,1712646000"; d="scan'208";a="39695343" Received: from shvmail02.sh.intel.com ([10.239.244.9]) by orviesa010.jf.intel.com with ESMTP; 12 Jun 2024 05:37:56 -0700 Received: from pli-ubuntu.sh.intel.com (pli-ubuntu.sh.intel.com [10.239.159.47]) by shvmail02.sh.intel.com (Postfix) with ESMTP id D86BA10085B9; Wed, 12 Jun 2024 20:37:54 +0800 (CST) From: pan2.li@intel.com To: gcc-patches@gcc.gnu.org Cc: juzhe.zhong@rivai.ai, kito.cheng@gmail.com, richard.guenther@gmail.com, jeffreyalaw@gmail.com, rdapp.gcc@gmail.com, Pan Li Subject: [PATCH v1] Match: Support more forms for the scalar unsigned .SAT_SUB Date: Wed, 12 Jun 2024 20:37:53 +0800 Message-Id: <20240612123753.201660-1-pan2.li@intel.com> X-Mailer: git-send-email 2.34.1 MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Spam-Status: No, score=-11.4 required=5.0 tests=BAYES_00,DKIMWL_WL_HIGH,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,GIT_PATCH_0,SPF_HELO_NONE,SPF_NONE,TXREP,T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: From: Pan Li Now that the scalar unsigned forms 1 and 2 are supported, we would like to introduce more forms, including both branch and branchless variants. Forms 3-10 are listed below: Form 3: #define SAT_SUB_U_3(T) \ T sat_sub_u_3_##T (T x, T y) \ { \ return x > y ? x - y : 0; \ } Form 4: #define SAT_SUB_U_4(T) \ T sat_sub_u_4_##T (T x, T y) \ { \ return x >= y ? x - y : 0; \ } Form 5: #define SAT_SUB_U_5(T) \ T sat_sub_u_5_##T (T x, T y) \ { \ return x < y ? 
0 : x - y; \ } Form 6: #define SAT_SUB_U_6(T) \ T sat_sub_u_6_##T (T x, T y) \ { \ return x <= y ? 0 : x - y; \ } Form 7: #define SAT_SUB_U_7(T) \ T sat_sub_u_7_##T (T x, T y) \ { \ T ret; \ T overflow = __builtin_sub_overflow (x, y, &ret); \ return ret & (T)(overflow - 1); \ } Form 8: #define SAT_SUB_U_8(T) \ T sat_sub_u_8_##T (T x, T y) \ { \ T ret; \ T overflow = __builtin_sub_overflow (x, y, &ret); \ return ret & (T)-(!overflow); \ } Form 9: #define SAT_SUB_U_9(T) \ T sat_sub_u_9_##T (T x, T y) \ { \ T ret; \ T overflow = __builtin_sub_overflow (x, y, &ret); \ return overflow ? 0 : ret; \ } Form 10: #define SAT_SUB_U_10(T) \ T sat_sub_u_10_##T (T x, T y) \ { \ T ret; \ T overflow = __builtin_sub_overflow (x, y, &ret); \ return !overflow ? ret : 0; \ } Take form 10 as an example: SAT_SUB_U_10(uint8_t); Before this patch: uint8_t sat_sub_u_10_uint8_t (uint8_t x, uint8_t y) { unsigned char _1; unsigned char _2; uint8_t _3; __complex__ unsigned char _6; ;; basic block 2, loop depth 0 ;; pred: ENTRY _6 = .SUB_OVERFLOW (x_4(D), y_5(D)); _2 = IMAGPART_EXPR <_6>; if (_2 == 0) goto <bb 3>; [50.00%] else goto <bb 4>; [50.00%] ;; succ: 3 ;; 4 ;; basic block 3, loop depth 0 ;; pred: 2 _1 = REALPART_EXPR <_6>; ;; succ: 4 ;; basic block 4, loop depth 0 ;; pred: 2 ;; 3 # _3 = PHI <0(2), _1(3)> return _3; ;; succ: EXIT } After this patch: uint8_t sat_sub_u_10_uint8_t (uint8_t x, uint8_t y) { uint8_t _3; ;; basic block 2, loop depth 0 ;; pred: ENTRY _3 = .SAT_SUB (x_4(D), y_5(D)); [tail call] return _3; ;; succ: EXIT } The test suites below pass with this patch: 1. The rv64gcv full regression test with newlib. 2. The rv64gcv build with glibc. 3. The x86 bootstrap test. 4. The x86 full regression test. gcc/ChangeLog: * match.pd: Add more match for unsigned sat_sub. * tree-ssa-math-opts.cc (match_unsigned_saturation_sub): Add new func impl to match phi node for .SAT_SUB. 
(math_opts_dom_walker::after_dom_children): Try match .SAT_SUB for the phi node, MULT_EXPR, BIT_XOR_EXPR and BIT_AND_EXPR. Signed-off-by: Pan Li --- gcc/match.pd | 25 +++++++++++++++++++++++-- gcc/tree-ssa-math-opts.cc | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index 5cfe81e80b3..66e411b3359 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3140,14 +3140,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) /* Unsigned saturation sub, case 1 (branch with gt): SAT_U_SUB = X > Y ? X - Y : 0 */ (match (unsigned_integer_sat_sub @0 @1) - (cond (gt @0 @1) (minus @0 @1) integer_zerop) + (cond^ (gt @0 @1) (minus @0 @1) integer_zerop) (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) && types_match (type, @0, @1)))) /* Unsigned saturation sub, case 2 (branch with ge): SAT_U_SUB = X >= Y ? X - Y : 0. */ (match (unsigned_integer_sat_sub @0 @1) - (cond (ge @0 @1) (minus @0 @1) integer_zerop) + (cond^ (ge @0 @1) (minus @0 @1) integer_zerop) (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) && types_match (type, @0, @1)))) @@ -3165,6 +3165,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) && types_match (type, @0, @1)))) +/* Unsigned saturation sub, case 5 (branchless bit_and with .SUB_OVERFLOW. */ +(match (unsigned_integer_sat_sub @0 @1) + (bit_and:c (realpart (IFN_SUB_OVERFLOW@2 @0 @1)) + (plus:c (imagpart @2) integer_minus_onep)) + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) + && types_match (type, @0, @1)))) + +/* Unsigned saturation sub, case 6 (branchless mult with .SUB_OVERFLOW. */ +(match (unsigned_integer_sat_sub @0 @1) + (mult:c (realpart (IFN_SUB_OVERFLOW@2 @0 @1)) + (bit_xor:c (imagpart @2) integer_onep)) + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) + && types_match (type, @0, @1)))) + +/* Unsigned saturation sub, case 7 (branch with .SUB_OVERFLOW. 
*/ +(match (unsigned_integer_sat_sub @0 @1) + (cond^ (eq (imagpart (IFN_SUB_OVERFLOW@2 @0 @1)) integer_zerop) + (realpart @2) integer_zerop) + (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) + && types_match (type, @0, @1)))) + /* x > y && x != XXX_MIN --> x > y x > y && x == XXX_MIN --> false . */ (for eqne (eq ne) diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc index fbb8e0ea306..05aa157611b 100644 --- a/gcc/tree-ssa-math-opts.cc +++ b/gcc/tree-ssa-math-opts.cc @@ -4186,6 +4186,36 @@ match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gassign *stmt) build_saturation_binary_arith_call (gsi, IFN_SAT_SUB, lhs, ops[0], ops[1]); } +/* + * Try to match saturation unsigned sub. + * [local count: 1073741824]: + * if (x_2(D) > y_3(D)) + * goto ; [50.00%] + * else + * goto ; [50.00%] + * + * [local count: 536870912]: + * _4 = x_2(D) - y_3(D); + * + * [local count: 1073741824]: + * # _1 = PHI <0(2), _4(3)> + * => + * [local count: 1073741824]: + * _1 = .SAT_SUB (x_2(D), y_3(D)); */ +static void +match_unsigned_saturation_sub (gimple_stmt_iterator *gsi, gphi *phi) +{ + if (gimple_phi_num_args (phi) != 2) + return; + + tree ops[2]; + tree phi_result = gimple_phi_result (phi); + + if (gimple_unsigned_integer_sat_sub (phi_result, ops, NULL)) + build_saturation_binary_arith_call (gsi, phi, IFN_SAT_SUB, phi_result, + ops[0], ops[1]); +} + /* Recognize for unsigned x x = y - z; if (x > y) @@ -6104,6 +6134,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb) { gimple_stmt_iterator gsi = gsi_start_bb (bb); match_unsigned_saturation_add (&gsi, psi.phi ()); + match_unsigned_saturation_sub (&gsi, psi.phi ()); } for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);) @@ -6129,6 +6160,7 @@ math_opts_dom_walker::after_dom_children (basic_block bb) continue; } match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p); + match_unsigned_saturation_sub (&gsi, as_a (stmt)); break; case PLUS_EXPR: @@ -6167,6 +6199,7 @@ math_opts_dom_walker::after_dom_children 
(basic_block bb) break; case COND_EXPR: + case BIT_AND_EXPR: match_unsigned_saturation_sub (&gsi, as_a (stmt)); break; -- 2.34.1