From mboxrd@z Thu Jan  1 00:00:00 1970
From: pan2.li@intel.com
To: gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai, kito.cheng@gmail.com, tamar.christina@arm.com, richard.guenther@gmail.com, hongtao.liu@intel.com, Pan Li
Subject: [PATCH v4 2/3] VECT: Support new IFN SAT_ADD for unsigned vector int
Date: Mon, 6 May 2024 22:49:27 +0800
Message-Id: <20240506144927.726990-1-pan2.li@intel.com>
X-Mailer: git-send-email 2.34.1
In-Reply-To: <20240406120755.2692291-1-pan2.li@intel.com>
References: <20240406120755.2692291-1-pan2.li@intel.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
From: Pan Li <pan2.li@intel.com>

This patch depends on the scalar enabling patch below:

https://gcc.gnu.org/pipermail/gcc-patches/2024-May/650822.html

For vectorization, we leverage the existing vect pattern recog to find
the pattern similar to the scalar one, and let the vectorizer perform
the rest for the standard name usadd<m>3 in vector mode.  The riscv
vector backend has the insn "Vector Single-Width Saturating Add and
Subtract", which can be leveraged when expanding usadd<m>3 in vector
mode.  For example:

void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  unsigned i;

  for (i = 0; i < n; i++)
    out[i] = (x[i] + y[i]) | (- (uint64_t)((uint64_t)(x[i] + y[i]) < x[i]));
}

Before this patch:
void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  ...
  _80 = .SELECT_VL (ivtmp_78, POLY_INT_CST [2, 2]);
  ivtmp_58 = _80 * 8;
  vect__4.7_61 = .MASK_LEN_LOAD (vectp_x.5_59, 64B, { -1, ... }, _80, 0);
  vect__6.10_65 = .MASK_LEN_LOAD (vectp_y.8_63, 64B, { -1, ... }, _80, 0);
  vect__7.11_66 = vect__4.7_61 + vect__6.10_65;
  mask__8.12_67 = vect__4.7_61 > vect__7.11_66;
  vect__12.15_72 = .VCOND_MASK (mask__8.12_67, { 18446744073709551615, ... }, vect__7.11_66);
  .MASK_LEN_STORE (vectp_out.16_74, 64B, { -1, ... }, _80, 0, vect__12.15_72);
  vectp_x.5_60 = vectp_x.5_59 + ivtmp_58;
  vectp_y.8_64 = vectp_y.8_63 + ivtmp_58;
  vectp_out.16_75 = vectp_out.16_74 + ivtmp_58;
  ivtmp_79 = ivtmp_78 - _80;
  ...
}

After this patch:
void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  ...
  _62 = .SELECT_VL (ivtmp_60, POLY_INT_CST [2, 2]);
  ivtmp_46 = _62 * 8;
  vect__4.7_49 = .MASK_LEN_LOAD (vectp_x.5_47, 64B, { -1, ... }, _62, 0);
  vect__6.10_53 = .MASK_LEN_LOAD (vectp_y.8_51, 64B, { -1, ... }, _62, 0);
  vect__12.11_54 = .SAT_ADD (vect__4.7_49, vect__6.10_53);
  .MASK_LEN_STORE (vectp_out.12_56, 64B, { -1, ... }, _62, 0, vect__12.11_54);
  ...
}

The below test suites are passed for this patch:
* The riscv full regression tests.
* The aarch64 full regression tests.
* The x86 bootstrap tests.
* The x86 full regression tests.

	PR target/51492
	PR target/112600

gcc/ChangeLog:

	* tree-vect-patterns.cc (gimple_unsigned_integer_sat_add): New
	func decl generated by match.pd match.
	(vect_recog_sat_add_pattern): New func impl to recog the pattern
	for unsigned SAT_ADD.
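As a side note (not part of the patch itself), below is a minimal
standalone sketch showing that the branchless form matched here behaves
as the usual unsigned saturating add; the helper names are only
illustrative:

#include <assert.h>
#include <stdint.h>

/* Branchless form recognized by the SAT_ADD pattern: on overflow the
   comparison yields 1, the negation yields all-ones, and the OR
   saturates the result to UINT64_MAX.  */
static uint64_t
sat_add_branchless (uint64_t x, uint64_t y)
{
  return (x + y) | (- (uint64_t)((uint64_t)(x + y) < x));
}

/* Straightforward reference implementation for comparison.  */
static uint64_t
sat_add_reference (uint64_t x, uint64_t y)
{
  return x > UINT64_MAX - y ? UINT64_MAX : x + y;
}

int
main (void)
{
  assert (sat_add_branchless (1, 2) == 3);
  assert (sat_add_branchless (UINT64_MAX, 1) == UINT64_MAX);
  assert (sat_add_branchless (UINT64_MAX - 1, 5)
	  == sat_add_reference (UINT64_MAX - 1, 5));
  return 0;
}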
Signed-off-by: Pan Li <pan2.li@intel.com>
---
 gcc/tree-vect-patterns.cc | 51 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 87c2acff386..8ffcaf71d5c 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -4487,6 +4487,56 @@ vect_recog_mult_pattern (vec_info *vinfo,
   return pattern_stmt;
 }
 
+extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
+
+/*
+ * Try to detect saturation add pattern (SAT_ADD), aka below gimple:
+ *   _7 = _4 + _6;
+ *   _8 = _4 > _7;
+ *   _9 = (long unsigned int) _8;
+ *   _10 = -_9;
+ *   _12 = _7 | _10;
+ *
+ * And then simplified to
+ *   _12 = .SAT_ADD (_4, _6);
+ */
+
+static gimple *
+vect_recog_sat_add_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
+			    tree *type_out)
+{
+  gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
+
+  if (!is_gimple_assign (last_stmt))
+    return NULL;
+
+  tree res_ops[2];
+  tree lhs = gimple_assign_lhs (last_stmt);
+
+  if (gimple_unsigned_integer_sat_add (lhs, res_ops, NULL))
+    {
+      tree itype = TREE_TYPE (res_ops[0]);
+      tree vtype = get_vectype_for_scalar_type (vinfo, itype);
+
+      if (vtype != NULL_TREE && direct_internal_fn_supported_p (
+	IFN_SAT_ADD, vtype, OPTIMIZE_FOR_SPEED))
+	{
+	  *type_out = vtype;
+	  gcall *call = gimple_build_call_internal (IFN_SAT_ADD, 2, res_ops[0],
+						    res_ops[1]);
+
+	  gimple_call_set_lhs (call, vect_recog_temp_ssa_var (itype, NULL));
+	  gimple_call_set_nothrow (call, /* nothrow_p */ false);
+	  gimple_set_location (call, gimple_location (last_stmt));
+
+	  vect_pattern_detected ("vect_recog_sat_add_pattern", last_stmt);
+	  return call;
+	}
+    }
+
+  return NULL;
+}
+
 /* Detect a signed division by a constant that wouldn't be
    otherwise vectorized:
 
@@ -6987,6 +7037,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
   { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
   { vect_recog_divmod_pattern, "divmod" },
   { vect_recog_mult_pattern, "mult" },
+  { vect_recog_sat_add_pattern, "sat_add" },
   { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
   { vect_recog_gcond_pattern, "gcond" },
   { vect_recog_bool_pattern, "bool" },
-- 
2.34.1
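Not part of this series, but as a rough sketch of how the vector pattern
could be exercised in the testsuite (the dg options, dump choice and
target expectations below are assumptions of mine, not taken from the
patch): compile the original loop and scan the optimized dump for the
internal function on a target whose backend provides a vector usadd<m>3
expander.

/* Hypothetical testsuite sketch; directives and options are assumptions.  */
/* { dg-do compile } */
/* { dg-options "-O3 -fdump-tree-optimized" } */

#include <stdint.h>

void
vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  for (unsigned i = 0; i < n; i++)
    out[i] = (x[i] + y[i]) | (- (uint64_t)((uint64_t)(x[i] + y[i]) < x[i]));
}

/* Expect .SAT_ADD only when the target supports the vector internal fn.  */
/* { dg-final { scan-tree-dump ".SAT_ADD " "optimized" } } */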