From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-lj1-x236.google.com (mail-lj1-x236.google.com [IPv6:2a00:1450:4864:20::236]) by sourceware.org (Postfix) with ESMTPS id E8EE63849ADC for ; Tue, 14 May 2024 13:22:11 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org E8EE63849ADC Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=gmail.com ARC-Filter: OpenARC Filter v1.0.0 sourceware.org E8EE63849ADC Authentication-Results: server2.sourceware.org; arc=none smtp.remote-ip=2a00:1450:4864:20::236 ARC-Seal: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1715692934; cv=none; b=KQLVeAeARu4g0Hi7s6wr3I7eyJjCBZbiWrpaJm8DE0zaD5PQ3k0CAt/5edn6WJ85YJxg6lzyTYbAGdU8Y3Zt7Pf404xXU8QVI8ulJH4u2AEiXG8+vDEZixvhIylU1Qa/wvL3u+/uvufF+4R0GlY4V6JkXriGwiDYoeSQcjuGBMM= ARC-Message-Signature: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1715692934; c=relaxed/simple; bh=mgMbrwrQQYySDv5P6tOZ3r0tCcMSeybAkv4GxQBTmGk=; h=DKIM-Signature:MIME-Version:From:Date:Message-ID:Subject:To; b=RWLmIyVVB8/onoQBeujLKUKC5NuOy4GqMFvy6X477cPQzw/E69fWCxi651drORci4OOY9Sc5RxqGiL2GsUcrw2UOa5vBXfefQxrDWeVu2DmqHE7/i4QM1dCv30fxefqNALPyZ5eMcxRr8QqKnhkEsm+EA4IqOFC7gh04eMngtLA= ARC-Authentication-Results: i=1; server2.sourceware.org Received: by mail-lj1-x236.google.com with SMTP id 38308e7fff4ca-2e0b2ddc5d1so82284461fa.3 for ; Tue, 14 May 2024 06:22:11 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20230601; t=1715692930; x=1716297730; darn=gcc.gnu.org; h=content-transfer-encoding:cc:to:subject:message-id:date:from :in-reply-to:references:mime-version:from:to:cc:subject:date :message-id:reply-to; bh=gDWUMvU8q9M6AdaRhgec0ULOXqlL6LgkCRBV8z5nxbs=; b=dwuu/BLCZ5sNymew3FJpOpEey3p3MvSKPBt/hPnRGqFiWddDG/Y9ktM8D7ncGi9kTI cRdIhVxrHWY8fPZz22m208qX1ExVZ2eM2Lyl+KrNn7yqTZzBv1C/wjZ0E1T8jZHban+d sjqTgXm9MDB/2FIGUNzQi/5dYvzvARe+vLyuO583M83GtOOYAsnLMtyXT50huG7j1QbH 
hvLxHusMaTix/ieC577PAmtOiuvMYtVj2VBdfMIveUQClp2xI5iPUcsycz5EkExRJEGx y2/cj//vpnCHkA6jQo7WVpn3wnHLijACNfQyn5BBZxsBRxmBXWryzpU6Xe3c7ezS5AZX DxNw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1715692930; x=1716297730; h=content-transfer-encoding:cc:to:subject:message-id:date:from :in-reply-to:references:mime-version:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=gDWUMvU8q9M6AdaRhgec0ULOXqlL6LgkCRBV8z5nxbs=; b=j4bYAq96SXFFa3r0PjGmNUJbV2N/5Uh8UjRgybD57fIXESTefbgBegDQopDyBTLY2o Yv1ds69icvlLxrOQeoe4wrVM/oIqzOSVFJvLAblRsqZzdI1LQEbJ5qqKQuJi0fauPPxT 47TBkDGTrkK/tZCiiSRlJ1NqUiyM7P6HlKsNVwng2gVNRJQqngx2faKn5IjmlVhiBvKW LwdpB9YY+qsy/kyv/iRUPATF0YKKOAEa3md3vn+7/ZDNOLZt+iLXGG3DaO2iGIdxsHJp Ov1P2+O+hVZSUpZVBBbvVbPVd/7Qc1k141NZLS0CuuASrq1k0x1Pcg6Tmfm57Aw4FN++ W5iQ== X-Gm-Message-State: AOJu0YwEff101h2FD45AAaIwVJywMa1pBpSGwuxsP1p+1XGmtC5F65bL 26MUWkXVdNuM6BslfZN4LQBuCTiiyPYhYyVDOblmyMx8RLoJ+y1iWWdvglORULaU7ZEqtEjF0B7 knIOvFzxvIeRIjvOlRbSVypPnGPW+PQ== X-Google-Smtp-Source: AGHT+IFEvPvw6vor1PAdh85rb1kXmsz7l0BWgVFsENyVhOXB9jmiPsVJNCMmCmrq+RtG2Gb0K15hDCPTtbZSe7YUgUQ= X-Received: by 2002:a2e:90c7:0:b0:2d4:3d86:54e2 with SMTP id 38308e7fff4ca-2e51ff5e37fmr79706291fa.27.1715692930055; Tue, 14 May 2024 06:22:10 -0700 (PDT) MIME-Version: 1.0 References: <20240406120755.2692291-1-pan2.li@intel.com> <20240506144927.726990-1-pan2.li@intel.com> In-Reply-To: <20240506144927.726990-1-pan2.li@intel.com> From: Richard Biener Date: Tue, 14 May 2024 15:21:59 +0200 Message-ID: Subject: Re: [PATCH v4 2/3] VECT: Support new IFN SAT_ADD for unsigned vector int To: pan2.li@intel.com Cc: gcc-patches@gcc.gnu.org, juzhe.zhong@rivai.ai, kito.cheng@gmail.com, tamar.christina@arm.com, hongtao.liu@intel.com Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable X-Spam-Status: No, score=-8.0 required=5.0 
tests=BAYES_00,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,FREEMAIL_FROM,GIT_PATCH_0,KAM_SHORT,RCVD_IN_DNSWL_NONE,SPF_HELO_NONE,SPF_PASS,TXREP autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: On Mon, May 6, 2024 at 4:49 PM wrote: > > From: Pan Li > > This patch depends on below scalar enabling patch: > > https://gcc.gnu.org/pipermail/gcc-patches/2024-May/650822.html > > For vectorize, we leverage the existing vect pattern recog to find > the pattern similar to scalar and let the vectorizer to perform > the rest part for standard name usadd3 in vector mode. > The riscv vector backend have insn "Vector Single-Width Saturating > Add and Subtract" which can be leveraged when expand the usadd3 > in vector mode. For example: > > void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n) > { > unsigned i; > > for (i = 0; i < n; i++) > out[i] = (x[i] + y[i]) | (- (uint64_t)((uint64_t)(x[i] + y[i]) < x[i])); > } > > Before this patch: > void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n) > { > ... > _80 = .SELECT_VL (ivtmp_78, POLY_INT_CST [2, 2]); > ivtmp_58 = _80 * 8; > vect__4.7_61 = .MASK_LEN_LOAD (vectp_x.5_59, 64B, { -1, ... }, _80, 0); > vect__6.10_65 = .MASK_LEN_LOAD (vectp_y.8_63, 64B, { -1, ... }, _80, 0); > vect__7.11_66 = vect__4.7_61 + vect__6.10_65; > mask__8.12_67 = vect__4.7_61 > vect__7.11_66; > vect__12.15_72 = .VCOND_MASK (mask__8.12_67, { 18446744073709551615, ... }, vect__7.11_66); > .MASK_LEN_STORE (vectp_out.16_74, 64B, { -1, ... }, _80, 0, vect__12.15_72); > vectp_x.5_60 = vectp_x.5_59 + ivtmp_58; > vectp_y.8_64 = vectp_y.8_63 + ivtmp_58; > vectp_out.16_75 = vectp_out.16_74 + ivtmp_58; > ivtmp_79 = ivtmp_78 - _80; > ... > } > > After this patch: > void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n) > { > ...
> _62 = .SELECT_VL (ivtmp_60, POLY_INT_CST [2, 2]); > ivtmp_46 = _62 * 8; > vect__4.7_49 = .MASK_LEN_LOAD (vectp_x.5_47, 64B, { -1, ... }, _62, 0); > vect__6.10_53 = .MASK_LEN_LOAD (vectp_y.8_51, 64B, { -1, ... }, _62, 0); > vect__12.11_54 = .SAT_ADD (vect__4.7_49, vect__6.10_53); > .MASK_LEN_STORE (vectp_out.12_56, 64B, { -1, ... }, _62, 0, vect__12.11_54); > ... > } > > The below test suites are passed for this patch. > * The riscv fully regression tests. > * The aarch64 fully regression tests. > * The x86 bootstrap tests. > * The x86 fully regression tests. > > PR target/51492 > PR target/112600 > > gcc/ChangeLog: > > * tree-vect-patterns.cc (gimple_unsigned_integer_sat_add): New func > decl generated by match.pd match. > (vect_recog_sat_add_pattern): New func impl to recog the pattern > for unsigned SAT_ADD. > > Signed-off-by: Pan Li > --- > gcc/tree-vect-patterns.cc | 51 +++++++++++++++++++++++++++++++++++++++ > 1 file changed, 51 insertions(+) > > diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc > index 87c2acff386..8ffcaf71d5c 100644 > --- a/gcc/tree-vect-patterns.cc > +++ b/gcc/tree-vect-patterns.cc > @@ -4487,6 +4487,56 @@ vect_recog_mult_pattern (vec_info *vinfo, > return pattern_stmt; > } > > +extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree)); > + > +/* > + * Try to detect saturation add pattern (SAT_ADD), aka below gimple: > + * _7 = _4 + _6; > + * _8 = _4 > _7; > + * _9 = (long unsigned int) _8; > + * _10 = -_9; > + * _12 = _7 | _10; > + * > + * And then simplied to > + * _12 = .SAT_ADD (_4, _6); > + */ > + > +static gimple * > +vect_recog_sat_add_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo, > + tree *type_out) > +{ > + gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo); > + > + if (!is_gimple_assign (last_stmt)) > + return NULL; > + > + tree res_ops[2]; > + tree lhs = gimple_assign_lhs (last_stmt); > + > + if (gimple_unsigned_integer_sat_add (lhs, res_ops, NULL)) > + {
> + tree itype = TREE_TYPE (res_ops[0]); > + tree vtype = get_vectype_for_scalar_type (vinfo, itype); > + > + if (vtype != NULL_TREE && direct_internal_fn_supported_p ( > + IFN_SAT_ADD, vtype, OPTIMIZE_FOR_SPEED)) Please break the line before the && instead, like if (vtype != NULL_TREE && direct_internal_fn_supported_p (... Otherwise this is OK once 1/3 is approved. Thanks, Richard. > + { > + *type_out = vtype; > + gcall *call = gimple_build_call_internal (IFN_SAT_ADD, 2, res_ops[0], > + res_ops[1]); > + > + gimple_call_set_lhs (call, vect_recog_temp_ssa_var (itype, NULL)); > + gimple_call_set_nothrow (call, /* nothrow_p */ false); > + gimple_set_location (call, gimple_location (last_stmt)); > + > + vect_pattern_detected ("vect_recog_sat_add_pattern", last_stmt); > + return call; > + } > + } > + > + return NULL; > +} > + > /* Detect a signed division by a constant that wouldn't be > otherwise vectorized: > > @@ -6987,6 +7037,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = { > { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" }, > { vect_recog_divmod_pattern, "divmod" }, > { vect_recog_mult_pattern, "mult" }, > + { vect_recog_sat_add_pattern, "sat_add" }, > { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" }, > { vect_recog_gcond_pattern, "gcond" }, > { vect_recog_bool_pattern, "bool" }, > -- > 2.34.1 >