From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <SRS0=hNXJ=MR=gmail.com=richard.guenther@sourceware.org>
Received: from mail-lj1-x230.google.com (mail-lj1-x230.google.com [IPv6:2a00:1450:4864:20::230])
	by sourceware.org (Postfix) with ESMTPS id EC0DB3849ADC
	for <gcc-patches@gcc.gnu.org>; Tue, 14 May 2024 13:18:17 +0000 (GMT)
DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org EC0DB3849ADC
Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=gmail.com
Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=gmail.com
ARC-Filter: OpenARC Filter v1.0.0 sourceware.org EC0DB3849ADC
Authentication-Results: server2.sourceware.org; arc=none smtp.remote-ip=2a00:1450:4864:20::230
ARC-Seal: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1715692700; cv=none;
	b=aViySlxmHpU7n/K4uQAHJRuKaeewmYQnFwNEzOmJpM2X/3BB7dbgPl3ucmycnlQAYzHCcDv9bGuqeeuQyfrUKJVCZFO9dJhRgElk7SHBgCPxx+xR9v4w2sNE2EHdqxiGuzSz7yvClqZMhZV3aJDHXoQ1TfzaP3uV22A7eqzXQ6s=
ARC-Message-Signature: i=1; a=rsa-sha256; d=sourceware.org; s=key;
	t=1715692700; c=relaxed/simple;
	bh=lO/jzbGyWLuXzDrs0wseTnu1maaxZixARUz1Qh5n+/g=;
	h=DKIM-Signature:MIME-Version:From:Date:Message-ID:Subject:To; b=bYG6sEkj676JqK0shb0bl5k08BsfdGQoJo3CUzoVs6ifdVSa8hj/gvp4JC/yqKIKZ0jjxlxHFJUiza0bU7R4Zm3eY46HlIK9BRXYQ7N6dpsGw/2VNkK6uG/MHWw9MFeeAulAlX8yPvljPYyPOGkcxD+DFz2M7oc22ZOQRvoRwV4=
ARC-Authentication-Results: i=1; server2.sourceware.org
Received: by mail-lj1-x230.google.com with SMTP id 38308e7fff4ca-2df848f9325so70593381fa.1
        for <gcc-patches@gcc.gnu.org>; Tue, 14 May 2024 06:18:17 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
        d=gmail.com; s=20230601; t=1715692696; x=1716297496; darn=gcc.gnu.org;
        h=content-transfer-encoding:cc:to:subject:message-id:date:from
         :in-reply-to:references:mime-version:from:to:cc:subject:date
         :message-id:reply-to;
        bh=/xjCKQuha7hVow3jqNlMQQrD3zT6Ykl3WSgG/f7FlA4=;
        b=Z4jr7uUrYEP5zngMi65cKEwW0mQ7zXToS2+YWJimu8AZA6WuL4c/yJuhQBSeJSLeNw
         7QFBT35SLP10dp4m2Hofk7lpmp0VjytULjzFa75iHorasAT/dKZmQLx7n0x53ndGyE2R
         MuDY9k6HW9tttHVdpnZnjOeAlphPwEhffVGZS8pQsuyawTMrGztt9K1meHSAxHiwjLtF
         LLIxKFoBmxZPSvCOWFn//O+ULH9OYX/JzAORpb/MeVqlvjJBlPRHXcNLziiIzBcVYoav
         G0i5xYIvpYeSbaQKlQ1XqPKLmM+K8SAkdejPyQaIpJisIhgZHLpkf1Ujdnxk6eOlyl/B
         gQgQ==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
        d=1e100.net; s=20230601; t=1715692696; x=1716297496;
        h=content-transfer-encoding:cc:to:subject:message-id:date:from
         :in-reply-to:references:mime-version:x-gm-message-state:from:to:cc
         :subject:date:message-id:reply-to;
        bh=/xjCKQuha7hVow3jqNlMQQrD3zT6Ykl3WSgG/f7FlA4=;
        b=twL5TxNRdMjlAMewkmAzDJtC6SInV97xgabfwMsV4aN4to/15i/+hj2VgbMoMmHutr
         Cf5mbACn3xR04ODXdCmRf6JpPaxeZ8iknWyRH0G+Y7zrG+RoDrCHwDwnimv5+W9WoaLd
         xElEUzvHCFMvEL/N82vbyOLZqk7fwe2LoAWkrjz2M+84OuxKp5Y/y4udjLca7XbIB9TJ
         HaIJGL4Ce1g+Ra4yBp4teFKqMp8c9hcD9AttebasTSwqZScUlbuwpZcSwlAwlz/Wd5SD
         OuY2JJARGiFqRbxyXz7Y1etUf1TdVcrZEu2wHYOsRBv1iuDs3IFlBlr3n7aLkNlcDEDh
         xEEQ==
X-Gm-Message-State: AOJu0YzI/XmwxLhMddZShUvc62r8VaGPrl/yGIi1PgZ8BWzHr5UYRJng
	JgnT9+51JgLSJxzGMPzeBOgVdaLVitRL/s0/SbunVnvr8H8On+0jUXkj+fiQ65RhIxiJdQWTj6E
	l1Pum2WdNpDIBwBsGohyjAToOv5E=
X-Google-Smtp-Source: AGHT+IGzlbjTvC53z+w9h64QMQdsn2DMz29PfLpaWCydadcOd1uSHxd81GkArvu0KACaSBWdbpS1gzKessUcnx2xbgY=
X-Received: by 2002:a05:651c:a10:b0:2e1:bd06:51b0 with SMTP id
 38308e7fff4ca-2e52039c4d7mr98501721fa.35.1715692695351; Tue, 14 May 2024
 06:18:15 -0700 (PDT)
MIME-Version: 1.0
References: <20240406120755.2692291-1-pan2.li@intel.com> <20240506144805.725379-1-pan2.li@intel.com>
In-Reply-To: <20240506144805.725379-1-pan2.li@intel.com>
From: Richard Biener <richard.guenther@gmail.com>
Date: Tue, 14 May 2024 15:18:04 +0200
Message-ID: <CAFiYyc35L1ec9vqXM-pCix44jV9o1R3RMC_NvPm3i_x4b_NRPQ@mail.gmail.com>
Subject: Re: [PATCH v4 1/3] Internal-fn: Support new IFN SAT_ADD for unsigned
 scalar int
To: pan2.li@intel.com
Cc: gcc-patches@gcc.gnu.org, juzhe.zhong@rivai.ai, kito.cheng@gmail.com, 
	tamar.christina@arm.com, hongtao.liu@intel.com
Content-Type: text/plain; charset="UTF-8"
Content-Transfer-Encoding: quoted-printable
X-Spam-Status: No, score=-7.7 required=5.0 tests=BAYES_00,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,FREEMAIL_FROM,GIT_PATCH_0,RCVD_IN_DNSWL_NONE,SPF_HELO_NONE,SPF_PASS,TXREP autolearn=ham autolearn_force=no version=3.4.6
X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org
List-Id: <gcc-patches.gcc.gnu.org>

On Mon, May 6, 2024 at 4:48=E2=80=AFPM <pan2.li@intel.com> wrote:
>
> From: Pan Li <pan2.li@intel.com>
>
> This patch adds the middle-end representation for saturating
> addition, i.e. the result of the add is set to the type's maximum
> value on overflow.  It matches patterns similar to the one below.
>
> SAT_ADD (x, y) =3D> (x + y) | (-(TYPE)((TYPE)(x + y) < x))
>
> Take uint8_t as example, we will have:
>
> * SAT_ADD (1, 254)   =3D> 255.
> * SAT_ADD (1, 255)   =3D> 255.
> * SAT_ADD (2, 255)   =3D> 255.
> * SAT_ADD (255, 255) =3D> 255.
>
> Given below example for the unsigned scalar integer uint64_t:
>
> uint64_t sat_add_u64 (uint64_t x, uint64_t y)
> {
>   return (x + y) | (- (uint64_t)((uint64_t)(x + y) < x));
> }
>
> Before this patch:
> uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
> {
>   long unsigned int _1;
>   _Bool _2;
>   long unsigned int _3;
>   long unsigned int _4;
>   uint64_t _7;
>   long unsigned int _10;
>   __complex__ long unsigned int _11;
>
> ;;   basic block 2, loop depth 0
> ;;    pred:       ENTRY
>   _11 =3D .ADD_OVERFLOW (x_5(D), y_6(D));
>   _1 =3D REALPART_EXPR <_11>;
>   _10 =3D IMAGPART_EXPR <_11>;
>   _2 =3D _10 !=3D 0;
>   _3 =3D (long unsigned int) _2;
>   _4 =3D -_3;
>   _7 =3D _1 | _4;
>   return _7;
> ;;    succ:       EXIT
>
> }
>
> After this patch:
> uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
> {
>   uint64_t _7;
>
> ;;   basic block 2, loop depth 0
> ;;    pred:       ENTRY
>   _7 =3D .SAT_ADD (x_5(D), y_6(D)); [tail call]
>   return _7;
> ;;    succ:       EXIT
> }
>
> We perform the transform during widen_mult because the sub-expr of
> SAT_ADD will be optimized to .ADD_OVERFLOW.  We need to try the .SAT_ADD
> pattern first and then .ADD_OVERFLOW, or we may never catch the pattern
> .SAT_ADD.  Meanwhile, the isel pass is after widen_mult and then we
> cannot perform the .SAT_ADD pattern match as the sub-expr will be
> optimized to .ADD_OVERFLOW first.
>
> The below tests are passed for this patch:
> 1. The riscv fully regression tests.
> 2. The aarch64 fully regression tests.
> 3. The x86 bootstrap tests.
> 4. The x86 fully regression tests.
>
>         PR target/51492
>         PR target/112600
>
> gcc/ChangeLog:
>
>         * internal-fn.cc (commutative_binary_fn_p): Add type IFN_SAT_ADD
>         to the return true switch case(s).
>         * internal-fn.def (SAT_ADD):  Add new signed optab SAT_ADD.
>         * match.pd: Add unsigned SAT_ADD match.
>         * optabs.def (OPTAB_NL): Remove fixed-point limitation for us/ssa=
dd.
>         * tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_add): New ex=
tern
>         func decl generated in match.pd match.
>         (match_saturation_arith): New func impl to match the saturation a=
rith.
>         (math_opts_dom_walker::after_dom_children): Try match saturation
>         arith.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> ---
>  gcc/internal-fn.cc        |  1 +
>  gcc/internal-fn.def       |  2 ++
>  gcc/match.pd              | 28 ++++++++++++++++++++++++
>  gcc/optabs.def            |  4 ++--
>  gcc/tree-ssa-math-opts.cc | 46 +++++++++++++++++++++++++++++++++++++++
>  5 files changed, 79 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index 0a7053c2286..73045ca8c8c 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -4202,6 +4202,7 @@ commutative_binary_fn_p (internal_fn fn)
>      case IFN_UBSAN_CHECK_MUL:
>      case IFN_ADD_OVERFLOW:
>      case IFN_MUL_OVERFLOW:
> +    case IFN_SAT_ADD:
>      case IFN_VEC_WIDEN_PLUS:
>      case IFN_VEC_WIDEN_PLUS_LO:
>      case IFN_VEC_WIDEN_PLUS_HI:
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 848bb9dbff3..25badbb86e5 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -275,6 +275,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHS, ECF_CONST | ECF_=
NOTHROW, first,
>  DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first,
>                               smulhrs, umulhrs, binary)
>
> +DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, b=
inary)
> +
>  DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
>  DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
>  DEF_INTERNAL_COND_FN (MUL, ECF_CONST, smul, binary)
> diff --git a/gcc/match.pd b/gcc/match.pd
> index d401e7503e6..7058e4cbe29 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3043,6 +3043,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>         || POINTER_TYPE_P (itype))
>        && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype))))))
>
> +/* Unsigned Saturation Add */
> +(match (usadd_left_part @0 @1)
> + (plus:c @0 @1)
> + (if (INTEGRAL_TYPE_P (type)
> +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@1)))))
> +
> +(match (usadd_right_part @0 @1)
> + (negate (convert (lt (plus:c @0 @1) @0)))
> + (if (INTEGRAL_TYPE_P (type)
> +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@1)))))
> +
> +(match (usadd_right_part @0 @1)
> + (negate (convert (gt @0 (plus:c @0 @1))))
> + (if (INTEGRAL_TYPE_P (type)
> +      && TYPE_UNSIGNED (TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@0))
> +      && types_match (type, TREE_TYPE (@1)))))
> +
> +/* Unsigned saturation add, case 1 (branchless):
> +   SAT_U_ADD =3D (X + Y) | - ((X + Y) < X) or
> +   SAT_U_ADD =3D (X + Y) | - (X > (X + Y)).  */
> +(match (unsigned_integer_sat_add @0 @1)
> + (bit_ior:c (usadd_left_part @0 @1) (usadd_right_part @0 @1)))
> +
>  /* x >  y  &&  x !=3D XXX_MIN  -->  x > y
>     x >  y  &&  x =3D=3D XXX_MIN  -->  false . */
>  (for eqne (eq ne)
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index ad14f9328b9..3f2cb46aff8 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -111,8 +111,8 @@ OPTAB_NX(add_optab, "add$F$a3")
>  OPTAB_NX(add_optab, "add$Q$a3")
>  OPTAB_VL(addv_optab, "addv$I$a3", PLUS, "add", '3', gen_intv_fp_libfunc)
>  OPTAB_VX(addv_optab, "add$F$a3")
> -OPTAB_NL(ssadd_optab, "ssadd$Q$a3", SS_PLUS, "ssadd", '3', gen_signed_fi=
xed_libfunc)
> -OPTAB_NL(usadd_optab, "usadd$Q$a3", US_PLUS, "usadd", '3', gen_unsigned_=
fixed_libfunc)
> +OPTAB_NL(ssadd_optab, "ssadd$a3", SS_PLUS, "ssadd", '3', gen_signed_fixe=
d_libfunc)
> +OPTAB_NL(usadd_optab, "usadd$a3", US_PLUS, "usadd", '3', gen_unsigned_fi=
xed_libfunc)
>  OPTAB_NL(sub_optab, "sub$P$a3", MINUS, "sub", '3', gen_int_fp_fixed_libf=
unc)
>  OPTAB_NX(sub_optab, "sub$F$a3")
>  OPTAB_NX(sub_optab, "sub$Q$a3")
> diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
> index 705f4a4695a..35a46edc9f6 100644
> --- a/gcc/tree-ssa-math-opts.cc
> +++ b/gcc/tree-ssa-math-opts.cc
> @@ -4026,6 +4026,44 @@ arith_overflow_check_p (gimple *stmt, gimple *cast=
_stmt, gimple *&use_stmt,
>    return 0;
>  }
>
> +extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree)=
);
> +
> +/*
> + * Try to match saturation arith pattern(s).
> + *   1. SAT_ADD (unsigned)
> + *      _7 =3D _4 + _6;
> + *      _8 =3D _4 > _7;
> + *      _9 =3D (long unsigned int) _8;
> + *      _10 =3D -_9;
> + *      _12 =3D _7 | _10;
> + *      =3D>
> + *      _12 =3D .SAT_ADD (_4, _6);  */
> +static bool
> +match_saturation_arith (gimple_stmt_iterator *gsi, gimple *stmt,
> +                       bool *cfg_changed_p)
> +{
> +  gcall *call =3D NULL;
> +  bool changed_p =3D false;
> +
> +  gcc_assert (is_gimple_assign (stmt));

If you require a gassign, please statically type your function
argument as gassign * instead and remove this assert.

> +
> +  tree ops[2];
> +  tree lhs =3D gimple_assign_lhs (stmt);
> +
> +  if (gimple_unsigned_integer_sat_add (lhs, ops, NULL)
> +      && direct_internal_fn_supported_p (IFN_SAT_ADD, TREE_TYPE (lhs),
> +                                       OPTIMIZE_FOR_SPEED))
> +    {
> +      call =3D gimple_build_call_internal (IFN_SAT_ADD, 2, ops[0], ops[1=
]);
> +      gimple_call_set_lhs (call, lhs);
> +      gsi_replace (gsi, call, true);
> +      changed_p =3D true;
> +      *cfg_changed_p =3D changed_p;

In addition to Tamar's good comments, why do you set *cfg_changed_p to
true?  You are not changing the CFG after all?

> +    }
> +
> +  return changed_p;
> +}
> +
>  /* Recognize for unsigned x
>     x =3D y - z;
>     if (x > y)
> @@ -5886,6 +5924,14 @@ math_opts_dom_walker::after_dom_children (basic_bl=
ock bb)
>
>    fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
>
> +  for (gsi =3D gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> +    {
> +      gimple *stmt =3D gsi_stmt (gsi);
> +
> +      if (is_gimple_assign (stmt))
> +       match_saturation_arith (&gsi, stmt, m_cfg_changed_p);
> +    }
> +
>    for (gsi =3D gsi_after_labels (bb); !gsi_end_p (gsi);)
>      {
>        gimple *stmt =3D gsi_stmt (gsi);
> --
> 2.34.1
>