From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <SRS0=acyz=E4=gmail.com=rdapp.gcc@sourceware.org>
Received: from mail-ej1-x62a.google.com (mail-ej1-x62a.google.com [IPv6:2a00:1450:4864:20::62a])
	by sourceware.org (Postfix) with ESMTPS id 159963858412
	for <gcc-patches@gcc.gnu.org>; Tue, 12 Sep 2023 08:19:05 +0000 (GMT)
DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 159963858412
Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=gmail.com
Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=gmail.com
Received: by mail-ej1-x62a.google.com with SMTP id a640c23a62f3a-99de884ad25so697048466b.3
        for <gcc-patches@gcc.gnu.org>; Tue, 12 Sep 2023 01:19:05 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
        d=gmail.com; s=20221208; t=1694506744; x=1695111544; darn=gcc.gnu.org;
        h=content-transfer-encoding:in-reply-to:from:references:to
         :content-language:subject:cc:user-agent:mime-version:date:message-id
         :from:to:cc:subject:date:message-id:reply-to;
        bh=e+C2oyilEt+NPvPnAV2hXlQWk8sc9qhhAP3tImREw74=;
        b=cPTNEDD3vIgKmQRkOO2wGqHvZCL5OU9iD+CVsGVDKg5b2JJ+lfE1QvBcMtEjkmuzqA
         5DJaqpzatt1vD/Yl8VVzQJAcZRPHFXyiHesVk/L8kjS2L5rfCPMDh0sBbswwkmbXTghB
         6pKak2mME0cmCZl/XehPqjiulyWlse0RrdDiFXOIWNq+ARiIcVi9U3nCQo0mST/Ok78B
         nk6YKgzraFNk+Ap/0SHmUXCijsDTBxmGWfRtkepdY1JaUMVGP2cxnp7aaWSOPLwHGYRH
         cvtJROAnFHPTnj+PDSCm/Js2BVKjpYlinUXskePbjxgsewGhaPBsxfULOPCMsNklYqai
         rg1Q==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
        d=1e100.net; s=20230601; t=1694506744; x=1695111544;
        h=content-transfer-encoding:in-reply-to:from:references:to
         :content-language:subject:cc:user-agent:mime-version:date:message-id
         :x-gm-message-state:from:to:cc:subject:date:message-id:reply-to;
        bh=e+C2oyilEt+NPvPnAV2hXlQWk8sc9qhhAP3tImREw74=;
        b=KOXc264baNkp/0dlzuHbWPswQmkTRQbUm/9us0nPTa3Ucc8b4xATNCbcMGpF6CkhYF
         cXBxpHuZzgNtNVsLtcHbRADVtUXcq0WGh69zEeIp5LCtfKhNnNlTXXEOTU3jCY9pa7oo
         4LV2Yms7Dmp+KB9V0sKeoZyba+MHDTvWNo0tXETljP453XihZM5kP/fHVA7yfFLbGVj3
         8TIJTcnW72/5jSP5dn6dfhiHGmVIPer98VN7BQXNvUa+7xK2kFn6+WKv6SCIvr/i3/Ij
         gnRbegvoWRTNOKCXMuVxx8cBUB3RgMKJROHPVQU9KAWt4W/Ff9FGvBPgi6lmpnoDXF4T
         RYzQ==
X-Gm-Message-State: AOJu0YyDEx0SkHmmeIyJlgQoXARtpf7mKtLzeyiy2cexi6ft7XACg0cJ
	lcoElPVm3IkPgsO5GqW0SUghwB4OwfQCRQ==
X-Google-Smtp-Source: AGHT+IGgfWEkDw+bkbNoj27Pi6EkwexEKSFzJkeaivw51V8jRnwNh9c9rYi5R4f9gB/G0VH9yp9xZQ==
X-Received: by 2002:a17:906:2101:b0:9a9:e1e4:e47b with SMTP id 1-20020a170906210100b009a9e1e4e47bmr10703383ejt.60.1694506743342;
        Tue, 12 Sep 2023 01:19:03 -0700 (PDT)
Received: from [192.168.1.23] (ip-046-005-130-086.um12.pools.vodafone-ip.de. [46.5.130.86])
        by smtp.gmail.com with ESMTPSA id y5-20020a1709064b0500b009a0955a7ad0sm6426947eju.128.2023.09.12.01.19.02
        (version=TLS1_3 cipher=TLS_AES_128_GCM_SHA256 bits=128/128);
        Tue, 12 Sep 2023 01:19:03 -0700 (PDT)
Message-ID: <6acec37d-162f-5aa2-3ce9-d10abbb5468f@gmail.com>
Date: Tue, 12 Sep 2023 10:19:02 +0200
MIME-Version: 1.0
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101
 Thunderbird/102.13.0
Cc: rdapp.gcc@gmail.com, kito.cheng@gmail.com, kito.cheng@sifive.com,
 jeffreyalaw@gmail.com
Subject: Re: [PATCH V4] RISC-V: Support Dynamic LMUL Cost model
Content-Language: en-US
To: Juzhe-Zhong <juzhe.zhong@rivai.ai>, gcc-patches@gcc.gnu.org
References: <20230912064932.647337-1-juzhe.zhong@rivai.ai>
From: Robin Dapp <rdapp.gcc@gmail.com>
In-Reply-To: <20230912064932.647337-1-juzhe.zhong@rivai.ai>
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 7bit
X-Spam-Status: No, score=-4.0 required=5.0 tests=BAYES_00,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,FREEMAIL_FROM,NICE_REPLY_A,RCVD_IN_DNSWL_NONE,SPF_HELO_NONE,SPF_PASS,TXREP autolearn=ham autolearn_force=no version=3.4.6
X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org
List-Id: <gcc-patches.gcc.gnu.org>

Hi Juzhe,

> +max_number_of_live_regs (const basic_block bb,
> +			 const hash_map<tree, pair> &live_ranges,
> +			 unsigned int max_point, machine_mode biggest_mode,
> +			 int lmul)
> +{
> +  unsigned int max_nregs = 0;
> +  unsigned int i;
> +  unsigned int live_point = 0;
> +  auto_vec<unsigned int> live_vars_vec;
> +  live_vars_vec.safe_grow (max_point + 1, true);
> +  for (i = 0; i < live_vars_vec.length (); ++i)
> +    live_vars_vec[i] = 0;
> +  for (hash_map<tree, pair>::iterator iter = live_ranges.begin ();
> +       iter != live_ranges.end (); ++iter)
> +    {
> +      tree var = (*iter).first;
> +      pair live_range = (*iter).second;
> +      for (i = live_range.first; i <= live_range.second; i++)
> +	{
> +	  machine_mode mode = TYPE_MODE (TREE_TYPE (var));
> +	  unsigned int nregs
> +	    = compute_nregs_for_mode (mode, biggest_mode, lmul);
> +	  live_vars_vec[i] += nregs;
> +	  if (live_vars_vec[i] > max_nregs)
> +	    max_nregs = live_vars_vec[i];
> +	}
> +    }

My concern is that we have O(nm) here, where n = number of live_ranges
and m = size of live range.  In large basic blocks (think calculix of
SPECfp 2006 which can reach up to 2000 instructions IIRC) this might
become prohibitive.

I'm going to do a quick benchmark with calculix and report back.  If
there is no noticeable difference we can ditch my idea.

For short live ranges (like < 10) the O(nm) could be better.  As of now,
we still calculate the nregs n*m times, though.  I have something like
the following in mind (it is definitely not shorter, though):

  struct range {
      unsigned int pt;
      bool start;
      unsigned int nregs;
  };

  auto_vec<range> ranges (2 * live_ranges.elements ());
  for (hash_map<tree, pair>::iterator iter = live_ranges.begin ();
       iter != live_ranges.end (); ++iter)
    {
      tree var = (*iter).first;
      machine_mode mode = TYPE_MODE (TREE_TYPE (var));
      unsigned int nregs
	  = compute_nregs_for_mode (mode, biggest_mode, lmul);
      ranges.quick_push ({(*iter).second.first, true, nregs});
      ranges.quick_push ({(*iter).second.second, false, nregs});
    }

  ranges.qsort ([] (const void *a, const void *b) -> int {
		unsigned int aa = ((const range *)a)->pt;
		unsigned int bb = ((const range *)b)->pt;
		if (aa < bb)
		  return -1;
		if (aa == bb)
		  return 0;
		return 1;
		});

  unsigned int cur = 0;
  max_nregs = ranges[0].nregs;

  for (auto r : ranges)
    {
      if (r.start)
	cur += r.nregs;
      else
	cur -= r.nregs;
      max_nregs = MAX (max_nregs, cur);
    }

> +  for (i = 0; i < cfun->gimple_df->ssa_names->length (); i++)
> +    {
> +      tree t = ssa_name (i);
> +      if (!t)
> +       continue;

Could likely be replaced by

  tree t;
  FOR_EACH_SSA_NAME (i, t, cfun)

> +static void
> +update_local_live_ranges (
> +  vec_info *vinfo,
> +  hash_map<basic_block, vec<stmt_point>> &program_points_per_bb,
> +  hash_map<basic_block, hash_map<tree, pair>> &live_ranges_per_bb)
> +{

I just realized (sorry) that this is "nested" a bit far.  Can we still
have e.g. 

> +  if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo))
> +    {

this,

> +	      if (STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info))
> +		  != undef_vec_info_type)

this,

> +		      if (live_range)
> +			{

and this just "continue"?

Apart from that, LGTM.

Regards
 Robin