From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-vs1-xe31.google.com (mail-vs1-xe31.google.com [IPv6:2607:f8b0:4864:20::e31]) by sourceware.org (Postfix) with ESMTPS id 821ED3858CDB for ; Tue, 24 Oct 2023 15:26:24 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 821ED3858CDB Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=gmail.com ARC-Filter: OpenARC Filter v1.0.0 sourceware.org 821ED3858CDB Authentication-Results: server2.sourceware.org; arc=none smtp.remote-ip=2607:f8b0:4864:20::e31 ARC-Seal: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1698161190; cv=none; b=KZkAvd3U2GDuWev+DytfugAfOZ/ZBw/vftv/e11O28ZwK1QjUnRh62QV0qXBCwuIdtsxvyL6De2024MJZHk8bnhkXZqv0Kcs46uQmwSDisfL/t+IF/k+P9MzgDJHvEdrANrDfblPMIfL7tlV+FALs3a06rGAdoALtsYve24Gywc= ARC-Message-Signature: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1698161190; c=relaxed/simple; bh=UUY64WSeeDhxdPAwi25WQl1PQeckPiEaCk+DeCee+3I=; h=DKIM-Signature:MIME-Version:From:Date:Message-ID:Subject:To; b=oCPGJopXHK3WKuubEWj0/Yt2GfskK1+FuuVNgUG/SVCF9JW0gJWDsQVSj1FqGls7nXkY2Z6lJ27Y/M8EBqzo4hBILnR08S1mjuazUCX9KkHeNt2L4o/wdtivw5rfpRxbpbvMMBmu6/nOtWDf6paqVQUBhK5eAJkONXoDY3HsX0c= ARC-Authentication-Results: i=1; server2.sourceware.org Received: by mail-vs1-xe31.google.com with SMTP id ada2fe7eead31-457bda4cf32so1672520137.3 for ; Tue, 24 Oct 2023 08:26:24 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20230601; t=1698161183; x=1698765983; darn=gcc.gnu.org; h=cc:to:subject:message-id:date:from:in-reply-to:references :mime-version:from:to:cc:subject:date:message-id:reply-to; bh=iLETNLr6jlQP3sOLYLI/AF7Hr2cstel+JePNjgTq9HY=; b=KWGXbdeHaCBVcPYc3I0m1Rz1pvwBPpNU9VI1ayNZL4jm/TNzjxrx8ZSFLxfpJE38uK xTFjMj6KLzE24wl8gFuHpEQ+LUnOzHwvE/1lxvgXxbTQbW28YatpHxh6jRwKBgOVv09G ICJmtBvuOjONh8mW1XW0V/1xpo5lnwBp/fM1h1i/n77pmxVZ6zngXWkQ7nE6nN4UIAJj BF3mZA4LEN4f0t+qpGGOYvBQA3DZP+PrDD9PIyqyqM7EuvT4mjM+0Cz/7I+kmU2zcfIM CuKNFwyBNe3WHdMDDnG1OON6cz9Y3WnmZn1cdTb58BWKT1M3QivbA3GPnvdB2tl84SsI aGgg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1698161183; x=1698765983; h=cc:to:subject:message-id:date:from:in-reply-to:references :mime-version:x-gm-message-state:from:to:cc:subject:date:message-id :reply-to; bh=iLETNLr6jlQP3sOLYLI/AF7Hr2cstel+JePNjgTq9HY=; b=weJw3XjaOKDt1s7qCrLAmZDx9NP/NL6Zqx53Nz7ef9cHpFNXp2TX6W3NGqQUBCbYzx qbYqeYxMEP5tUBolhZ/fUntiEn/+n5p2bsp89RUmjBepocVLoep6RI85+W1iRIziZurn w2/MKqGElKMzM42lQVJkDznG3TUcc4lmp88BD/N4zBzStxfK7dyR8p8ogghkJOnUaFcz gvwqEorB65scAoxRoRm3PzX2LBFh5Jv22vmn8gZn6qEyFOoLkEQXA326kwb8pamOF1fR kxM5Y3U1xtgM6fuIc5GXBmPfx6scwKmqUb7YVHAaaJKRCrw7UXugeNPmV8VQOLgh8yYW WabA== X-Gm-Message-State: AOJu0Yy7gUrfkaDRab+RUyuR9RENQYfSvyHE6szz8+2RBs2PS595yyww etoCFqyRMakKv+Teha1BzyXaUl/kee68Bkf2nGM= X-Google-Smtp-Source: AGHT+IGwllSlbKSVdu+EPt7m57LZ4AqGTEBdYJF23eZIkNNYknh2BWhyvDjLdBO/Upu/SSqrwYlrMuEWemDVwa+Ht60= X-Received: by 2002:a05:6102:1624:b0:45a:aab9:4301 with SMTP id cu36-20020a056102162400b0045aaab94301mr1873914vsb.8.1698161183354; Tue, 24 Oct 2023 08:26:23 -0700 (PDT) MIME-Version: 1.0 References: <20231024033200.224558-1-juzhe.zhong@rivai.ai> In-Reply-To: <20231024033200.224558-1-juzhe.zhong@rivai.ai> From: Kito Cheng Date: Tue, 24 Oct 2023 23:26:11 +0800 Message-ID: Subject: Re: [PATCH] RISC-V: Add AVL propagation PASS for RVV auto-vectorization To: Juzhe-Zhong Cc: gcc-patches@gcc.gnu.org, kito.cheng@sifive.com, jeffreyalaw@gmail.com, rdapp.gcc@gmail.com Content-Type: text/plain; charset="UTF-8" X-Spam-Status: No, score=-3.1 required=5.0 tests=BAYES_00,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,FREEMAIL_FROM,RCVD_IN_DNSWL_NONE,SPF_HELO_NONE,SPF_PASS,TXREP autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: > +using namespace rtl_ssa; > +using namespace riscv_vector; > + > +/* The AVL propagation instructions and corresponding preferred AVL. > + It will be updated during the analysis. */ > +static hash_map *avlprops; Maybe put into member data of pass_avlprop? > + > +const pass_data pass_data_avlprop = { > + RTL_PASS, /* type */ > + "avlprop", /* name */ > + OPTGROUP_NONE, /* optinfo_flags */ > + TV_NONE, /* tv_id */ > + 0, /* properties_required */ > + 0, /* properties_provided */ > + 0, /* properties_destroyed */ > + 0, /* todo_flags_start */ > + 0, /* todo_flags_finish */ > +}; > + > +class pass_avlprop : public rtl_opt_pass > +{ > +public: > + pass_avlprop (gcc::context *ctxt) : rtl_opt_pass (pass_data_avlprop, ctxt) {} > + > + /* opt_pass methods: */ > + virtual bool gate (function *) final override > + { > + return TARGET_VECTOR && optimize > 0; > + } > + virtual unsigned int execute (function *) final override; > +}; // class pass_avlprop > + > +static void > +avlprop_init (void) Maybe put into member function of pass_avlprop? > +{ > + calculate_dominance_info (CDI_DOMINATORS); > + df_analyze (); > + crtl->ssa = new function_info (cfun); And take function * from incomping parameter of execute > + avlprops = new hash_map; > +} > + > +static void > +avlprop_done (void) > +{ > + free_dominance_info (CDI_DOMINATORS); > + if (crtl->ssa->perform_pending_updates ()) > + cleanup_cfg (0); > + delete crtl->ssa; > + crtl->ssa = nullptr; > + delete avlprops; > + avlprops = NULL; > +} > + > +/* Helper function to get AVL operand. */ > +static rtx > +get_avl (insn_info *insn, bool avlprop_p) > +{ > + if (get_attr_avl_type (insn->rtl ()) == INVALID_ATTRIBUTE > + || get_attr_avl_type (insn->rtl ()) == VLS) > + return NULL_RTX; > + if (avlprop_p) > + { > + if (avlprops->get (insn)) > + return (*avlprops->get (insn)); > + else if (vlmax_avl_type_p (insn->rtl ())) > + return RVV_VLMAX; I guess I didn't get why we need handle vlmax_avl_type_p here? > + } > + extract_insn_cached (insn->rtl ()); > + return recog_data.operand[get_attr_vl_op_idx (insn->rtl ())]; > +} > + > +/* This is a straight forward pattern ALWAYS in paritial auto-vectorization: > + > + VL = SELECT_AVL (AVL, ...) > + V0 = MASK_LEN_LOAD (..., VL) > + V1 = MASK_LEN_LOAD (..., VL) > + V2 = V0 + V1 --- Missed LEN information. > + MASK_LEN_STORE (..., V2, VL) > + > + We prefer PLUS_EXPR (V0 + V1) instead of COND_LEN_ADD (V0, V1, dummy LEN) > + because: > + > + - Few code changes in Loop Vectorizer. > + - Reuse the current clean flow of partial vectorization, That is, apply > + predicate LEN or MASK into LOAD/STORE operations and other special > + arithmetic operations (e.d. DIV), then do the whole vector register > + operation if it DON'T affect the correctness. > + Such flow is used by all other targets like x86, sve, s390, ... etc. > + - PLUS_EXPR has better gimple optimizations than COND_LEN_ADD. > + > + We propagate AVL from NON-VLMAX to VLMAX for gimple IR like PLUS_EXPR which > + generates the VLMAX instruction due to missed LEN information. The later > + VSETVL PASS will elided the redundant vsetvls. > +*/ > + > +static rtx > +get_autovectorize_preferred_avl (insn_info *insn) > +{ > + if (!vlmax_avl_p (get_avl (insn, true)) || !tail_agnostic_p (insn->rtl ())) > + return NULL_RTX; I would prefer adding new attribute to let this become simpler. > + > + rtx use_avl = NULL_RTX; > + insn_info *avl_use_insn = nullptr; > + unsigned int ratio > + = calculate_ratio (get_sew (insn->rtl ()), get_vlmul (insn->rtl ())); > + for (def_info *def : insn->defs ()) > + { > + auto set = safe_dyn_cast (def); > + if (!set || !set->is_reg ()) > + return NULL_RTX; > + for (use_info *use : set->all_uses ()) > + { > + if (!use->is_in_nondebug_insn ()) > + return NULL_RTX; > + insn_info *use_insn = use->insn (); > + /* FIXME: Stop AVL propagation if any USE is not a RVV real > + instruction. It should be totally enough for vectorized codes since > + they always locate at extended blocks. > + > + TODO: We can extend PHI checking for intrinsic codes if it > + necessary in the future. */ > + if (use_insn->is_artificial () || !has_vtype_op (use_insn->rtl ())) > + return NULL_RTX; > + if (!has_vl_op (use_insn->rtl ())) > + continue; > + > + rtx new_use_avl = get_avl (use_insn, true); > + if (!new_use_avl) > + return NULL_RTX; > + if (!use_avl) > + use_avl = new_use_avl; > + if (!rtx_equal_p (use_avl, new_use_avl) > + || calculate_ratio (get_sew (use_insn->rtl ()), > + get_vlmul (use_insn->rtl ())) > + != ratio > + || vlmax_avl_p (new_use_avl) > + || !tail_agnostic_p (use_insn->rtl ())) > + return NULL_RTX; > + if (!avl_use_insn) > + avl_use_insn = use_insn; > + } > + } > + > + if (use_avl && register_operand (use_avl, Pmode)) > + { > + gcc_assert (avl_use_insn); > + // Find a definition at or neighboring INSN. > + resource_info resource = full_register (REGNO (use_avl)); > + def_lookup dl1 = crtl->ssa->find_def (resource, insn); > + def_lookup dl2 = crtl->ssa->find_def (resource, avl_use_insn); > + if (dl1.matching_set () || dl2.matching_set ()) > + return NULL_RTX; > + def_info *def1 = dl1.last_def_of_prev_group (); > + def_info *def2 = dl2.last_def_of_prev_group (); > + if (def1 != def2) > + return NULL_RTX; > + /* FIXME: We only all AVL propation within a block which should > + be totally enough for vectorized codes. > + > + TODO: We can enhance it here for intrinsic codes in the future > + if it is necessary. */ > + if (def1->insn ()->bb () != insn->bb () > + || def1->insn ()->compare_with (insn) >= 0) > + return NULL_RTX; > + } > + return use_avl; > +} > + > +/* If we have a preferred AVL to propagate, return the AVL. > + Otherwise, return NULL_RTX as we don't need have any preferred > + AVL. */ > + > +static rtx > +get_preferred_avl (insn_info *insn) > +{ > + /* TODO: We only do AVL propagation for missed-LEN partial > + autovectorization for now. We could add more more AVL > + propagation for intrinsic codes in the future. */ > + return get_autovectorize_preferred_avl (insn); > +} > + > +/* Return the AVL TYPE operand index. */ > +static int > +get_avl_type_index (insn_info *insn) > +{ > + extract_insn_cached (insn->rtl ()); > + /* Except rounding mode patterns, AVL TYPE operand > + is always the last operand. */ > + if (find_access (insn->uses (), VXRM_REGNUM) > + || find_access (insn->uses (), FRM_REGNUM)) > + return recog_data.n_operands - 2; > + return recog_data.n_operands - 1; Could we add some attribute like `vl_op_idx`? maintain this magic here is not good idea IMO. > +} > + > +/* Main entry point for this pass. */ > +unsigned int > +pass_avlprop::execute (function *) > +{ > + avlprop_init (); > + > + /* Go through all the instructions looking for AVL that we could propagate. */ > + > + insn_info *next; > + bool change_p = true; > + > + while (change_p) > + { > + /* Iterate on each instruction until no more change need. */ > + change_p = false; > + for (insn_info *insn = crtl->ssa->first_insn (); insn; insn = next) Backward should converge faster, also I suggest add a pre-scan pass to collect all candidate, and then iterate those candidate only. Maybe something like this: for each insn in reverse order: if insn is candidate: put insn to candidate list while (change_p) { for each insn in candidate: ... }