From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail.loongson.cn (mail.loongson.cn [114.242.206.163]) by sourceware.org (Postfix) with ESMTP id 2C0193858037 for ; Fri, 26 Jan 2024 08:41:17 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 2C0193858037 Authentication-Results: sourceware.org; dmarc=none (p=none dis=none) header.from=loongson.cn Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=loongson.cn ARC-Filter: OpenARC Filter v1.0.0 sourceware.org 2C0193858037 Authentication-Results: server2.sourceware.org; arc=none smtp.remote-ip=114.242.206.163 ARC-Seal: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1706258486; cv=none; b=aw8XfKg/hdJ14JFcELHQmwVrh+j6/tfceXXkF+RXEm6naKN+dxaPXJmP+pGFQz55akFdAM8yhaLZdMLe4UCFW/7LBFkCfI213o5wXuPhUVDV/ZYPzApIwOY04VB4Qv+PYjumeweESIDNDQL+GjLCfhNO6gbTnL9TeKh0dDteK3Y= ARC-Message-Signature: i=1; a=rsa-sha256; d=sourceware.org; s=key; t=1706258486; c=relaxed/simple; bh=RxIaSxdTXHtT/h2Rec5iiW4HVL/uAD3385bqRoA4cD4=; h=From:To:Subject:Date:Message-Id:MIME-Version; b=G7P8igAbcGW0a1DW1v7/IiiHqIbY1CvmVFdslbfa3pIVgYmYU8FdVCPStQ409ptnBwyxb1bfX34V65TVSdPRGTpn2s2KeJzrDujf+siQIpQOiUsj+Ar/ON9hRlaFEIE7FBTYj9DyhwYu9cyf4JA01y4Mhiv1NpfUojsjrmzQ194= ARC-Authentication-Results: i=1; server2.sourceware.org Received: from loongson.cn (unknown [10.2.6.5]) by gateway (Coremail) with SMTP id _____8Axz+sqcLNlsCwGAA--.21687S3; Fri, 26 Jan 2024 16:41:14 +0800 (CST) Received: from 5.5.5 (unknown [10.2.6.5]) by localhost.localdomain (Coremail) with SMTP id AQAAf8AxX88pcLNll7gbAA--.54209S4; Fri, 26 Jan 2024 16:41:13 +0800 (CST) From: Li Wei To: gcc-patches@gcc.gnu.org Cc: xry111@xry111.site, i@xen0n.name, xuchenghua@loongson.cn, chenglulu@loongson.cn, Li Wei Subject: [PATCH v2] LoongArch: Adjust cost of vector_stmt that match multiply-add pattern. 
Date: Fri, 26 Jan 2024 16:41:11 +0800 Message-Id: <20240126084111.1811519-1-liwei@loongson.cn> X-Mailer: git-send-email 2.39.3 MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-CM-TRANSID:AQAAf8AxX88pcLNll7gbAA--.54209S4 X-CM-SenderInfo: 5olzvxo6or00hjvr0hdfq/ X-Coremail-Antispam: 1Uk129KBj93XoW3Gr4kXF4kAFyxJrWUKw17urX_yoW7ZF1fpa nIkryftFW8A347G3Z7JFWrXr15A34Ik3W3WasIkry8Cw4UAa4aqw1Ut34UZF13Ja4jgr1S vr1kAF4DCa1kAagCm3ZEXasCq-sJn29KB7ZKAUJUUUUU529EdanIXcx71UUUUU7KY7ZEXa sCq-sGcSsGvfJ3Ic02F40EFcxC0VAKzVAqx4xG6I80ebIjqfuFe4nvWSU5nxnvy29KBjDU 0xBIdaVrnRJUUUkFb4IE77IF4wAFF20E14v26r1j6r4UM7CY07I20VC2zVCF04k26cxKx2 IYs7xG6rWj6s0DM7CIcVAFz4kK6r1Y6r17M28lY4IEw2IIxxk0rwA2F7IY1VAKz4vEj48v e4kI8wA2z4x0Y4vE2Ix0cI8IcVAFwI0_Gr0_Xr1l84ACjcxK6xIIjxv20xvEc7CjxVAFwI 0_Gr0_Cr1l84ACjcxK6I8E87Iv67AKxVW8Jr0_Cr1UM28EF7xvwVC2z280aVCY1x0267AK xVW8Jr0_Cr1UM2AIxVAIcxkEcVAq07x20xvEncxIr21l57IF6xkI12xvs2x26I8E6xACxx 1l5I8CrVACY4xI64kE6c02F40Ex7xfMcIj6xIIjxv20xvE14v26r106r15McIj6I8E87Iv 67AKxVW8JVWxJwAm72CE4IkC6x0Yz7v_Jr0_Gr1lF7xvr2IYc2Ij64vIr41l42xK82IYc2 Ij64vIr41l4I8I3I0E4IkC6x0Yz7v_Jr0_Gr1lx2IqxVAqx4xG67AKxVWUJVWUGwC20s02 6x8GjcxK67AKxVWUGVWUWwC2zVAF1VAY17CE14v26r126r1DMIIYrxkI7VAKI48JMIIF0x vE2Ix0cI8IcVAFwI0_Jr0_JF4lIxAIcVC0I7IYx2IY6xkF7I0E14v26r1j6r4UMIIF0xvE 42xK8VAvwI8IcIk0rVWUJVWUCwCI42IY6I8E87Iv67AKxVWUJVW8JwCI42IY6I8E87Iv6x kF7I0E14v26r1j6r4UYxBIdaVFxhVjvjDU0xZFpf9x07jnUUUUUUUU= X-Spam-Status: No, score=-13.4 required=5.0 tests=BAYES_00,GIT_PATCH_0,KAM_DMARC_STATUS,SPF_HELO_NONE,SPF_PASS,TXREP,T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: We found that when only 128-bit vectorization was enabled, 549.fotonik3d_r failed to vectorize effectively. For this reason, we adjust the cost of 128-bit vector_stmt that match the multiply-add pattern to facilitate 128-bit vectorization. 
The experimental results show that after the modification, 549.fotonik3d_r performance can be improved by 9.77% under the 128-bit vectorization option. gcc/ChangeLog: * config/loongarch/loongarch.cc (loongarch_multiply_add_p): New. (loongarch_vector_costs::add_stmt_cost): Adjust. gcc/testsuite/ChangeLog: * gfortran.dg/vect/vect-10.f90: New test. --- gcc/config/loongarch/loongarch.cc | 48 +++++++++++++++ gcc/testsuite/gfortran.dg/vect/vect-10.f90 | 71 ++++++++++++++++++++++ 2 files changed, 119 insertions(+) create mode 100644 gcc/testsuite/gfortran.dg/vect/vect-10.f90 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index b494040d165..4d99e30828b 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -4096,6 +4096,37 @@ loongarch_vector_costs::determine_suggested_unroll_factor (loop_vec_info loop_vi return 1 << ceil_log2 (uf); } +/* Check if assign stmt rhs op comes from a multiply-add operation. */ +static bool +loongarch_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info) +{ + gassign *assign = dyn_cast<gassign *> (stmt_info->stmt); + if (!assign) + return false; + tree_code code = gimple_assign_rhs_code (assign); + if (code != PLUS_EXPR && code != MINUS_EXPR) + return false; + + auto is_mul_result = [&](int i) + { + tree rhs = gimple_op (assign, i); + if (TREE_CODE (rhs) != SSA_NAME) + return false; + + stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs); + if (!def_stmt_info + || STMT_VINFO_DEF_TYPE (def_stmt_info) != vect_internal_def) + return false; + gassign *rhs_assign = dyn_cast<gassign *> (def_stmt_info->stmt); + if (!rhs_assign || gimple_assign_rhs_code (rhs_assign) != MULT_EXPR) + return false; + + return true; + }; + + return is_mul_result (1) || is_mul_result (2); +} + unsigned loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, stmt_vec_info stmt_info, slp_tree, @@ -4108,6 +4139,23 @@ loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, { int 
stmt_cost = loongarch_builtin_vectorization_cost (kind, vectype, misalign); + if (vectype && stmt_info) + { + gassign *assign = dyn_cast<gassign *> (STMT_VINFO_STMT (stmt_info)); + machine_mode mode = TYPE_MODE (vectype); + + /* We found through testing that this strategy (the stmt that + matches the multiply-add pattern) has positive returns only + when applied to the 128-bit vector stmt, so this restriction + is currently made. */ + if (kind == vector_stmt && GET_MODE_SIZE (mode) == 16 && assign) + { + if (!vect_is_reduction (stmt_info) + && loongarch_multiply_add_p (m_vinfo, stmt_info)) + stmt_cost = 0; + } + } + retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost); m_costs[where] += retval; diff --git a/gcc/testsuite/gfortran.dg/vect/vect-10.f90 b/gcc/testsuite/gfortran.dg/vect/vect-10.f90 new file mode 100644 index 00000000000..b85bc2702a3 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/vect/vect-10.f90 @@ -0,0 +1,71 @@ +! { dg-do compile } +! { dg-additional-options "-Ofast -mlsx -fvect-cost-model=dynamic" { target loongarch64*-*-* } } + +MODULE material_mod + +IMPLICIT NONE + +integer, parameter :: dfp = selected_real_kind (13, 99) +integer, parameter :: rfp = dfp + +PUBLIC Mat_updateE, iepx, iepy, iepz + +PRIVATE + +integer, dimension (:, :, :), allocatable :: iepx, iepy, iepz +real (kind = rfp), dimension (:), allocatable :: Dbdx, Dbdy, Dbdz +integer :: imin, jmin, kmin +integer, dimension (6) :: Exsize +integer, dimension (6) :: Eysize +integer, dimension (6) :: Ezsize +integer, dimension (6) :: Hxsize +integer, dimension (6) :: Hysize +integer, dimension (6) :: Hzsize + +CONTAINS + +SUBROUTINE mat_updateE (nx, ny, nz, Hx, Hy, Hz, Ex, Ey, Ez) + +integer, intent (in) :: nx, ny, nz + +real (kind = rfp), intent (inout), & + dimension (Exsize (1) : Exsize (2), Exsize (3) : Exsize (4), Exsize (5) : Exsize (6)) :: Ex +real (kind = rfp), intent (inout), & + dimension (Eysize (1) : Eysize (2), Eysize (3) : Eysize (4), Eysize (5) : Eysize (6)) :: Ey +real (kind 
= rfp), intent (inout), & + dimension (Ezsize (1) : Ezsize (2), Ezsize (3) : Ezsize (4), Ezsize (5) : Ezsize (6)) :: Ez +real (kind = rfp), intent (in), & + dimension (Hxsize (1) : Hxsize (2), Hxsize (3) : Hxsize (4), Hxsize (5) : Hxsize (6)) :: Hx +real (kind = rfp), intent (in), & + dimension (Hysize (1) : Hysize (2), Hysize (3) : Hysize (4), Hysize (5) : Hysize (6)) :: Hy +real (kind = rfp), intent (in), & + dimension (Hzsize (1) : Hzsize (2), Hzsize (3) : Hzsize (4), Hzsize (5) : Hzsize (6)) :: Hz + +integer :: i, j, k, mp + +do k = kmin, nz + do j = jmin, ny + do i = imin, nx + mp = iepx (i, j, k) + Ex (i, j, k) = Ex (i, j, k) + & + Dbdy (mp) * (Hz (i, j, k ) - Hz (i, j-1, k)) + & + Dbdz (mp) * (Hy (i, j, k-1) - Hy (i, j , k)) + + mp = iepy (i, j, k) + Ey (i, j, k) = Ey (i, j, k) + & + Dbdz (mp) * (Hx (i , j, k) - Hx (i, j, k-1)) + & + Dbdx (mp) * (Hz (i-1, j, k) - Hz (i, j, k )) + + mp = iepz (i, j, k) + Ez (i, j, k) = Ez (i, j, k) + & + Dbdx (mp) * (Hy (i, j , k) - Hy (i-1, j, k)) + & + Dbdy (mp) * (Hx (i, j-1, k) - Hx (i , j, k)) + end do + end do +end do + +END SUBROUTINE mat_updateE + +END MODULE material_mod + +! { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target loongarch64*-*-* } } } -- 2.39.3