* [PATCH v1] LoongArch: Adjust cost of vector_stmt that match multiply-add pattern.
@ 2024-01-24 9:36 Li Wei
2024-01-26 8:09 ` chenglulu
0 siblings, 1 reply; 2+ messages in thread
From: Li Wei @ 2024-01-24 9:36 UTC (permalink / raw)
To: gcc-patches; +Cc: xry111, i, xuchenghua, chenglulu, Li Wei
We found that when only 128-bit vectorization was enabled, 549.fotonik3d_r
failed to vectorize effectively. For this reason, we set to zero the cost of
a 128-bit vector_stmt that matches the multiply-add pattern (a non-reduction
addition or subtraction with an operand produced by a multiplication), in
order to facilitate 128-bit vectorization.
Experimental results show that, with this change, 549.fotonik3d_r performance
improves by 9.77% when only 128-bit vectorization is enabled.
gcc/ChangeLog:
* config/loongarch/loongarch.cc (loongarch_multiply_add_p): New.
(loongarch_vector_costs::add_stmt_cost): Adjust.
gcc/testsuite/ChangeLog:
* gfortran.dg/vect/vect-10.f90: New test.
---
gcc/config/loongarch/loongarch.cc | 42 +++++++++++++
gcc/testsuite/gfortran.dg/vect/vect-10.f90 | 71 ++++++++++++++++++++++
2 files changed, 113 insertions(+)
create mode 100644 gcc/testsuite/gfortran.dg/vect/vect-10.f90
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 072c68d97e3..32a0b6f43e8 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4096,6 +4096,36 @@ loongarch_vector_costs::determine_suggested_unroll_factor (loop_vec_info loop_vi
return 1 << ceil_log2 (uf);
}
+static bool
+loongarch_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info)
+{
+ gassign *assign = dyn_cast<gassign *> (stmt_info->stmt);
+ if (!assign)
+ return false;
+ tree_code code = gimple_assign_rhs_code (assign);
+ if (code != PLUS_EXPR && code != MINUS_EXPR)
+ return false;
+
+ auto is_mul_result = [&](int i)
+ {
+ tree rhs = gimple_op (assign, i);
+ if (TREE_CODE (rhs) != SSA_NAME)
+ return false;
+
+ stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
+ if (!def_stmt_info
+ || STMT_VINFO_DEF_TYPE (def_stmt_info) != vect_internal_def)
+ return false;
+ gassign *rhs_assign = dyn_cast<gassign *> (def_stmt_info->stmt);
+ if (!rhs_assign || gimple_assign_rhs_code (rhs_assign) != MULT_EXPR)
+ return false;
+
+ return true;
+ };
+
+ return is_mul_result (1) || is_mul_result (2);
+}
+
unsigned
loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
stmt_vec_info stmt_info, slp_tree,
@@ -4108,6 +4138,18 @@ loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
{
int stmt_cost = loongarch_builtin_vectorization_cost (kind, vectype,
misalign);
+ if (vectype && stmt_info)
+ {
+ gassign *assign = dyn_cast<gassign *> (STMT_VINFO_STMT (stmt_info));
+ machine_mode mode = TYPE_MODE (vectype);
+ if (kind == vector_stmt && GET_MODE_SIZE (mode) == 16 && assign)
+ {
+ if (!vect_is_reduction (stmt_info)
+ && loongarch_multiply_add_p (m_vinfo, stmt_info))
+ stmt_cost = 0;
+ }
+ }
+
retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
m_costs[where] += retval;
diff --git a/gcc/testsuite/gfortran.dg/vect/vect-10.f90 b/gcc/testsuite/gfortran.dg/vect/vect-10.f90
new file mode 100644
index 00000000000..b85bc2702a3
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/vect/vect-10.f90
@@ -0,0 +1,71 @@
+! { dg-do compile }
+! { dg-additional-options "-Ofast -mlsx -fvect-cost-model=dynamic" { target loongarch64*-*-* } }
+
+MODULE material_mod
+
+IMPLICIT NONE
+
+integer, parameter :: dfp = selected_real_kind (13, 99)
+integer, parameter :: rfp = dfp
+
+PUBLIC Mat_updateE, iepx, iepy, iepz
+
+PRIVATE
+
+integer, dimension (:, :, :), allocatable :: iepx, iepy, iepz
+real (kind = rfp), dimension (:), allocatable :: Dbdx, Dbdy, Dbdz
+integer :: imin, jmin, kmin
+integer, dimension (6) :: Exsize
+integer, dimension (6) :: Eysize
+integer, dimension (6) :: Ezsize
+integer, dimension (6) :: Hxsize
+integer, dimension (6) :: Hysize
+integer, dimension (6) :: Hzsize
+
+CONTAINS
+
+SUBROUTINE mat_updateE (nx, ny, nz, Hx, Hy, Hz, Ex, Ey, Ez)
+
+integer, intent (in) :: nx, ny, nz
+
+real (kind = rfp), intent (inout), &
+ dimension (Exsize (1) : Exsize (2), Exsize (3) : Exsize (4), Exsize (5) : Exsize (6)) :: Ex
+real (kind = rfp), intent (inout), &
+ dimension (Eysize (1) : Eysize (2), Eysize (3) : Eysize (4), Eysize (5) : Eysize (6)) :: Ey
+real (kind = rfp), intent (inout), &
+ dimension (Ezsize (1) : Ezsize (2), Ezsize (3) : Ezsize (4), Ezsize (5) : Ezsize (6)) :: Ez
+real (kind = rfp), intent (in), &
+ dimension (Hxsize (1) : Hxsize (2), Hxsize (3) : Hxsize (4), Hxsize (5) : Hxsize (6)) :: Hx
+real (kind = rfp), intent (in), &
+ dimension (Hysize (1) : Hysize (2), Hysize (3) : Hysize (4), Hysize (5) : Hysize (6)) :: Hy
+real (kind = rfp), intent (in), &
+ dimension (Hzsize (1) : Hzsize (2), Hzsize (3) : Hzsize (4), Hzsize (5) : Hzsize (6)) :: Hz
+
+integer :: i, j, k, mp
+
+do k = kmin, nz
+ do j = jmin, ny
+ do i = imin, nx
+ mp = iepx (i, j, k)
+ Ex (i, j, k) = Ex (i, j, k) + &
+ Dbdy (mp) * (Hz (i, j, k ) - Hz (i, j-1, k)) + &
+ Dbdz (mp) * (Hy (i, j, k-1) - Hy (i, j , k))
+
+ mp = iepy (i, j, k)
+ Ey (i, j, k) = Ey (i, j, k) + &
+ Dbdz (mp) * (Hx (i , j, k) - Hx (i, j, k-1)) + &
+ Dbdx (mp) * (Hz (i-1, j, k) - Hz (i, j, k ))
+
+ mp = iepz (i, j, k)
+ Ez (i, j, k) = Ez (i, j, k) + &
+ Dbdx (mp) * (Hy (i, j , k) - Hy (i-1, j, k)) + &
+ Dbdy (mp) * (Hx (i, j-1, k) - Hx (i , j, k))
+ end do
+ end do
+end do
+
+END SUBROUTINE mat_updateE
+
+END MODULE material_mod
+
+! { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target loongarch64*-*-* } } }
--
2.39.3
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH v1] LoongArch: Adjust cost of vector_stmt that match multiply-add pattern.
2024-01-24 9:36 [PATCH v1] LoongArch: Adjust cost of vector_stmt that match multiply-add pattern Li Wei
@ 2024-01-26 8:09 ` chenglulu
0 siblings, 0 replies; 2+ messages in thread
From: chenglulu @ 2024-01-26 8:09 UTC (permalink / raw)
To: Li Wei, gcc-patches; +Cc: xry111, i, xuchenghua
在 2024/1/24 下午5:36, Li Wei 写道:
> We found that when only 128-bit vectorization was enabled, 549.fotonik3d_r
> failed to vectorize effectively. For this reason, we adjust the cost of
> 128-bit vector_stmt that match the multiply-add pattern to facilitate 128-bit
> vectorization.
> The experimental results show that after the modification, 549.fotonik3d_r
> performance can be improved by 9.77% under the 128-bit vectorization option.
>
> gcc/ChangeLog:
>
> * config/loongarch/loongarch.cc (loongarch_multiply_add_p): New.
> (loongarch_vector_costs::add_stmt_cost): Adjust.
>
> gcc/testsuite/ChangeLog:
>
> * gfortran.dg/vect/vect-10.f90: New test.
> ---
> gcc/config/loongarch/loongarch.cc | 42 +++++++++++++
> gcc/testsuite/gfortran.dg/vect/vect-10.f90 | 71 ++++++++++++++++++++++
> 2 files changed, 113 insertions(+)
> create mode 100644 gcc/testsuite/gfortran.dg/vect/vect-10.f90
>
> diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
> index 072c68d97e3..32a0b6f43e8 100644
> --- a/gcc/config/loongarch/loongarch.cc
> +++ b/gcc/config/loongarch/loongarch.cc
> @@ -4096,6 +4096,36 @@ loongarch_vector_costs::determine_suggested_unroll_factor (loop_vec_info loop_vi
> return 1 << ceil_log2 (uf);
> }
>
> +static bool
> +loongarch_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info)
> +{
> + gassign *assign = dyn_cast<gassign *> (stmt_info->stmt);
> + if (!assign)
> + return false;
> + tree_code code = gimple_assign_rhs_code (assign);
> + if (code != PLUS_EXPR && code != MINUS_EXPR)
> + return false;
> +
> + auto is_mul_result = [&](int i)
> + {
> + tree rhs = gimple_op (assign, i);
> + if (TREE_CODE (rhs) != SSA_NAME)
> + return false;
> +
> + stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
> + if (!def_stmt_info
> + || STMT_VINFO_DEF_TYPE (def_stmt_info) != vect_internal_def)
> + return false;
> + gassign *rhs_assign = dyn_cast<gassign *> (def_stmt_info->stmt);
> + if (!rhs_assign || gimple_assign_rhs_code (rhs_assign) != MULT_EXPR)
> + return false;
> +
> + return true;
> + };
> +
> + return is_mul_result (1) || is_mul_result (2);
> +}
> +
> unsigned
> loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
> stmt_vec_info stmt_info, slp_tree,
> @@ -4108,6 +4138,18 @@ loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
> {
> int stmt_cost = loongarch_builtin_vectorization_cost (kind, vectype,
> misalign);
> + if (vectype && stmt_info)
> + {
> + gassign *assign = dyn_cast<gassign *> (STMT_VINFO_STMT (stmt_info));
> + machine_mode mode = TYPE_MODE (vectype);
Hi, Li Wei:
I think the code here needs a comment explaining what it does.
Thanks.
> + if (kind == vector_stmt && GET_MODE_SIZE (mode) == 16 && assign)
> + {
> + if (!vect_is_reduction (stmt_info)
> + && loongarch_multiply_add_p (m_vinfo, stmt_info))
> + stmt_cost = 0;
> + }
> + }
> +
> retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
> m_costs[where] += retval;
>
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2024-01-26 8:09 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-24 9:36 [PATCH v1] LoongArch: Adjust cost of vector_stmt that match multiply-add pattern Li Wei
2024-01-26 8:09 ` chenglulu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).