From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <jakub@sourceware.org>
Received: by sourceware.org (Postfix, from userid 2153)
 id EE9B338515F7; Wed, 11 May 2022 06:23:54 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org EE9B338515F7
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Content-Type: text/plain; charset="utf-8"
From: Jakub Jelinek <jakub@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org
Subject: [gcc r9-10118] match.pd, optabs: Avoid vectorization of {FLOOR, CEIL, 
 ROUND}_{DIV, MOD}_EXPR [PR102860]
X-Act-Checkin: gcc
X-Git-Author: Jakub Jelinek <jakub@redhat.com>
X-Git-Refname: refs/heads/releases/gcc-9
X-Git-Oldrev: e875dc9f975ee0a1f9468fda9ee29533cca77181
X-Git-Newrev: 95f6eb7ae707482fdeed57b0906dacb8e675385d
Message-Id: <20220511062354.EE9B338515F7@sourceware.org>
Date: Wed, 11 May 2022 06:23:54 +0000 (GMT)
X-BeenThere: gcc-cvs@gcc.gnu.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Gcc-cvs mailing list <gcc-cvs.gcc.gnu.org>
List-Unsubscribe: <https://gcc.gnu.org/mailman/options/gcc-cvs>,
 <mailto:gcc-cvs-request@gcc.gnu.org?subject=unsubscribe>
List-Archive: <https://gcc.gnu.org/pipermail/gcc-cvs/>
List-Help: <mailto:gcc-cvs-request@gcc.gnu.org?subject=help>
List-Subscribe: <https://gcc.gnu.org/mailman/listinfo/gcc-cvs>,
 <mailto:gcc-cvs-request@gcc.gnu.org?subject=subscribe>
X-List-Received-Date: Wed, 11 May 2022 06:23:55 -0000

https://gcc.gnu.org/g:95f6eb7ae707482fdeed57b0906dacb8e675385d

commit r9-10118-g95f6eb7ae707482fdeed57b0906dacb8e675385d
Author: Jakub Jelinek <jakub@redhat.com>
Date:   Wed Jan 19 15:03:45 2022 +0100

    match.pd, optabs: Avoid vectorization of {FLOOR,CEIL,ROUND}_{DIV,MOD}_EXPR [PR102860]
    
    power10 has modv4si3 expander and so vectorizes the following testcase
    where Fortran modulo is FLOOR_MOD_EXPR.
    optab_for_tree_code indicates that the optab for all the *_MOD_EXPR
    variants is umod_optab or smod_optab, but that isn't true, that optab
    actually expands just TRUNC_MOD_EXPR.  For the other tree codes expmed.cc
    has code to adjust the TRUNC_MOD_EXPR result into those by emitting some
    extra comparisons and conditional updates.  Similarly for *_DIV_EXPR,
    except in that case it actually needs both division and modulo.
    
    While it would be possible to handle it in expmed.cc for vectors as well,
    we'd need to be sure all the vector operations we need for that are
    available, and furthermore we wouldn't account for that in the costing.
    
    So, IMHO it is better to stop pretending those non-truncating (and
    non-exact) div/mod operations have an optab.  For GCC 13, we should
    IMHO pattern match these in tree-vect-patterns.cc and transform them
    to truncating div/mod with follow-up adjustments and let the vectorizer
    vectorize that.  As written in the PR, for signed operands:
    r = x %[fl] y;
    is
    r = x % y; if (r && (x ^ y) < 0) r += y;
    and
    d = x /[fl] y;
    is
    r = x % y; d = x / y; if (r && (x ^ y) < 0) --d;
    and
    r = x %[cl] y;
    is
    r = x % y; if (r && (x ^ y) >= 0) r -= y;
    and
    d = x /[cl] y;
    is
    r = x % y; d = x / y; if (r && (x ^ y) >= 0) ++d;
    (too lazy to figure out rounding div/mod now).  I'll create a PR
    for that.
    The patch also extends a match.pd optimization that floor_mod on
    unsigned operands is actually trunc_mod.
    
    2022-01-19  Jakub Jelinek  <jakub@redhat.com>
    
            PR middle-end/102860
            * match.pd (x %[fl] y -> x % y): New simplification for
            unsigned integral types.
            * optabs-tree.c (optab_for_tree_code): Return unknown_optab
            for {CEIL,FLOOR,ROUND}_{DIV,MOD}_EXPR with VECTOR_TYPE.
    
            * gfortran.dg/pr102860.f90: New test.
    
    (cherry picked from commit ffc7f200adbdf47f14b3594d9b21855c19cf797a)

Diff:
---
 gcc/match.pd                           | 15 +++++++++------
 gcc/optabs-tree.c                      | 21 ++++++++++++++++++---
 gcc/testsuite/gfortran.dg/pr102860.f90 | 10 ++++++++++
 3 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 9d4d4748192..cd3ad159455 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -304,12 +304,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
     { build_minus_one_cst (type); })))
 
 /* For unsigned integral types, FLOOR_DIV_EXPR is the same as
-   TRUNC_DIV_EXPR.  Rewrite into the latter in this case.  */
-(simplify
- (floor_div @0 @1)
- (if ((INTEGRAL_TYPE_P (type) || VECTOR_INTEGER_TYPE_P (type))
-      && TYPE_UNSIGNED (type))
-  (trunc_div @0 @1)))
+   TRUNC_DIV_EXPR.  Rewrite into the latter in this case.  Similarly
+   for MOD instead of DIV.  */
+(for floor_divmod (floor_div floor_mod)
+     trunc_divmod (trunc_div trunc_mod)
+ (simplify
+  (floor_divmod @0 @1)
+  (if ((INTEGRAL_TYPE_P (type) || VECTOR_INTEGER_TYPE_P (type))
+       && TYPE_UNSIGNED (type))
+   (trunc_divmod @0 @1))))
 
 /* Combine two successive divisions.  Note that combining ceil_div
    and floor_div is trickier and combining round_div even more so.  */
diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c
index 8157798cc71..024af299188 100644
--- a/gcc/optabs-tree.c
+++ b/gcc/optabs-tree.c
@@ -53,17 +53,32 @@ optab_for_tree_code (enum tree_code code, const_tree type,
     case MULT_HIGHPART_EXPR:
       return TYPE_UNSIGNED (type) ? umul_highpart_optab : smul_highpart_optab;
 
-    case TRUNC_MOD_EXPR:
     case CEIL_MOD_EXPR:
     case FLOOR_MOD_EXPR:
     case ROUND_MOD_EXPR:
+      /* {s,u}mod_optab implements TRUNC_MOD_EXPR.  For scalar modes,
+	 expansion has code to adjust TRUNC_MOD_EXPR into the desired other
+	 modes, but for vector modes it does not.  The adjustment code
+	 should be instead emitted in tree-vect-patterns.cc.  */
+      if (TREE_CODE (type) == VECTOR_TYPE)
+	return unknown_optab;
+      /* FALLTHRU */
+    case TRUNC_MOD_EXPR:
       return TYPE_UNSIGNED (type) ? umod_optab : smod_optab;
 
-    case RDIV_EXPR:
-    case TRUNC_DIV_EXPR:
     case CEIL_DIV_EXPR:
     case FLOOR_DIV_EXPR:
     case ROUND_DIV_EXPR:
+      /* {,u}{s,u}div_optab implements {TRUNC,EXACT}_DIV_EXPR or RDIV_EXPR.
+	 For scalar modes, expansion has code to adjust TRUNC_DIV_EXPR
+	 into the desired other modes, but for vector modes it does not.
+	 The adjustment code should be instead emitted in
+	 tree-vect-patterns.cc.  */
+      if (TREE_CODE (type) == VECTOR_TYPE)
+	return unknown_optab;
+      /* FALLTHRU */
+    case RDIV_EXPR:
+    case TRUNC_DIV_EXPR:
     case EXACT_DIV_EXPR:
       if (TYPE_SATURATING (type))
 	return TYPE_UNSIGNED (type) ? usdiv_optab : ssdiv_optab;
diff --git a/gcc/testsuite/gfortran.dg/pr102860.f90 b/gcc/testsuite/gfortran.dg/pr102860.f90
new file mode 100644
index 00000000000..d0a7356c0f5
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr102860.f90
@@ -0,0 +1,10 @@
+! PR middle-end/102860
+! { dg-do compile { target { powerpc*-*-* } } }
+! { dg-require-effective-target powerpc_vsx_ok } 
+! { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power10" } } 
+! { dg-options "-O2 -mcpu=power10" } 
+
+function foo(a)
+  integer(kind=4) :: a(1024)
+  a(:) = modulo (a(:), 39)
+end function