From dd3bb05cd7be72d080598cb693549ac74d5cb02d Mon Sep 17 00:00:00 2001 From: Zhao Wei Liew Date: Sat, 19 Feb 2022 16:28:38 +0800 Subject: [PATCH] tree-optimization: [PR103855] Fold (type)X / (type)Y This pattern converts (trunc_div (convert a) (convert b)) to (convert (trunc_div a b)) when: 1. type, a, and b all have unsigned integeral types 2. a and b have the same type precision 3. type has type precision at least as larger as a and b This is useful as wider divisions are typically more expensive. To illustrate the effects, consider the following code snippet: unsigned long long f(unsigned int a, unsigned int b) { unsigned long long all = a; return all / b; } Without the pattern, g++ -std=c++20 -O2 generates the following assembly: f(unsigned int, unsigned int): mov eax, edi mov esi, esi xor edx, edx div rsi ret With the pattern, it generates this: f(unsigned int, unsigned int): mov eax, edi xor edx, edx div esi ret This is identical to what clang++ -std=c++20 -O2 generates. Bootstrapped and regression tested on x86_64-pc-linux-gnu. Signed-off-by: Zhao Wei Liew PR tree-optimization/103855 gcc/ChangeLog: * match.pd: Add pattern for (type)X / (type)Y. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/divide-8.c: New test. * gcc.dg/tree-ssa/divide-9.c: New test. --- gcc/match.pd | 15 +++++++++++++++ gcc/testsuite/gcc.dg/tree-ssa/divide-8.c | 9 +++++++++ gcc/testsuite/gcc.dg/tree-ssa/divide-9.c | 9 +++++++++ 3 files changed, 33 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/divide-8.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/divide-9.c diff --git a/gcc/match.pd b/gcc/match.pd index 10f62284862..393b43756dd 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -684,6 +684,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (INTEGRAL_TYPE_P (type) || VECTOR_INTEGER_TYPE_P (type)) (convert (trunc_mod @0 @1)))) +/* (type)X / (type)Y -> (type)(X / Y) + when the resulting type is at least precise as the original types + and when all the types are unsigned integral types. */ +(simplify + (trunc_div (convert @0) (convert @1)) + (if (INTEGRAL_TYPE_P (type) + && INTEGRAL_TYPE_P (TREE_TYPE (@0)) + && INTEGRAL_TYPE_P (TREE_TYPE (@1)) + && TYPE_UNSIGNED (type) + && TYPE_UNSIGNED (TREE_TYPE (@0)) + && TYPE_UNSIGNED (TREE_TYPE (@1)) + && TYPE_PRECISION (TREE_TYPE (@0)) == TYPE_PRECISION (TREE_TYPE (@1)) + && TYPE_PRECISION (type) >= TYPE_PRECISION (TREE_TYPE (@0))) + (convert (trunc_div @0 @1)))) + /* x * (1 + y / x) - y -> x - y % x */ (simplify (minus (mult:cs @0 (plus:s (trunc_div:s @1 @0) integer_onep)) @1) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/divide-8.c b/gcc/testsuite/gcc.dg/tree-ssa/divide-8.c new file mode 100644 index 00000000000..dc3dc9ca769 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/divide-8.c @@ -0,0 +1,9 @@ +/* PR tree-optimization/103855 */ +/* { dg-options "-O -fdump-tree-optimized" } */ + +unsigned int f(unsigned int a, unsigned int b) { + unsigned long long all = a; + return all / b; +} + +/* { dg-final { scan-tree-dump-not "\\\(long long unsigned int\\\)" "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/divide-9.c b/gcc/testsuite/gcc.dg/tree-ssa/divide-9.c new file mode 100644 index 00000000000..6986b5484e4 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/divide-9.c @@ -0,0 +1,9 @@ +/* PR tree-optimization/103855 */ +/* { dg-options "-O -fdump-tree-optimized" } */ + +unsigned long long f(unsigned int a, unsigned int b) { + unsigned long long all = a; + return all / b; +} + +/* { dg-final { scan-tree-dump-times "\\\(long long unsigned int\\\)" 1 "optimized" } } */ -- 2.35.1