* Re: [PATCH] aarch64/expr: Use ccmp when the outer expression is used twice [PR100942]
2023-12-12 8:21 [PATCH] aarch64/expr: Use ccmp when the outer expression is used twice [PR100942] Andrew Pinski
@ 2023-12-13 1:08 ` Andrew Pinski
2024-01-02 17:34 ` Andrew Pinski
2024-01-02 22:45 ` Richard Sandiford
2 siblings, 0 replies; 4+ messages in thread
From: Andrew Pinski @ 2023-12-13 1:08 UTC (permalink / raw)
To: Andrew Pinski; +Cc: gcc-patches
On Tue, Dec 12, 2023 at 12:22 AM Andrew Pinski <quic_apinski@quicinc.com> wrote:
>
> Ccmp is not used if the result of the and/ior is used by both
> a GIMPLE_COND and a GIMPLE_ASSIGN. This improves the code generation
> here by using ccmp in this case.
> Two changes is required, first we need to allow the outer statement's
> result be used more than once.
> The second change is that during the expansion of the gimple, we need
> to try using ccmp. This is needed because we don't use expand the ssa
> name of the lhs but rather expand directly from the gimple.
>
> A small note on the ccmp_4.c testcase, we should be able to get slightly
> better than with this patch but it is one extra instruction compared to
> before.
Just FYI, this pattern shows up a few times in GCC itself even.
Thanks,
Andrew Pinski
>
> Bootstraped and tested on aarch64-linux-gnu with no regressions.
>
> PR target/100942
>
> gcc/ChangeLog:
>
> * ccmp.cc (ccmp_candidate_p): Add outer argument.
> Allow if the outer is true and the lhs is used more
> than once.
> (expand_ccmp_expr): Update call to ccmp_candidate_p.
> * cfgexpand.cc (expand_gimple_stmt_1): Try using ccmp
> for binary assignments.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/aarch64/ccmp_3.c: New test.
> * gcc.target/aarch64/ccmp_4.c: New test.
>
> Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
> ---
> gcc/ccmp.cc | 9 +++---
> gcc/cfgexpand.cc | 25 ++++++++++++++++
> gcc/testsuite/gcc.target/aarch64/ccmp_3.c | 20 +++++++++++++
> gcc/testsuite/gcc.target/aarch64/ccmp_4.c | 35 +++++++++++++++++++++++
> 4 files changed, 85 insertions(+), 4 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_3.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_4.c
>
> diff --git a/gcc/ccmp.cc b/gcc/ccmp.cc
> index 1bd6fadea35..a274f8c3d53 100644
> --- a/gcc/ccmp.cc
> +++ b/gcc/ccmp.cc
> @@ -92,7 +92,7 @@ ccmp_tree_comparison_p (tree t, basic_block bb)
>
> /* Check whether G is a potential conditional compare candidate. */
> static bool
> -ccmp_candidate_p (gimple *g)
> +ccmp_candidate_p (gimple *g, bool outer = false)
> {
> tree lhs, op0, op1;
> gimple *gs0, *gs1;
> @@ -109,8 +109,9 @@ ccmp_candidate_p (gimple *g)
> lhs = gimple_assign_lhs (g);
> op0 = gimple_assign_rhs1 (g);
> op1 = gimple_assign_rhs2 (g);
> - if ((TREE_CODE (op0) != SSA_NAME) || (TREE_CODE (op1) != SSA_NAME)
> - || !has_single_use (lhs))
> + if ((TREE_CODE (op0) != SSA_NAME) || (TREE_CODE (op1) != SSA_NAME))
> + return false;
> + if (!outer && !has_single_use (lhs))
> return false;
>
> bb = gimple_bb (g);
> @@ -284,7 +285,7 @@ expand_ccmp_expr (gimple *g, machine_mode mode)
> rtx_insn *last;
> rtx tmp;
>
> - if (!ccmp_candidate_p (g))
> + if (!ccmp_candidate_p (g, true))
> return NULL_RTX;
>
> last = get_last_insn ();
> diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
> index b860be8bb77..0f9aad8e3eb 100644
> --- a/gcc/cfgexpand.cc
> +++ b/gcc/cfgexpand.cc
> @@ -74,6 +74,7 @@ along with GCC; see the file COPYING3. If not see
> #include "output.h"
> #include "builtins.h"
> #include "opts.h"
> +#include "ccmp.h"
>
> /* Some systems use __main in a way incompatible with its use in gcc, in these
> cases use the macros NAME__MAIN to give a quoted symbol and SYMBOL__MAIN to
> @@ -3972,6 +3973,30 @@ expand_gimple_stmt_1 (gimple *stmt)
> if (GET_CODE (target) == SUBREG && SUBREG_PROMOTED_VAR_P (target))
> promoted = true;
>
> + /* Try to expand conditonal compare. */
> + if (targetm.gen_ccmp_first
> + && gimple_assign_rhs_class (assign_stmt) == GIMPLE_BINARY_RHS)
> + {
> + machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
> + gcc_checking_assert (targetm.gen_ccmp_next != NULL);
> + temp = expand_ccmp_expr (stmt, mode);
> + if (temp)
> + {
> + if (promoted)
> + {
> + int unsignedp = SUBREG_PROMOTED_SIGN (target);
> + convert_move (SUBREG_REG (target), temp, unsignedp);
> + }
> + else
> + {
> + temp = force_operand (temp, target);
> + if (temp != target)
> + emit_move_insn (target, temp);
> + }
> + return;
> + }
> + }
> +
> ops.code = gimple_assign_rhs_code (assign_stmt);
> ops.type = TREE_TYPE (lhs);
> switch (get_gimple_rhs_class (ops.code))
> diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_3.c b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
> new file mode 100644
> index 00000000000..a2b47fbee14
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
> @@ -0,0 +1,20 @@
> +/* { dg-options "-O2" } */
> +/* PR target/100942 */
> +
> +void foo(void);
> +int f1(int a, int b)
> +{
> + int c = a == 0 || b == 0;
> + if (c) foo();
> + return c;
> +}
> +
> +/* We should get one cmp followed by ccmp and one cset. */
> +/* { dg-final { scan-assembler "\tccmp\t" } } */
> +/* { dg-final { scan-assembler-times "\tcset\t" 1 } } */
> +/* { dg-final { scan-assembler-times "\tcmp\t" 1 } } */
> +/* And not get 2 cmps and 2 (or more cset) and orr and a cbnz. */
> +/* { dg-final { scan-assembler-not "\torr\t" } } */
> +/* { dg-final { scan-assembler-not "\tcbnz\t" } } */
> +/* { dg-final { scan-assembler-not "\tcbz\t" } } */
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_4.c b/gcc/testsuite/gcc.target/aarch64/ccmp_4.c
> new file mode 100644
> index 00000000000..bc0f57a7c59
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/ccmp_4.c
> @@ -0,0 +1,35 @@
> +/* { dg-options "-O2" } */
> +/* PR target/100942 */
> +
> +void foo(void);
> +int f1(int a, int b, int d)
> +{
> + int c = a < 8 || b < 9;
> + int e = d < 11 || c;
> + if (e) foo();
> + return c;
> +}
> +
> +/*
> + We really should get:
> + cmp w0, 7
> + ccmp w1, 8, 4, gt
> + cset w0, le
> + ccmp w2, 10, 4, gt
> + ble .L11
> +
> + But we currently get:
> + cmp w0, 7
> + ccmp w1, 8, 4, gt
> + cset w0, le
> + cmp w0, 0
> + ccmp w2, 10, 4, eq
> + ble .L11
> + The middle cmp is not needed.
> + */
> +
> +/* We should end up with only one cmp and 2 ccmp and 1 cset but currently we get 2 cmp
> + though. */
> +/* { dg-final { scan-assembler-times "\tccmp\t" 2 } } */
> +/* { dg-final { scan-assembler-times "\tcset\t" 1 } } */
> +/* { dg-final { scan-assembler-times "\tcmp\t" 1 { xfail *-*-* } } } */
> --
> 2.39.3
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] aarch64/expr: Use ccmp when the outer expression is used twice [PR100942]
2023-12-12 8:21 [PATCH] aarch64/expr: Use ccmp when the outer expression is used twice [PR100942] Andrew Pinski
2023-12-13 1:08 ` Andrew Pinski
@ 2024-01-02 17:34 ` Andrew Pinski
2024-01-02 22:45 ` Richard Sandiford
2 siblings, 0 replies; 4+ messages in thread
From: Andrew Pinski @ 2024-01-02 17:34 UTC (permalink / raw)
To: Andrew Pinski; +Cc: gcc-patches
On Tue, Dec 12, 2023 at 12:22 AM Andrew Pinski <quic_apinski@quicinc.com> wrote:
>
> Ccmp is not used if the result of the and/ior is used by both
> a GIMPLE_COND and a GIMPLE_ASSIGN. This improves the code generation
> here by using ccmp in this case.
> Two changes is required, first we need to allow the outer statement's
> result be used more than once.
> The second change is that during the expansion of the gimple, we need
> to try using ccmp. This is needed because we don't use expand the ssa
> name of the lhs but rather expand directly from the gimple.
>
> A small note on the ccmp_4.c testcase, we should be able to get slightly
> better than with this patch but it is one extra instruction compared to
> before.
>
> Bootstraped and tested on aarch64-linux-gnu with no regressions.
Ping?
>
> PR target/100942
>
> gcc/ChangeLog:
>
> * ccmp.cc (ccmp_candidate_p): Add outer argument.
> Allow if the outer is true and the lhs is used more
> than once.
> (expand_ccmp_expr): Update call to ccmp_candidate_p.
> * cfgexpand.cc (expand_gimple_stmt_1): Try using ccmp
> for binary assignments.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/aarch64/ccmp_3.c: New test.
> * gcc.target/aarch64/ccmp_4.c: New test.
>
> Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
> ---
> gcc/ccmp.cc | 9 +++---
> gcc/cfgexpand.cc | 25 ++++++++++++++++
> gcc/testsuite/gcc.target/aarch64/ccmp_3.c | 20 +++++++++++++
> gcc/testsuite/gcc.target/aarch64/ccmp_4.c | 35 +++++++++++++++++++++++
> 4 files changed, 85 insertions(+), 4 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_3.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_4.c
>
> diff --git a/gcc/ccmp.cc b/gcc/ccmp.cc
> index 1bd6fadea35..a274f8c3d53 100644
> --- a/gcc/ccmp.cc
> +++ b/gcc/ccmp.cc
> @@ -92,7 +92,7 @@ ccmp_tree_comparison_p (tree t, basic_block bb)
>
> /* Check whether G is a potential conditional compare candidate. */
> static bool
> -ccmp_candidate_p (gimple *g)
> +ccmp_candidate_p (gimple *g, bool outer = false)
> {
> tree lhs, op0, op1;
> gimple *gs0, *gs1;
> @@ -109,8 +109,9 @@ ccmp_candidate_p (gimple *g)
> lhs = gimple_assign_lhs (g);
> op0 = gimple_assign_rhs1 (g);
> op1 = gimple_assign_rhs2 (g);
> - if ((TREE_CODE (op0) != SSA_NAME) || (TREE_CODE (op1) != SSA_NAME)
> - || !has_single_use (lhs))
> + if ((TREE_CODE (op0) != SSA_NAME) || (TREE_CODE (op1) != SSA_NAME))
> + return false;
> + if (!outer && !has_single_use (lhs))
> return false;
>
> bb = gimple_bb (g);
> @@ -284,7 +285,7 @@ expand_ccmp_expr (gimple *g, machine_mode mode)
> rtx_insn *last;
> rtx tmp;
>
> - if (!ccmp_candidate_p (g))
> + if (!ccmp_candidate_p (g, true))
> return NULL_RTX;
>
> last = get_last_insn ();
> diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
> index b860be8bb77..0f9aad8e3eb 100644
> --- a/gcc/cfgexpand.cc
> +++ b/gcc/cfgexpand.cc
> @@ -74,6 +74,7 @@ along with GCC; see the file COPYING3. If not see
> #include "output.h"
> #include "builtins.h"
> #include "opts.h"
> +#include "ccmp.h"
>
> /* Some systems use __main in a way incompatible with its use in gcc, in these
> cases use the macros NAME__MAIN to give a quoted symbol and SYMBOL__MAIN to
> @@ -3972,6 +3973,30 @@ expand_gimple_stmt_1 (gimple *stmt)
> if (GET_CODE (target) == SUBREG && SUBREG_PROMOTED_VAR_P (target))
> promoted = true;
>
> + /* Try to expand conditonal compare. */
> + if (targetm.gen_ccmp_first
> + && gimple_assign_rhs_class (assign_stmt) == GIMPLE_BINARY_RHS)
> + {
> + machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
> + gcc_checking_assert (targetm.gen_ccmp_next != NULL);
> + temp = expand_ccmp_expr (stmt, mode);
> + if (temp)
> + {
> + if (promoted)
> + {
> + int unsignedp = SUBREG_PROMOTED_SIGN (target);
> + convert_move (SUBREG_REG (target), temp, unsignedp);
> + }
> + else
> + {
> + temp = force_operand (temp, target);
> + if (temp != target)
> + emit_move_insn (target, temp);
> + }
> + return;
> + }
> + }
> +
> ops.code = gimple_assign_rhs_code (assign_stmt);
> ops.type = TREE_TYPE (lhs);
> switch (get_gimple_rhs_class (ops.code))
> diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_3.c b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
> new file mode 100644
> index 00000000000..a2b47fbee14
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
> @@ -0,0 +1,20 @@
> +/* { dg-options "-O2" } */
> +/* PR target/100942 */
> +
> +void foo(void);
> +int f1(int a, int b)
> +{
> + int c = a == 0 || b == 0;
> + if (c) foo();
> + return c;
> +}
> +
> +/* We should get one cmp followed by ccmp and one cset. */
> +/* { dg-final { scan-assembler "\tccmp\t" } } */
> +/* { dg-final { scan-assembler-times "\tcset\t" 1 } } */
> +/* { dg-final { scan-assembler-times "\tcmp\t" 1 } } */
> +/* And not get 2 cmps and 2 (or more cset) and orr and a cbnz. */
> +/* { dg-final { scan-assembler-not "\torr\t" } } */
> +/* { dg-final { scan-assembler-not "\tcbnz\t" } } */
> +/* { dg-final { scan-assembler-not "\tcbz\t" } } */
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_4.c b/gcc/testsuite/gcc.target/aarch64/ccmp_4.c
> new file mode 100644
> index 00000000000..bc0f57a7c59
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/ccmp_4.c
> @@ -0,0 +1,35 @@
> +/* { dg-options "-O2" } */
> +/* PR target/100942 */
> +
> +void foo(void);
> +int f1(int a, int b, int d)
> +{
> + int c = a < 8 || b < 9;
> + int e = d < 11 || c;
> + if (e) foo();
> + return c;
> +}
> +
> +/*
> + We really should get:
> + cmp w0, 7
> + ccmp w1, 8, 4, gt
> + cset w0, le
> + ccmp w2, 10, 4, gt
> + ble .L11
> +
> + But we currently get:
> + cmp w0, 7
> + ccmp w1, 8, 4, gt
> + cset w0, le
> + cmp w0, 0
> + ccmp w2, 10, 4, eq
> + ble .L11
> + The middle cmp is not needed.
> + */
> +
> +/* We should end up with only one cmp and 2 ccmp and 1 cset but currently we get 2 cmp
> + though. */
> +/* { dg-final { scan-assembler-times "\tccmp\t" 2 } } */
> +/* { dg-final { scan-assembler-times "\tcset\t" 1 } } */
> +/* { dg-final { scan-assembler-times "\tcmp\t" 1 { xfail *-*-* } } } */
> --
> 2.39.3
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] aarch64/expr: Use ccmp when the outer expression is used twice [PR100942]
2023-12-12 8:21 [PATCH] aarch64/expr: Use ccmp when the outer expression is used twice [PR100942] Andrew Pinski
2023-12-13 1:08 ` Andrew Pinski
2024-01-02 17:34 ` Andrew Pinski
@ 2024-01-02 22:45 ` Richard Sandiford
2 siblings, 0 replies; 4+ messages in thread
From: Richard Sandiford @ 2024-01-02 22:45 UTC (permalink / raw)
To: Andrew Pinski; +Cc: gcc-patches
Andrew Pinski <quic_apinski@quicinc.com> writes:
> Ccmp is not used if the result of the and/ior is used by both
> a GIMPLE_COND and a GIMPLE_ASSIGN. This improves the code generation
> here by using ccmp in this case.
> Two changes is required, first we need to allow the outer statement's
> result be used more than once.
> The second change is that during the expansion of the gimple, we need
> to try using ccmp. This is needed because we don't use expand the ssa
> name of the lhs but rather expand directly from the gimple.
>
> A small note on the ccmp_4.c testcase, we should be able to get slightly
> better than with this patch but it is one extra instruction compared to
> before.
>
> Bootstraped and tested on aarch64-linux-gnu with no regressions.
>
> PR target/100942
>
> gcc/ChangeLog:
>
> * ccmp.cc (ccmp_candidate_p): Add outer argument.
> Allow if the outer is true and the lhs is used more
> than once.
> (expand_ccmp_expr): Update call to ccmp_candidate_p.
> * cfgexpand.cc (expand_gimple_stmt_1): Try using ccmp
> for binary assignments.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/aarch64/ccmp_3.c: New test.
> * gcc.target/aarch64/ccmp_4.c: New test.
>
> Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
TBH I was hoping someone more familiar with the code would comment,
since it looks like the current ccmp decision might have been deliberate.
I agree it doesn't seem to make much sense though. It's not specific
to a use in a GIMPLE_ASSIGN + GIMPLE_COND, it applies even to uses
in two GIMPLE_ASSIGNs. E.g.:
int f1(int a, int b, _Bool *x)
{
x[0] = x[1] = a == 0 || b == 0;
}
int f2(int a, int b, int *x)
{
x[0] = x[1] = a == 0 || b == 0;
}
produces:
f1:
cmp w0, 0
cset w3, eq
cmp w1, 0
cset w1, eq
orr w1, w3, w1
strb w1, [x2]
strb w1, [x2, 1]
ret
f2:
cmp w0, 0
ccmp w1, 0, 4, ne
cset w1, eq
stp w1, w1, [x2]
ret
because f2 has a cast to int (making the || single-use) whereas f1
doesn't. Might be nice to include that as a ccmp_5.c.
> ---
> gcc/ccmp.cc | 9 +++---
> gcc/cfgexpand.cc | 25 ++++++++++++++++
> gcc/testsuite/gcc.target/aarch64/ccmp_3.c | 20 +++++++++++++
> gcc/testsuite/gcc.target/aarch64/ccmp_4.c | 35 +++++++++++++++++++++++
> 4 files changed, 85 insertions(+), 4 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_3.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_4.c
>
> diff --git a/gcc/ccmp.cc b/gcc/ccmp.cc
> index 1bd6fadea35..a274f8c3d53 100644
> --- a/gcc/ccmp.cc
> +++ b/gcc/ccmp.cc
> @@ -92,7 +92,7 @@ ccmp_tree_comparison_p (tree t, basic_block bb)
>
> /* Check whether G is a potential conditional compare candidate. */
> static bool
> -ccmp_candidate_p (gimple *g)
> +ccmp_candidate_p (gimple *g, bool outer = false)
The function comment should describe the new parameter.
> {
> tree lhs, op0, op1;
> gimple *gs0, *gs1;
> @@ -109,8 +109,9 @@ ccmp_candidate_p (gimple *g)
> lhs = gimple_assign_lhs (g);
> op0 = gimple_assign_rhs1 (g);
> op1 = gimple_assign_rhs2 (g);
> - if ((TREE_CODE (op0) != SSA_NAME) || (TREE_CODE (op1) != SSA_NAME)
> - || !has_single_use (lhs))
> + if ((TREE_CODE (op0) != SSA_NAME) || (TREE_CODE (op1) != SSA_NAME))
> + return false;
> + if (!outer && !has_single_use (lhs))
> return false;
>
> bb = gimple_bb (g);
> @@ -284,7 +285,7 @@ expand_ccmp_expr (gimple *g, machine_mode mode)
> rtx_insn *last;
> rtx tmp;
>
> - if (!ccmp_candidate_p (g))
> + if (!ccmp_candidate_p (g, true))
> return NULL_RTX;
>
> last = get_last_insn ();
> diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
> index b860be8bb77..0f9aad8e3eb 100644
> --- a/gcc/cfgexpand.cc
> +++ b/gcc/cfgexpand.cc
> @@ -74,6 +74,7 @@ along with GCC; see the file COPYING3. If not see
> #include "output.h"
> #include "builtins.h"
> #include "opts.h"
> +#include "ccmp.h"
>
> /* Some systems use __main in a way incompatible with its use in gcc, in these
> cases use the macros NAME__MAIN to give a quoted symbol and SYMBOL__MAIN to
> @@ -3972,6 +3973,30 @@ expand_gimple_stmt_1 (gimple *stmt)
> if (GET_CODE (target) == SUBREG && SUBREG_PROMOTED_VAR_P (target))
> promoted = true;
>
> + /* Try to expand conditonal compare. */
> + if (targetm.gen_ccmp_first
> + && gimple_assign_rhs_class (assign_stmt) == GIMPLE_BINARY_RHS)
> + {
> + machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
> + gcc_checking_assert (targetm.gen_ccmp_next != NULL);
> + temp = expand_ccmp_expr (stmt, mode);
> + if (temp)
> + {
> + if (promoted)
> + {
> + int unsignedp = SUBREG_PROMOTED_SIGN (target);
> + convert_move (SUBREG_REG (target), temp, unsignedp);
> + }
> + else
> + {
> + temp = force_operand (temp, target);
> + if (temp != target)
> + emit_move_insn (target, temp);
> + }
> + return;
> + }
> + }
> +
> ops.code = gimple_assign_rhs_code (assign_stmt);
> ops.type = TREE_TYPE (lhs);
> switch (get_gimple_rhs_class (ops.code))
Would it be possible instead to split:
rtx r;
location_t saved_loc = curr_insn_location ();
loc = gimple_location (g);
if (loc != UNKNOWN_LOCATION)
set_curr_insn_location (loc);
ops.code = gimple_assign_rhs_code (g);
switch (get_gimple_rhs_class (ops.code))
{
case GIMPLE_TERNARY_RHS:
ops.op2 = gimple_assign_rhs3 (g);
/* Fallthru */
case GIMPLE_BINARY_RHS:
ops.op1 = gimple_assign_rhs2 (g);
/* Try to expand conditonal compare. */
if (targetm.gen_ccmp_first)
{
gcc_checking_assert (targetm.gen_ccmp_next != NULL);
r = expand_ccmp_expr (g, mode);
if (r)
break;
}
/* Fallthru */
case GIMPLE_UNARY_RHS:
ops.op0 = gimple_assign_rhs1 (g);
ops.type = TREE_TYPE (gimple_assign_lhs (g));
ops.location = loc;
r = expand_expr_real_2 (&ops, target, tmode, modifier);
break;
case GIMPLE_SINGLE_RHS:
{
r = expand_expr_real (gimple_assign_rhs1 (g), target,
tmode, modifier, alt_rtl,
inner_reference_p);
break;
}
default:
gcc_unreachable ();
}
set_curr_insn_location (saved_loc);
of expand_expr_real_1 out into a subroutine and make this function call it?
That way we could avoid duplicating the expand_ccmp_expr stuff and the
promoted handling.
Thanks for doing this. Looks like a nice improvement.
Richard
> diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_3.c b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
> new file mode 100644
> index 00000000000..a2b47fbee14
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
> @@ -0,0 +1,20 @@
> +/* { dg-options "-O2" } */
> +/* PR target/100942 */
> +
> +void foo(void);
> +int f1(int a, int b)
> +{
> + int c = a == 0 || b == 0;
> + if (c) foo();
> + return c;
> +}
> +
> +/* We should get one cmp followed by ccmp and one cset. */
> +/* { dg-final { scan-assembler "\tccmp\t" } } */
> +/* { dg-final { scan-assembler-times "\tcset\t" 1 } } */
> +/* { dg-final { scan-assembler-times "\tcmp\t" 1 } } */
> +/* And not get 2 cmps and 2 (or more cset) and orr and a cbnz. */
> +/* { dg-final { scan-assembler-not "\torr\t" } } */
> +/* { dg-final { scan-assembler-not "\tcbnz\t" } } */
> +/* { dg-final { scan-assembler-not "\tcbz\t" } } */
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_4.c b/gcc/testsuite/gcc.target/aarch64/ccmp_4.c
> new file mode 100644
> index 00000000000..bc0f57a7c59
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/ccmp_4.c
> @@ -0,0 +1,35 @@
> +/* { dg-options "-O2" } */
> +/* PR target/100942 */
> +
> +void foo(void);
> +int f1(int a, int b, int d)
> +{
> + int c = a < 8 || b < 9;
> + int e = d < 11 || c;
> + if (e) foo();
> + return c;
> +}
> +
> +/*
> + We really should get:
> + cmp w0, 7
> + ccmp w1, 8, 4, gt
> + cset w0, le
> + ccmp w2, 10, 4, gt
> + ble .L11
> +
> + But we currently get:
> + cmp w0, 7
> + ccmp w1, 8, 4, gt
> + cset w0, le
> + cmp w0, 0
> + ccmp w2, 10, 4, eq
> + ble .L11
> + The middle cmp is not needed.
> + */
> +
> +/* We should end up with only one cmp and 2 ccmp and 1 cset but currently we get 2 cmp
> + though. */
> +/* { dg-final { scan-assembler-times "\tccmp\t" 2 } } */
> +/* { dg-final { scan-assembler-times "\tcset\t" 1 } } */
> +/* { dg-final { scan-assembler-times "\tcmp\t" 1 { xfail *-*-* } } } */
^ permalink raw reply [flat|nested] 4+ messages in thread