public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug target/59533] New: [SH] Missed cmp/pz opportunity
@ 2013-12-16 21:59 olegendo at gcc dot gnu.org
  2013-12-17  0:32 ` [Bug target/59533] " olegendo at gcc dot gnu.org
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: olegendo at gcc dot gnu.org @ 2013-12-16 21:59 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=59533

            Bug ID: 59533
           Summary: [SH] Missed cmp/pz opportunity
           Product: gcc
           Version: 4.9.0
            Status: UNCONFIRMED
          Severity: enhancement
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: olegendo at gcc dot gnu.org
            Target: sh*-*-*

In some cases RTL such as

(insn 7 6 8 2 (set (reg:SI 169)
        (not:SI (reg:SI 168))) sh_tmp.cpp:31 -1
     (nil))
(insn 8 7 9 2 (parallel [
            (set (reg:SI 167 [ D.1462+-3 ])
                (lshiftrt:SI (reg:SI 169)
                    (const_int 31 [0x1f])))
            (clobber (reg:SI 147 t))
        ]) sh_tmp.cpp:31 -1
     (nil))

is expanded to refer to the MSB of a word.
In most cases combine can catch such a sequence and generate a cmp/pz insn
(set (reg:SI T_REG) (ge:SI (...) (const_int 0))).  However, in cases where the
extracted MSB is not stored in the T bit, it fails to do so.
Here are a few test functions, compiled with -O2 -m4:

int test0_OK (unsigned char* a)
{
  return a[0] < 128;

/*
        mov.b   @r4,r1
        cmp/pz  r1
        rts
        movt    r0
*/
}

int test1_NG (unsigned char* a, unsigned char* b)
{
  return a[0] + (a[0] < 128);

/*
        mov.b   @r4,r1
        extu.b  r1,r0
        exts.b  r1,r1    // the redundant sign extension is another issue
        not     r1,r1
        shll    r1       // could use 'cmp/pz r1' here.
        mov     #0,r1
        rts
        addc    r1,r0
*/
}

int test2_NG (unsigned char* a, unsigned char* b)
{
  return a[0] + ((a[0] & 0x80) == 0);
/*
        mov.b   @r4,r1
        extu.b  r1,r0
        exts.b  r1,r1    // the redundant sign extension is another issue
        not     r1,r1
        shll    r1       // could use 'cmp/pz r1' here.
        mov     #0,r1
        rts
        addc    r1,r0
*/
}

int test3_OK (unsigned char* a, unsigned char* b)
{
  return a[0] + (a[0] > 127);
/*
        mov.b   @r4,r1
        extu.b  r1,r0
        exts.b  r1,r1
        shll    r1
        mov     #0,r1
        rts
        addc    r1,r0
*/
}

int test4_OK (unsigned char* a, unsigned char* b)
{
  return a[0] + ((a[0] & 0x80) != 0);
/*
        mov.b   @r4,r1
        extu.b  r1,r0
        exts.b  r1,r1
        shll    r1
        mov     #0,r1
        rts
        addc    r1,r0
*/
}

int test5_OK (unsigned char* a, int b, int c)
{
  if (a[0] < 128)
    return c;
  else
    return b + 50;

/*
        mov.b   @r4,r1
        cmp/pz  r1
        bt      .L9
        mov     r5,r6
        add     #50,r6
.L9:
        rts
        mov     r6,r0
*/
}

unsigned int test6_OK (unsigned int a)
{
  return ~a >> 31;

/*
        cmp/pz  r4
        rts
        movt    r0
*/
}


The following patch fixes the issue:

Index: gcc/config/sh/sh.md
===================================================================
--- gcc/config/sh/sh.md    (revision 205971)
+++ gcc/config/sh/sh.md    (working copy)
@@ -4707,13 +4707,28 @@
   DONE;
 })

-(define_insn "shll"
+(define_insn_and_split "shll"
   [(set (match_operand:SI 0 "arith_reg_dest" "=r")
     (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0") (const_int 1)))
    (set (reg:SI T_REG)
     (lt:SI (match_dup 1) (const_int 0)))]
   "TARGET_SH1"
   "shll    %0"
+  "&& can_create_pseudo_p ()"
+  [(set (reg:SI T_REG) (ge:SI (match_dup 2) (const_int 0)))
+   (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 1)))]
+{
+  rtx i = curr_insn;
+  rtx s = find_last_value (operands[1], &i, curr_insn, true);
+
+  if (s == operands[1] || i == curr_insn)
+    FAIL;
+
+  if (GET_CODE (s) == NOT && REG_P (XEXP (s, 0)))
+    operands[2] = XEXP (s, 0);
+  else
+    FAIL;
+}
   [(set_attr "type" "arith")])

 (define_insn "*ashlsi_c_void"


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [Bug target/59533] [SH] Missed cmp/pz opportunity
  2013-12-16 21:59 [Bug target/59533] New: [SH] Missed cmp/pz opportunity olegendo at gcc dot gnu.org
@ 2013-12-17  0:32 ` olegendo at gcc dot gnu.org
  2013-12-17  1:43 ` olegendo at gcc dot gnu.org
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: olegendo at gcc dot gnu.org @ 2013-12-17  0:32 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=59533

--- Comment #1 from Oleg Endo <olegendo at gcc dot gnu.org> ---
The shll trick above will not work properly in the following case:

int
test_00 (unsigned char* a, int b)
{
  return a[0] - (a[0] < 128);
}

results in:
        mov.b   @r4,r1
        extu.b  r1,r0
        exts.b  r1,r1
        cmp/pz  r1
        movt    r1
        rts
        sub     r1,r0

It is initially expanded as

(insn 10 9 11 2 (set (reg:SI 173)
        (not:SI (reg:SI 172))) sh_tmp.cpp:188 -1
     (nil))
(insn 11 10 12 2 (parallel [
            (set (reg:SI 171 [ D.1381+-3 ])
                (lshiftrt:SI (reg:SI 173)
                    (const_int 31 [0x1f])))
            (clobber (reg:SI 147 t))
        ]) sh_tmp.cpp:188 -1
     (nil))
(insn 12 11 13 2 (set (reg:SI 170 [ D.1379 ])
        (minus:SI (reg:SI 160 [ D.1378+-3 ])
            (reg:SI 171 [ D.1381+-3 ]))) sh_tmp.cpp:188 -1
     (nil))

and then combine will not try to integrate the not insn and only try:
Failed to match this instruction:
(set (reg:SI 170 [ D.1379 ])
    (minus:SI (zero_extend:SI (reg:QI 169 [ *a_2(D) ]))
        (lshiftrt:SI (reg:SI 173)
            (const_int 31 [0x1f]))))

The cmp/pz insn will be split out after combine and thus it will not be
combined into a subc insn.

One problem is that the function emit_store_flag_1 in expmed.c always expands
the not-shift because the assumption there is that it's cheaper.  This is not
true for SH and ideally it should check insn costs during expansion.



On the other hand, there are other cases such as

unsigned int
test_00 (unsigned int a)
{
  return (a >> 31) ^ 1;
}

which currently results in:
        shll    r4
        movt    r0
        rts
        xor     #1,r0

and can be done simpler as:
        cmp/pz  r4
        rts
        movt    r0

In this case combine will try to merge the shift and the xor into:
Failed to match this instruction:
(set (reg:SI 164 [ D.1394 ])
    (ge:SI (reg:SI 4 r4 [ a ])
        (const_int 0 [0])))

Adding such a pattern will also result in cmp/pz being used for all the other
cases above (without the shll trick), but it will fail to combine with any
other insn that takes the result as an operand in the T_REG, such as addc, subc
or rotcl/rotcr.  In order to get that working all the insns that can take T_REG
as an operand would require insn_and_split variants to include (ge:SI (reg:SI
(const_int 0)), which will be quite a lot of patterns.

Maybe it would be better to do a kind of SH specific like RTL pass that
performs insn pre-combination before the generic combine pass and catches such
special cases.  It could also probably be used to address issues mentioned in
PR 59291.

Alternatively the combine pass could be ran twice on SH, although it seems to
cause some problems.  If it worked, I'm afraid it would probably have a
negative impact on compilation time


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [Bug target/59533] [SH] Missed cmp/pz opportunity
  2013-12-16 21:59 [Bug target/59533] New: [SH] Missed cmp/pz opportunity olegendo at gcc dot gnu.org
  2013-12-17  0:32 ` [Bug target/59533] " olegendo at gcc dot gnu.org
@ 2013-12-17  1:43 ` olegendo at gcc dot gnu.org
  2013-12-17 21:07 ` olegendo at gcc dot gnu.org
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: olegendo at gcc dot gnu.org @ 2013-12-17  1:43 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=59533

--- Comment #2 from Oleg Endo <olegendo at gcc dot gnu.org> ---
I have quickly tried adding a peephole pass shortly after initial RTL
expansion:

Index: gcc/config/sh/sh.c
===================================================================
--- gcc/config/sh/sh.c    (revision 205971)
+++ gcc/config/sh/sh.c    (working copy)
@@ -735,6 +735,10 @@
   if (!TARGET_SH1)
     return;

+  opt_pass* pre_combine = make_pass_peephole2 (g);
+  pre_combine->name = "pre_peephole";
+  register_pass (pre_combine, PASS_POS_INSERT_AFTER, "dfinit", 1);
+
 /* Running the sh_treg_combine pass after ce1 generates better code when
    comparisons are combined and reg-reg moves are introduced, because
    reg-reg moves will be eliminated afterwards.  However, there are quite

and adding the following peepholes (the existing define_peephole2 patterns need
a && reload_completed condition)

Index: gcc/config/sh/sh.md
===================================================================
--- gcc/config/sh/sh.md    (revision 205971)
+++ gcc/config/sh/sh.md    (working copy)
@@ -821,6 +821,29 @@
     cmp/ge    %1,%0"
   [(set_attr "type" "mt_group")])

+;; This peephole will be done after RTL expansion before combine.
+(define_peephole2
+  [(parallel [(set (match_operand:SI 0 "arith_reg_dest")
+           (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand")
+                (const_int 31)))
+          (clobber (reg:SI T_REG))])
+   (set (match_operand:SI 2 "arith_reg_dest")
+    (xor:SI (match_dup 0) (const_int 1)))]
+  "TARGET_SH1 && can_create_pseudo_p ()"
+  [(set (reg:SI T_REG) (ge:SI (match_dup 1) (const_int 0)))
+   (set (match_dup 2) (reg:SI T_REG))])
+
+(define_peephole2
+  [(set (match_operand:SI 0 "arith_reg_dest")
+    (not:SI (match_operand:SI 1 "arith_reg_operand")))
+   (parallel [(set (match_operand:SI 2 "arith_reg_dest")
+           (lshiftrt:SI (match_dup 0) (const_int 31)))
+          (clobber (reg:SI T_REG))])]
+  "TARGET_SH1 && can_create_pseudo_p ()"
+  [(set (reg:SI T_REG) (ge:SI (match_dup 1) (const_int 0)))
+   (set (match_dup 2) (reg:SI T_REG))
+   (set (match_dup 0) (not:SI (match_dup 1)))])
+

This allows generating the cmp/pz insn before combine and thus it will be able
to combine it with other insns that use T_REG as an operand:

unsigned int
test_011 (unsigned int a, unsigned int b)
{
  return (a << 1) | ((a >> 31) ^ 1);
}

will compile to:
        cmp/pz  r4
        mov     r4,r0
        rts
        rotcl   r0


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [Bug target/59533] [SH] Missed cmp/pz opportunity
  2013-12-16 21:59 [Bug target/59533] New: [SH] Missed cmp/pz opportunity olegendo at gcc dot gnu.org
  2013-12-17  0:32 ` [Bug target/59533] " olegendo at gcc dot gnu.org
  2013-12-17  1:43 ` olegendo at gcc dot gnu.org
@ 2013-12-17 21:07 ` olegendo at gcc dot gnu.org
  2015-01-24 13:05 ` olegendo at gcc dot gnu.org
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: olegendo at gcc dot gnu.org @ 2013-12-17 21:07 UTC (permalink / raw)
  To: gcc-bugs

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=59533

--- Comment #3 from Oleg Endo <olegendo at gcc dot gnu.org> ---
This is basically the same issue as PR 54685.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [Bug target/59533] [SH] Missed cmp/pz opportunity
  2013-12-16 21:59 [Bug target/59533] New: [SH] Missed cmp/pz opportunity olegendo at gcc dot gnu.org
                   ` (2 preceding siblings ...)
  2013-12-17 21:07 ` olegendo at gcc dot gnu.org
@ 2015-01-24 13:05 ` olegendo at gcc dot gnu.org
  2015-01-25 17:08 ` olegendo at gcc dot gnu.org
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: olegendo at gcc dot gnu.org @ 2015-01-24 13:05 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59533

--- Comment #4 from Oleg Endo <olegendo at gcc dot gnu.org> ---
Author: olegendo
Date: Sat Jan 24 13:04:53 2015
New Revision: 220081

URL: https://gcc.gnu.org/viewcvs?rev=220081&root=gcc&view=rev
Log:
gcc/
    PR target/49263
    PR target/53987
    PR target/64345
    PR target/59533
    PR target/52933
    PR target/54236
    PR target/51244
    * config/sh/sh-protos.h
    (sh_extending_set_of_reg::can_use_as_unextended_reg,
    sh_extending_set_of_reg::use_as_unextended_reg,
    sh_is_nott_insn, sh_movt_set_dest, sh_movrt_set_dest, sh_is_movt_insn,
    sh_is_movrt_insn, sh_insn_operands_modified_between_p,
    sh_reg_dead_or_unused_after_insn, sh_in_recog_treg_set_expr,
    sh_recog_treg_set_expr, sh_split_treg_set_expr): New functions.
    (sh_treg_insns): New class.
    * config/sh/sh.c (TARGET_LEGITIMATE_COMBINED_INSN): Define target hook.
    (scope_counter): New class.
    (sh_legitimate_combined_insn, sh_is_nott_insn, sh_movt_set_dest,
    sh_movrt_set_dest, sh_reg_dead_or_unused_after_insn,
    sh_extending_set_of_reg::can_use_as_unextended_reg,
    sh_extending_set_of_reg::use_as_unextended_reg, sh_recog_treg_set_expr,
    sh_in_recog_treg_set_expr, sh_try_split_insn_simple,
    sh_split_treg_set_expr): New functions.
    (addsubcosts): Handle treg_set_expr.
    (sh_rtx_costs): Handle IF_THEN_ELSE and ZERO_EXTRACT.
    (sh_rtx_costs): Use arith_reg_operand in SIGN_EXTEND and ZERO_EXTEND.
    (sh_rtx_costs): Handle additional bit test patterns in EQ and AND cases.
    (sh_insn_operands_modified_between_p): Make non-static.
    * config/sh/predicates.md (zero_extend_movu_operand): Allow
    simple_mem_operand in addition to displacement_mem_operand.
    (zero_extend_operand): Don't allow zero_extend_movu_operand.
    (treg_set_expr, treg_set_expr_not_const01,
    arith_reg_or_treg_set_expr): New predicates.
    * config/sh/sh.md (tstsi_t): Use arith_reg_operand and
    arith_or_int_operand instead of logical_operand.  Convert to
    insn_and_split.  Try to optimize constant operand in splitter.
    (tsthi_t, tstqi_t): Fold into *tst<mode>_t.  Convert to insn_and_split.
    (*tstqi_t_zero): Delete.
    (*tst<mode>_t_subregs): Add !sh_in_recog_treg_set_expr split condition.
    (tstsi_t_and_not): Delete.
    (tst<mode>_t_zero_extract_eq): Rename to *tst<mode>_t_zero_extract.
    Convert to insn_and_split.
    (unnamed split, tstsi_t_zero_extract_xor,
    tstsi_t_zero_extract_subreg_xor_little,
    tstsi_t_zero_extract_subreg_xor_big): Delete.
    (*tstsi_t_shift_mask): New insn_and_split.
    (cmpeqsi_t, cmpgesi_t): Add new split for const_int 0 operands and try
    to recombine with surrounding insns when splitting.
    (*negtstsi): Add !sh_in_recog_treg_set_expr condition.
    (cmp_div0s_0, cmp_div0s_1, *cmp_div0s_0, *cmp_div0s_1): Rewrite as ...
    (cmp_div0s, *cmp_div0s_1, *cmp_div0s_2, *cmp_div0s_3, *cmp_div0s_4,
    *cmp_div0s_5, *cmp_div0s_6): ... these new insn_and_split patterns.
    (*cbranch_div0s: Delete.
    (*addc): Convert to insn_and_split.  Use treg_set_expr as 3rd operand.
    Try to recombine with surrounding insns when splitting.  Add operand
    order variants.
    (*addc_t_r, *addc_r_t): Use treg_set_expr_not_const01.
    (*addc_r_r_1, *addc_r_lsb, *addc_r_r_lsb, *addc_r_lsb_r, *addc_r_msb,
    *addc_r_r_msb, *addc_2r_msb): Delete.
    (*addc_2r_lsb): Rename to *addc_2r_t.  Use treg_set_expr.  Add operand
    order variant.
    (*addc_negreg_t): New insn_and_split.
    (*subc): Convert to insn_and_split.  Use treg_set_expr as 3rd operand.
    Try to recombine with surrounding insns when splitting.
    Add operand order variants.  
    (*subc_negt_reg, *subc_negreg_t, *reg_lsb_t, *reg_msb_t): New
    insn_and_split patterns.
    (*rotcr): Use arith_reg_or_treg_set_expr.  Try to recombine with
    surrounding insns when splitting.
    (unnamed rotcr split): Use arith_reg_or_treg_set_expr.
    (*rotcl): Likewise.  Add zero_extract variant.
    (*ashrsi2_31): New insn_and_split.
    (*negc): Convert to insn_and_split.  Use treg_set_expr.
    (*zero_extend<mode>si2_disp_mem): Update comment.
    (movrt_negc, *movrt_negc, nott): Add !sh_in_recog_treg_set_expr split
    condition.
    (*mov_t_msb_neg, mov_neg_si_t): Use treg_set_expr.  Try to recombine
    with surrounding insns when splitting.
    (any_treg_expr_to_reg): New insn_and_split.
    (*neg_zero_extract_0, *neg_zero_extract_1, *neg_zero_extract_2,
    *neg_zero_extract_3, *neg_zero_extract_4, *neg_zero_extract_5,
    *neg_zero_extract_6, *zero_extract_0, *zero_extract_1,
    *zero_extract_2): New single bit zero extract patterns.
    (bld_reg, *bld_regqi): Fold into bld<mode>_reg.
    (*get_thread_pointersi, store_gbr, *mov<mode>_gbr_load,
    *mov<mode>_gbr_load, *mov<mode>_gbr_load, *mov<mode>_gbr_load,
    *movdi_gbr_load): Use arith_reg_dest instead of register_operand for
    set destination.
    (set_thread_pointersi, load_gbr): Use arith_reg_operand instead of
    register_operand for set source.

gcc/testsuite/
    PR target/49263
    PR target/53987
    PR target/64345
    PR target/59533
    PR target/52933
    PR target/54236
    PR target/51244
    * gcc.target/sh/pr64345-1.c: New.
    * gcc.target/sh/pr64345-2.c: New.
    * gcc.target/sh/pr59533-1.c: New.
    * gcc.target/sh/pr49263.c: Adjust matching of expected insns.
    * gcc.target/sh/pr52933-2.c: Likewise.
    * gcc.target/sh/pr54089-1.c: Likewise.
    * gcc.target/sh/pr54236-1.c: Likewise.
    * gcc.target/sh/pr51244-20-sh2a.c: Likewise.
    * gcc.target/sh/pr49263-1.c: Remove xfails.
    * gcc.target/sh/pr49263-2.c: Likewise.
    * gcc.target/sh/pr49263-3.c: Likewise.
    * gcc.target/sh/pr53987-1.c: Likewise.
    * gcc.target/sh/pr52933-1.c: Adjust matching of expected insns.
    (test_24, test_25, test_26, test_27, test_28, test_29, test_30): New.
    * gcc.target/sh/pr51244-12.c: Adjust matching of expected insns.
    (test05, test06, test07, test08, test09, test10, test11, test12): New.
    * gcc.target/sh/pr54236-3.c: Adjust matching of expected insns.
    (test_002, test_003, test_004, test_005, test_006, test_007, test_008,
    test_009): New.
    * gcc.target/sh/pr51244-4.c: Adjust matching of expected insns.
    (test_02): New.

Added:
    trunk/gcc/testsuite/gcc.target/sh/pr59533-1.c
    trunk/gcc/testsuite/gcc.target/sh/pr64345-1.c
    trunk/gcc/testsuite/gcc.target/sh/pr64345-2.c
Modified:
    trunk/gcc/ChangeLog
    trunk/gcc/config/sh/predicates.md
    trunk/gcc/config/sh/sh-protos.h
    trunk/gcc/config/sh/sh.c
    trunk/gcc/config/sh/sh.md
    trunk/gcc/testsuite/ChangeLog
    trunk/gcc/testsuite/gcc.target/sh/pr49263-1.c
    trunk/gcc/testsuite/gcc.target/sh/pr49263-2.c
    trunk/gcc/testsuite/gcc.target/sh/pr49263-3.c
    trunk/gcc/testsuite/gcc.target/sh/pr49263.c
    trunk/gcc/testsuite/gcc.target/sh/pr51244-12.c
    trunk/gcc/testsuite/gcc.target/sh/pr51244-20-sh2a.c
    trunk/gcc/testsuite/gcc.target/sh/pr51244-4.c
    trunk/gcc/testsuite/gcc.target/sh/pr52933-1.c
    trunk/gcc/testsuite/gcc.target/sh/pr52933-2.c
    trunk/gcc/testsuite/gcc.target/sh/pr53987-1.c
    trunk/gcc/testsuite/gcc.target/sh/pr54089-1.c
    trunk/gcc/testsuite/gcc.target/sh/pr54236-1.c
    trunk/gcc/testsuite/gcc.target/sh/pr54236-3.c


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [Bug target/59533] [SH] Missed cmp/pz opportunity
  2013-12-16 21:59 [Bug target/59533] New: [SH] Missed cmp/pz opportunity olegendo at gcc dot gnu.org
                   ` (3 preceding siblings ...)
  2015-01-24 13:05 ` olegendo at gcc dot gnu.org
@ 2015-01-25 17:08 ` olegendo at gcc dot gnu.org
  2015-01-25 17:25 ` olegendo at gcc dot gnu.org
  2015-01-25 17:42 ` olegendo at gcc dot gnu.org
  6 siblings, 0 replies; 8+ messages in thread
From: olegendo at gcc dot gnu.org @ 2015-01-25 17:08 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59533

--- Comment #5 from Oleg Endo <olegendo at gcc dot gnu.org> ---
The issues mentioned above have been fixed by the treg_set_expr patch
(r220081).
There is one more thing, though:

int test2 (int x)
{
  return x < 0;
}

It's effectively a lshiftrt 31 and compiles to:
        shll    r4
        rts
        movt    r0

On SH2A the same can be done as:
        cmp/pz  r4
        rts
        movrt   r0

which avoids mutating the input operand of the comparison.  On non-SH2A this
would end up as:
        cmp/pz  r4
        mov     #-1,r0
        rts
        negc    r0,r0

which could be better than shll-movt in cases where the input operand is not
dead after the insn and even better if the -1 constant can be shared.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [Bug target/59533] [SH] Missed cmp/pz opportunity
  2013-12-16 21:59 [Bug target/59533] New: [SH] Missed cmp/pz opportunity olegendo at gcc dot gnu.org
                   ` (4 preceding siblings ...)
  2015-01-25 17:08 ` olegendo at gcc dot gnu.org
@ 2015-01-25 17:25 ` olegendo at gcc dot gnu.org
  2015-01-25 17:42 ` olegendo at gcc dot gnu.org
  6 siblings, 0 replies; 8+ messages in thread
From: olegendo at gcc dot gnu.org @ 2015-01-25 17:25 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59533

--- Comment #6 from Oleg Endo <olegendo at gcc dot gnu.org> ---
(In reply to Oleg Endo from comment #5)

The same would apply to:

int test (unsigned char* x, int y, int z)
{
   return ((*x >> 7) ^ 0) & 1;
}

which compiles to

        mov.b   @r4,r1
        shll    r1
        rts
        movt    r0

on non-SH2A.  The insn selection goes like this:
   zero_extract -> tstsi_t -> cmp/pz -> shll-movt


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [Bug target/59533] [SH] Missed cmp/pz opportunity
  2013-12-16 21:59 [Bug target/59533] New: [SH] Missed cmp/pz opportunity olegendo at gcc dot gnu.org
                   ` (5 preceding siblings ...)
  2015-01-25 17:25 ` olegendo at gcc dot gnu.org
@ 2015-01-25 17:42 ` olegendo at gcc dot gnu.org
  6 siblings, 0 replies; 8+ messages in thread
From: olegendo at gcc dot gnu.org @ 2015-01-25 17:42 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59533

--- Comment #7 from Oleg Endo <olegendo at gcc dot gnu.org> ---
If a shll is followed by a cbranch:

unsigned int
test_09_0 (int x, unsigned int y, unsigned int z)
{
  return ~(x >> 31) ? y : z;
}

        shll    r4
        bf      .L4
        mov     r6,r5
.L4:
        rts
        mov    r5,r0

it's better to use cmp/pz and invert the branch condition.
Combine is looking for the following pattern:

Failed to match this instruction:
(set (pc)
    (if_then_else (ge (reg:SI 4 r4 [ x ])
            (const_int 0 [0]))
        (label_ref:SI 17)
        (pc)))

One option could be adding a pattern like:

(define_insn_and_split "*cbranch"
  [(set (pc)
        (if_then_else (match_operand 0 "treg_set_expr")
                      (label_ref (match_operand 1))
                      (pc)))
   (clobber (reg:SI T_REG))]


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2015-01-25 17:42 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-12-16 21:59 [Bug target/59533] New: [SH] Missed cmp/pz opportunity olegendo at gcc dot gnu.org
2013-12-17  0:32 ` [Bug target/59533] " olegendo at gcc dot gnu.org
2013-12-17  1:43 ` olegendo at gcc dot gnu.org
2013-12-17 21:07 ` olegendo at gcc dot gnu.org
2015-01-24 13:05 ` olegendo at gcc dot gnu.org
2015-01-25 17:08 ` olegendo at gcc dot gnu.org
2015-01-25 17:25 ` olegendo at gcc dot gnu.org
2015-01-25 17:42 ` olegendo at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).