* [PATCH 3/3 v2] xtensa: Optimize 'cstoresi4' insn pattern [not found] <b861d41b-b48e-6e3e-8e62-dd21d0362101.ref@yahoo.co.jp> @ 2023-05-30 9:51 ` Takayuki 'January June' Suwa 2023-05-31 4:37 ` Max Filippov 0 siblings, 1 reply; 2+ messages in thread From: Takayuki 'January June' Suwa @ 2023-05-30 9:51 UTC (permalink / raw) To: GCC Patches; +Cc: Max Filippov Resubmitting the correct one due to a mistake in merging order of fixes. --- This patch introduces more optimized implementations for the 6 cstoresi4 insn comparison methods (eq/ne/lt/le/gt/ge, however, required TARGET_NSA for eq). gcc/ChangeLog: * config/xtensa/xtensa.cc (xtensa_expand_scc): Add dedicated optimization code for cstoresi4 (eq/ne/gt/ge/lt/le). * config/xtensa/xtensa.md (xtensa_ge_zero): Rename from '*signed_ge_zero', because it had to be called from 'xtensa_expand_scc()'. --- gcc/config/xtensa/xtensa.cc | 106 ++++++++++++++++++++++++++++++++---- gcc/config/xtensa/xtensa.md | 2 +- 2 files changed, 96 insertions(+), 12 deletions(-) diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc index 3b5d25b660a..64efd3d7287 100644 --- a/gcc/config/xtensa/xtensa.cc +++ b/gcc/config/xtensa/xtensa.cc @@ -991,24 +991,108 @@ xtensa_expand_conditional_move (rtx *operands, int isflt) int xtensa_expand_scc (rtx operands[4], machine_mode cmp_mode) { - rtx dest = operands[0]; - rtx cmp; - rtx one_tmp, zero_tmp; + rtx dest = operands[0], op0 = operands[2], op1 = operands[3]; + enum rtx_code code = GET_CODE (operands[1]); + rtx cmp, tmp0, tmp1; rtx (*gen_fn) (rtx, rtx, rtx, rtx, rtx); - if (!(cmp = gen_conditional_move (GET_CODE (operands[1]), cmp_mode, - operands[2], operands[3]))) - return 0; + /* Dedicated optimizations for cstoresi4. + a. In a magnitude comparison operator, swapping both sides and + inverting magnitude does not change the result, + eg. '(x >= y) != (y <= x)' is a constant of zero + (GE is changed to LE, not LT). + b. 
Due to room for further optimization, we use subtraction rather + than XOR (the default for RTL expansion of EQ/NE) as the binary + operation which is zero if both sides are the same and non-zero + otherwise. */ + if (cmp_mode == SImode) + switch (code) + { + /* EQ(op0, op1) := clz(op0 - op1) / 32 [requires TARGET_NSA] */ + case EQ: + if (!TARGET_NSA) + break; + /* EQ to EQZ conversion by subtracting op1 from op0. */ + emit_move_insn (dest, + expand_binop (SImode, sub_optab, op0, op1, + 0, 0, OPTAB_LIB_WIDEN)); + /* NSAU instruction will return 32 iff the source is zero, + zero through 31 otherwise (See Xtensa ISA Reference Manual, + p. 462) */ + emit_insn (gen_clzsi2 (dest, dest)); + emit_insn (gen_lshrsi3 (dest, dest, GEN_INT (5))); + return 1; + + /* NE(op0, op1) := (op0 - op1) == 0 ? 0 : 1 */ + case NE: + /* NE to NEZ conversion by subtracting op1 from op0. */ + emit_move_insn (tmp0 = gen_reg_rtx (SImode), + expand_binop (SImode, sub_optab, op0, op1, + 0, 0, OPTAB_LIB_WIDEN)); + emit_move_insn (dest, const_true_rtx); + emit_move_insn (dest, + gen_rtx_fmt_eee (IF_THEN_ELSE, SImode, + gen_rtx_fmt_ee (EQ, VOIDmode, + tmp0, const0_rtx), + tmp0, dest)); + return 1; + + case LE: + if (REG_P (op1)) + { + /* LE to GE conversion by swapping both sides. */ + tmp0 = op0, op0 = op1, op1 = tmp0; + goto case_GE_reg; + } + /* LE to LT conversion by adding one to op1. */ + op1 = GEN_INT (INTVAL (op1) + 1); + /* fallthru */ + + /* LT(op0, op1) := (unsigned)(op0 - op1) >> 31 */ + case LT: +case_LT: + /* LT to LTZ conversion by subtracting op1 from op0. */ + emit_move_insn (dest, + expand_binop (SImode, sub_optab, op0, op1, + 0, 0, OPTAB_LIB_WIDEN)); + emit_insn (gen_lshrsi3 (dest, dest, GEN_INT (31))); + return 1; + + case GE: + if (REG_P (op1)) + { +case_GE_reg: + /* GE to GEZ conversion by subtracting op1 from op0. */ + emit_move_insn (dest, + expand_binop (SImode, sub_optab, op0, op1, + 0, 0, OPTAB_LIB_WIDEN)); + /* Emitting the dedicated insn pattern. 
*/ + emit_insn (gen_xtensa_ge_zero (dest, dest)); + return 1; + } + /* GE to GT conversion by subtracting one from op1. */ + op1 = GEN_INT (INTVAL (op1) - 1); + /* fallthru */ - one_tmp = gen_reg_rtx (SImode); - zero_tmp = gen_reg_rtx (SImode); - emit_insn (gen_movsi (one_tmp, const_true_rtx)); - emit_insn (gen_movsi (zero_tmp, const0_rtx)); + case GT: + /* GT to LT conversion by swapping both sides. */ + tmp0 = op0, op0 = op1, op1 = tmp0; + goto case_LT; + default: + break; + } + + if (! (cmp = gen_conditional_move (code, cmp_mode, op0, op1))) + return 0; + + tmp0 = force_reg (SImode, const0_rtx); + tmp1 = force_reg (SImode, const_true_rtx); gen_fn = (cmp_mode == SImode ? gen_movsicc_internal0 : gen_movsicc_internal1); - emit_insn (gen_fn (dest, XEXP (cmp, 0), one_tmp, zero_tmp, cmp)); + emit_insn (gen_fn (dest, XEXP (cmp, 0), tmp1, tmp0, cmp)); + return 1; } diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md index 6882baaedfd..ebc305bd387 100644 --- a/gcc/config/xtensa/xtensa.md +++ b/gcc/config/xtensa/xtensa.md @@ -3136,7 +3136,7 @@ (const_int 5) (const_int 6)))]) -(define_insn_and_split "*signed_ge_zero" +(define_insn_and_split "xtensa_ge_zero" [(set (match_operand:SI 0 "register_operand" "=a") (ge:SI (match_operand:SI 1 "register_operand" "r") (const_int 0)))] -- 2.30.2 ^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH 3/3 v2] xtensa: Optimize 'cstoresi4' insn pattern 2023-05-30 9:51 ` [PATCH 3/3 v2] xtensa: Optimize 'cstoresi4' insn pattern Takayuki 'January June' Suwa @ 2023-05-31 4:37 ` Max Filippov 0 siblings, 0 replies; 2+ messages in thread From: Max Filippov @ 2023-05-31 4:37 UTC (permalink / raw) To: Takayuki 'January June' Suwa; +Cc: GCC Patches Hi Suwa-san, On Tue, May 30, 2023 at 2:51 AM Takayuki 'January June' Suwa <jjsuwa_sys3175@yahoo.co.jp> wrote: > > Resubmitting the correct one due to a mistake in merging order of fixes. > --- > This patch introduces more optimized implementations for the 6 cstoresi4 > insn comparison methods (eq/ne/lt/le/gt/ge, however, required TARGET_NSA > for eq). > > gcc/ChangeLog: > > * config/xtensa/xtensa.cc (xtensa_expand_scc): > Add dedicated optimization code for cstoresi4 (eq/ne/gt/ge/lt/le). > * config/xtensa/xtensa.md (xtensa_ge_zero): > Rename from '*signed_ge_zero', because it had to be called from > 'xtensa_expand_scc()'. > --- > gcc/config/xtensa/xtensa.cc | 106 ++++++++++++++++++++++++++++++++---- > gcc/config/xtensa/xtensa.md | 2 +- > 2 files changed, 96 insertions(+), 12 deletions(-) This change introduces a bunch of testsuite failures: +FAIL: gcc.c-torture/execute/20070623-1.c -O0 execution test +FAIL: gcc.c-torture/execute/20070623-1.c -O1 execution test +FAIL: gcc.c-torture/execute/20070623-1.c -O2 execution test +FAIL: gcc.c-torture/execute/20070623-1.c -O3 -g execution test +FAIL: gcc.c-torture/execute/20070623-1.c -Os execution test +FAIL: gcc.c-torture/execute/20070623-1.c -O2 -flto -fno-use-linker-plugin -flto-partition=none execution test +FAIL: gcc.c-torture/execute/20070623-1.c -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects execution test +FAIL: gcc.c-torture/execute/920612-1.c -O0 execution test +FAIL: gcc.c-torture/execute/920612-1.c -O1 execution test +FAIL: gcc.c-torture/execute/920612-1.c -O2 execution test +FAIL: gcc.c-torture/execute/920612-1.c -O3 -g execution test +FAIL: 
gcc.c-torture/execute/920612-1.c -Os execution test +FAIL: gcc.c-torture/execute/920612-1.c -O2 -flto -fno-use-linker-plugin -flto-partition=none execution test +FAIL: gcc.c-torture/execute/int-compare.c -O0 execution test +FAIL: gcc.c-torture/execute/int-compare.c -O1 execution test +FAIL: gcc.c-torture/execute/int-compare.c -O2 execution test +FAIL: gcc.c-torture/execute/int-compare.c -O3 -g execution test +FAIL: gcc.c-torture/execute/int-compare.c -Os execution test +FAIL: gcc.c-torture/execute/int-compare.c -O2 -flto -fno-use-linker-plugin -flto-partition=none execution test +FAIL: gcc.c-torture/execute/pr28651.c -O0 execution test +FAIL: gcc.c-torture/execute/pr28651.c -O1 execution test +FAIL: gcc.c-torture/execute/pr28651.c -O2 execution test +FAIL: gcc.c-torture/execute/pr28651.c -O3 -g execution test +FAIL: gcc.c-torture/execute/pr28651.c -Os execution test +FAIL: gcc.c-torture/execute/pr28651.c -O2 -flto -fno-use-linker-plugin -flto-partition=none execution test +FAIL: gcc.c-torture/execute/pr55137.c -O0 execution test +FAIL: gcc.c-torture/execute/pr55137.c -O1 execution test +FAIL: gcc.c-torture/execute/pr55137.c -O2 execution test +FAIL: gcc.c-torture/execute/pr55137.c -O3 -g execution test +FAIL: gcc.c-torture/execute/pr55137.c -Os execution test +FAIL: gcc.c-torture/execute/pr55137.c -O2 -flto -fno-use-linker-plugin -flto-partition=none execution test +FAIL: gcc.dg/pr61045.c execution test +FAIL: gcc.dg/signbit-6.c execution test +FAIL: c-c++-common/torture/builtin-arith-overflow-12.c -O2 execution test +FAIL: c-c++-common/torture/builtin-arith-overflow-12.c -O2 -flto -fno-use-linker-plugin -flto-partition=none execution test +FAIL: c-c++-common/torture/builtin-arith-overflow-12.c -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects execution test +FAIL: c-c++-common/torture/builtin-arith-overflow-13.c -O2 execution test +FAIL: c-c++-common/torture/builtin-arith-overflow-13.c -O2 -flto -fno-use-linker-plugin -flto-partition=none execution test +FAIL: 
c-c++-common/torture/builtin-arith-overflow-13.c -O2 -flto -fuse-linker-plugin -fno-fat-lto-objects execution test +FAIL: c-c++-common/torture/builtin-arith-overflow-14.c -O2 execution test +FAIL: c-c++-common/torture/builtin-arith-overflow-14.c -O2 -flto -fno-use-linker-plugin -flto-partition=none execution test +FAIL: c-c++-common/torture/builtin-arith-overflow-p-14.c -O2 execution test +FAIL: c-c++-common/torture/builtin-arith-overflow-p-14.c -O2 -flto -fno-use-linker-plugin -flto-partition=none execution test +FAIL: gcc.dg/torture/pr49958.c -O0 execution test +FAIL: gcc.dg/torture/pr49958.c -O1 execution test +FAIL: gcc.dg/torture/pr49958.c -O2 execution test +FAIL: gcc.dg/torture/pr49958.c -O3 -g execution test +FAIL: gcc.dg/torture/pr49958.c -Os execution test +FAIL: gcc.dg/torture/pr49958.c -O2 -flto -fno-use-linker-plugin -flto-partition=none execution test +FAIL: gcc.dg/tree-ssa/pr68714.c (internal compiler error: in decompose, at rtl.h:2297) +FAIL: gcc.dg/tree-ssa/pr68714.c (test for excess errors) +FAIL: gcc.dg/tree-ssa/pr81346-4.c execution test > diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc > index 3b5d25b660a..64efd3d7287 100644 > --- a/gcc/config/xtensa/xtensa.cc > +++ b/gcc/config/xtensa/xtensa.cc > @@ -991,24 +991,108 @@ xtensa_expand_conditional_move (rtx *operands, int isflt) > int > xtensa_expand_scc (rtx operands[4], machine_mode cmp_mode) > { > - rtx dest = operands[0]; > - rtx cmp; > - rtx one_tmp, zero_tmp; > + rtx dest = operands[0], op0 = operands[2], op1 = operands[3]; > + enum rtx_code code = GET_CODE (operands[1]); > + rtx cmp, tmp0, tmp1; > rtx (*gen_fn) (rtx, rtx, rtx, rtx, rtx); > > - if (!(cmp = gen_conditional_move (GET_CODE (operands[1]), cmp_mode, > - operands[2], operands[3]))) > - return 0; > + /* Dedicated optimizations for cstoresi4. > + a. In a magnitude comparison operator, swapping both sides and > + inverting magnitude does not change the result, > + eg. 
'(x >= y) != (y <= x)' is a constant of zero > + (GE is changed to LE, not LT). > + b. Due to room for further optimization, we use subtraction rather > + than XOR (the default for RTL expansion of EQ/NE) as the binary > + operation which is zero if both sides are the same and non-zero > + otherwise. */ > + if (cmp_mode == SImode) > + switch (code) > + { > + /* EQ(op0, op1) := clz(op0 - op1) / 32 [requires TARGET_NSA] */ > + case EQ: > + if (!TARGET_NSA) > + break; > + /* EQ to EQZ conversion by subtracting op1 from op0. */ > + emit_move_insn (dest, > + expand_binop (SImode, sub_optab, op0, op1, > + 0, 0, OPTAB_LIB_WIDEN)); > + /* NSAU instruction will return 32 iff the source is zero, > + zero through 31 otherwise (See Xtensa ISA Reference Manual, > + p. 462) */ > + emit_insn (gen_clzsi2 (dest, dest)); > + emit_insn (gen_lshrsi3 (dest, dest, GEN_INT (5))); > + return 1; > + > + /* NE(op0, op1) := (op0 - op1) == 0 ? 0 : 1 */ > + case NE: > + /* NE to NEZ conversion by subtracting op1 from op0. */ > + emit_move_insn (tmp0 = gen_reg_rtx (SImode), > + expand_binop (SImode, sub_optab, op0, op1, > + 0, 0, OPTAB_LIB_WIDEN)); > + emit_move_insn (dest, const_true_rtx); > + emit_move_insn (dest, > + gen_rtx_fmt_eee (IF_THEN_ELSE, SImode, > + gen_rtx_fmt_ee (EQ, VOIDmode, > + tmp0, const0_rtx), > + tmp0, dest)); > + return 1; > + > + case LE: > + if (REG_P (op1)) > + { > + /* LE to GE conversion by swapping both sides. */ > + tmp0 = op0, op0 = op1, op1 = tmp0; > + goto case_GE_reg; > + } > + /* LE to LT conversion by adding one to op1. */ > + op1 = GEN_INT (INTVAL (op1) + 1); > + /* fallthru */ > + > + /* LT(op0, op1) := (unsigned)(op0 - op1) >> 31 */ This doesn't work (as demonstrated by the gcc.c-torture/execute/20070623-1.c) when an overflow occurs, e.g. for op0 == INT_MIN, op1 == INT_MAX. Maybe the dedicated instructions salt / saltu could be used in that pattern? 
They don't have their own XCHAL_* macros, but according to the ISA book they were introduced in RG-2015.0, which I believe could be tested as follows: #define TARGET_SALT (XTENSA_MARCH_EARLIEST >= 270000) -- Thanks. -- Max ^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2023-05-31 4:37 UTC | newest] Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- [not found] <b861d41b-b48e-6e3e-8e62-dd21d0362101.ref@yahoo.co.jp> 2023-05-30 9:51 ` [PATCH 3/3 v2] xtensa: Optimize 'cstoresi4' insn pattern Takayuki 'January June' Suwa 2023-05-31 4:37 ` Max Filippov
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).