diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 3a1444d..509c0ee 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -16146,6 +16146,20 @@ ix86_fp_compare_code_to_integer (enum rtx_code code)
     }
 }
 
+/* Override compare_by_pieces' default implementation using the state
+   of the CCZmode FLAGS_REG and setne instruction.  TARGET is the integral
+   mode result, and FAIL_LABEL is the branch target of mismatched
+   comparisons.  */
+
+void
+ix86_finish_compare_by_pieces (rtx target, rtx_code_label *fail_label)
+{
+  rtx tmp = gen_reg_rtx (QImode);
+  emit_label (fail_label);
+  ix86_expand_setcc (tmp, NE, gen_rtx_REG (CCZmode, FLAGS_REG), const0_rtx);
+  convert_move (target, tmp, 1);
+}
+
 /* Zero extend possibly SImode EXP to Pmode register.  */
 rtx
 ix86_zero_extend_to_Pmode (rtx exp)
@@ -25127,6 +25141,8 @@ ix86_run_selftests (void)
 
 #undef TARGET_OVERLAP_OP_BY_PIECES_P
 #define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
+#undef TARGET_FINISH_COMPARE_BY_PIECES
+#define TARGET_FINISH_COMPARE_BY_PIECES ix86_finish_compare_by_pieces
 
 #undef TARGET_FLAGS_REGNUM
 #define TARGET_FLAGS_REGNUM FLAGS_REG
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 95ba56e..28d8361 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6922,6 +6922,18 @@ particular mode from being used for block comparisons by returning a
 negative number from this hook.
 @end deftypefn
 
+@deftypefn {Target Hook} void TARGET_FINISH_COMPARE_BY_PIECES (rtx @var{target}, rtx_code_label *@var{fail_label})
+Allow targets with a zero flag and suitable setcc instruction to provide
+an alternate implementation for @code{compare_by_pieces}.  The function
+@code{compare_by_pieces} generates a sequence of equality tests that
+branch to @var{fail_label} on mismatches, and fall through on success.
+By default, this hook assigns @code{const0_rtx} to @var{target} in the
+current basic block and @code{const1_rtx} to @var{target} in a new
+@var{fail_label} basic block.  Targets like x86 can take advantage
+of the property that the condition codes/zero flag are appropriately
+set to avoid introducing a new basic block.
+@end deftypefn
+
 @defmac MOVE_MAX_PIECES
 A C expression used by @code{move_by_pieces} to determine the largest unit
 a load or store used to copy memory is.  Defaults to @code{MOVE_MAX}.
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 4ac96dc..45711db 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4529,6 +4529,8 @@ If you don't define this, a reasonable default is used.
 
 @hook TARGET_COMPARE_BY_PIECES_BRANCH_RATIO
 
+@hook TARGET_FINISH_COMPARE_BY_PIECES
+
 @defmac MOVE_MAX_PIECES
 A C expression used by @code{move_by_pieces} to determine the largest unit
 a load or store used to copy memory is.  Defaults to @code{MOVE_MAX}.
diff --git a/gcc/expr.cc b/gcc/expr.cc
index 868fa6e..25fca17 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -1923,7 +1923,6 @@ compare_by_pieces (rtx arg0, rtx arg1, unsigned HOST_WIDE_INT len,
 		   by_pieces_constfn a1_cfn, void *a1_cfn_data)
 {
   rtx_code_label *fail_label = gen_label_rtx ();
-  rtx_code_label *end_label = gen_label_rtx ();
 
   if (target == NULL_RTX
       || !REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
@@ -1934,12 +1933,8 @@ compare_by_pieces (rtx arg0, rtx arg1, unsigned HOST_WIDE_INT len,
 
   data.run ();
 
-  emit_move_insn (target, const0_rtx);
-  emit_jump (end_label);
-  emit_barrier ();
-  emit_label (fail_label);
-  emit_move_insn (target, const1_rtx);
-  emit_label (end_label);
+  /* Allow the backend to override the default implementation.  */
+  targetm.finish_compare_by_pieces (target, fail_label);
 
   return target;
 }
diff --git a/gcc/target.def b/gcc/target.def
index 7d68429..0593917 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -3749,6 +3749,20 @@ negative number from this hook.",
  default_compare_by_pieces_branch_ratio)
 
 DEFHOOK
+(finish_compare_by_pieces,
+ "Allow targets with a zero flag and suitable setcc instruction to provide\n\
+an alternate implementation for @code{compare_by_pieces}.  The function\n\
+@code{compare_by_pieces} generates a sequence of equality tests that\n\
+branch to @var{fail_label} on mismatches, and fall through on success.\n\
+By default, this hook assigns @code{const0_rtx} to @var{target} in the\n\
+current basic block and @code{const1_rtx} to @var{target} in a new\n\
+@var{fail_label} basic block.  Targets like x86 can take advantage\n\
+of the property that the condition codes/zero flag are appropriately\n\
+set to avoid introducing a new basic block.",
+ void, (rtx target, rtx_code_label *fail_label),
+ default_finish_compare_by_pieces)
+
+DEFHOOK
 (slow_unaligned_access,
  "This hook returns true if memory accesses described by the\n\
 @var{mode} and @var{alignment} parameters have a cost many times greater\n\
diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc
index e190369..767fe38 100644
--- a/gcc/targhooks.cc
+++ b/gcc/targhooks.cc
@@ -2094,6 +2094,24 @@ default_compare_by_pieces_branch_ratio (machine_mode)
   return 1;
 }
 
+/* This hook allows the backend to modify/override code generation for
+   compare_by_pieces.  Targets with a zero flag and a suitable setcc
+   instruction can use them instead of the default "compare ? 0 : 1"
+   implementation below.  TARGET is the integral mode result and
+   FAIL_LABEL is the destination for comparison mismatches.  */
+
+void
+default_finish_compare_by_pieces (rtx target, rtx_code_label *fail_label)
+{
+  rtx_code_label *end_label = gen_label_rtx ();
+  emit_move_insn (target, const0_rtx);
+  emit_jump (end_label);
+  emit_barrier ();
+  emit_label (fail_label);
+  emit_move_insn (target, const1_rtx);
+  emit_label (end_label);
+}
+
 /* Write PATCH_AREA_SIZE NOPs into the asm outfile FILE around a function
    entry.  If RECORD_P is true and the target supports named sections,
    the location of the NOPs will be recorded in a special object section
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 1a0db8d..b99fff6 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -237,6 +237,7 @@ extern bool default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
 						    enum by_pieces_operation,
 						    bool);
 extern int default_compare_by_pieces_branch_ratio (machine_mode);
+extern void default_finish_compare_by_pieces (rtx, rtx_code_label *);
 
 extern void default_print_patchable_function_entry (FILE *,
 						    unsigned HOST_WIDE_INT,
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcmp-1.c b/gcc/testsuite/gcc.target/i386/pieces-memcmp-1.c
new file mode 100644
index 0000000..de1d82f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcmp-1.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int foo(char *a)
+{
+  static const char t[] = "0123456789012345678901234567890";
+  return __builtin_memcmp(a, &t[0], sizeof(t)) == 0;
+}
+
+/* { dg-final { scan-assembler-not "xorl\[ \\t]*\\\$1," } } */