From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 26194 invoked by alias); 24 Feb 2007 23:47:11 -0000 Received: (qmail 26166 invoked by uid 48); 24 Feb 2007 23:47:02 -0000 Date: Sat, 24 Feb 2007 23:47:00 -0000 Message-ID: <20070224234702.26165.qmail@sourceware.org> X-Bugzilla-Reason: CC References: Subject: [Bug target/30778] [4.3 Regression] invalid code generation for memset() with -mtune=k8 In-Reply-To: Reply-To: gcc-bugzilla@gcc.gnu.org To: gcc-bugs@gcc.gnu.org From: "hubicka at gcc dot gnu dot org" Mailing-List: contact gcc-bugs-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-bugs-owner@gcc.gnu.org X-SW-Source: 2007-02/txt/msg02785.txt.bz2 ------- Comment #6 from hubicka at gcc dot gnu dot org 2007-02-24 23:47 ------- Hi, this is the version I ended up with. In general I don't think that this code should have to deal with such small constant sizes; we probably should bump the move_by_pieces constants, since it can do a better job using proper moves that do not trigger partial memory stalls. This patch, however, fixes one extra pasto and makes the prologue test unconditional - I was a bit overaggressive in minimizing the amount of RTL produced, which only leads to bugs in corner cases. Thanks a lot for looking into it - I should've dealt with this bug a lot sooner, but I was very busy last week... 
Honza Index: config/i386/i386.c =================================================================== --- config/i386/i386.c (revision 122292) +++ config/i386/i386.c (working copy) @@ -13522,7 +13522,7 @@ expand_movmem_epilogue (rtx destmem, rtx HOST_WIDE_INT countval = INTVAL (count); int offset = 0; - if ((countval & 0x16) && max_size > 16) + if ((countval & 0x10) && max_size > 16) { if (TARGET_64BIT) { @@ -13539,8 +13539,8 @@ expand_movmem_epilogue (rtx destmem, rtx emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset); else { - emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset); - emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 4); + emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset); + emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4); } offset += 8; } @@ -13675,7 +13675,7 @@ expand_setmem_epilogue (rtx destmem, rtx HOST_WIDE_INT countval = INTVAL (count); int offset = 0; - if ((countval & 0x16) && max_size > 16) + if ((countval & 0x10) && max_size > 16) { if (TARGET_64BIT) { @@ -14118,19 +14118,22 @@ ix86_expand_movmem (rtx dst, rtx src, rt gcc_assert (desired_align >= 1 && align >= 1); /* Ensure that alignment prologue won't copy past end of block. */ - if ((size_needed > 1 || (desired_align > 1 && desired_align > align)) - && !count) + if (size_needed > 1 || (desired_align > 1 && desired_align > align)) { - epilogue_size_needed = MAX (size_needed - 1, desired_align - align); + enum machine_mode mode = GET_MODE (count_exp); + if (mode == VOIDmode) + mode = DImode; + + epilogue_size_needed = MAX (size_needed - 1, desired_align - align); /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes. Make sure it is power of 2. 
*/ epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed); label = gen_label_rtx (); - emit_cmp_and_jump_insns (count_exp, + emit_cmp_and_jump_insns (force_reg (mode, count_exp), GEN_INT (epilogue_size_needed), - LTU, 0, GET_MODE (count_exp), 1, label); + LTU, 0, mode, 1, label); if (expected_size == -1 || expected_size < epilogue_size_needed) predict_jump (REG_BR_PROB_BASE * 60 / 100); else @@ -14430,11 +14433,14 @@ ix86_expand_setmem (rtx dst, rtx count_e promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed, desired_align, align); /* Ensure that alignment prologue won't copy past end of block. */ - if ((size_needed > 1 || (desired_align > 1 && desired_align > align)) - && !count) + if (size_needed > 1 || (desired_align > 1 && desired_align > align)) { - epilogue_size_needed = MAX (size_needed - 1, desired_align - align); + enum machine_mode mode = GET_MODE (count_exp); + + if (mode == VOIDmode) + mode = DImode; + epilogue_size_needed = MAX (size_needed - 1, desired_align - align); /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes. Make sure it is power of 2. */ epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed); @@ -14446,9 +14452,9 @@ ix86_expand_setmem (rtx dst, rtx count_e if (epilogue_size_needed > 2 && !promoted_val) force_loopy_epilogue = true; label = gen_label_rtx (); - emit_cmp_and_jump_insns (count_exp, + emit_cmp_and_jump_insns (force_reg (mode, count_exp), GEN_INT (epilogue_size_needed), - LTU, 0, GET_MODE (count_exp), 1, label); + LTU, 0, mode, 1, label); if (expected_size == -1 || expected_size <= epilogue_size_needed) predict_jump (REG_BR_PROB_BASE * 60 / 100); else -- http://gcc.gnu.org/bugzilla/show_bug.cgi?id=30778