public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
  • * [AArch64][2/3] Optimize aarch64_add_constant to generate better addition sequences
           [not found] <626096f0-957f-98bd-5efa-faa0c14eb5ab@foss.arm.com>
           [not found] ` <b653851c-9587-a498-a8da-8d235b4ddbcc@foss.arm.com>
    @ 2016-07-20 13:03 ` Jiong Wang
      2016-07-20 14:13   ` Richard Earnshaw (lists)
      1 sibling, 1 reply; 9+ messages in thread
    From: Jiong Wang @ 2016-07-20 13:03 UTC (permalink / raw)
      To: GCC Patches
    
    [-- Attachment #1: Type: text/plain, Size: 1332 bytes --]
    
    This patch optimizes the immediate addition sequences generated by
    aarch64_add_constant.
    
    The current addition sequences generated are:
    
       * If the immediate fits into the unsigned 12-bit range, generate a single add/sub.
         
       * Otherwise, if it fits into the unsigned 24-bit range, generate two add/sub instructions.
    
       * Otherwise, invoke the general constant build function.
    
    
    This hasn't considered the situation where the immediate can't fit into
    the unsigned 12-bit range but can fit into a single mov instruction, in
    which case we generate one move and one addition.  The move won't touch
    the destination register, thus the sequence is better than two additions
    which both touch the destination register.
    
    
    This patch thus optimizes the addition sequences into:
    
       * If the immediate fits into the unsigned 12-bit range, generate a single add/sub.
      
       * Otherwise, if it fits into the unsigned 24-bit range, generate two add/sub
         instructions.  But don't do this if it fits into a single move instruction,
         in which case move the immediate to the scratch register first, then
         generate one addition to add the scratch register to the destination
         register.
         
       * Otherwise, invoke the general constant build function.
    
    
    OK for trunk?
    
    gcc/
    2016-07-20  Jiong Wang  <jiong.wang@arm.com>
    
                 * config/aarch64/aarch64.c (aarch64_add_constant): Optimize
                 instruction sequences.
    
    
    [-- Attachment #2: build-const-2.patch --]
    [-- Type: text/x-patch, Size: 3283 bytes --]
    
    diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
    index aeea3b3ebc514663043ac8d7cd13361f06f78502..41844a101247c939ecb31f8a8c17cf79759255aa 100644
    --- a/gcc/config/aarch64/aarch64.c
    +++ b/gcc/config/aarch64/aarch64.c
    @@ -1865,6 +1865,47 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
       aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
     }
     
    +/* Add DELTA onto REGNUM in MODE, using SCRATCHREG to held intermediate value if
    +   it is necessary.  */
    +
    +static void
    +aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
    +		      HOST_WIDE_INT delta)
    +{
    +  HOST_WIDE_INT mdelta = abs_hwi (delta);
    +  rtx this_rtx = gen_rtx_REG (mode, regnum);
    +
    +  /* Do nothing if mdelta is zero.  */
    +  if (!mdelta)
    +    return;
    +
    +  /* We only need single instruction if the offset fit into add/sub.  */
    +  if (aarch64_uimm12_shift (mdelta))
    +    {
    +      emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta)));
    +      return;
    +    }
    +
    +  /* We need two add/sub instructions, each one perform part of the
    +     addition/subtraction, but don't this if the addend can be loaded into
    +     register by single instruction, in that case we prefer a move to scratch
    +     register following by addition.  */
    +  if (mdelta < 0x1000000 && !aarch64_move_imm (delta, mode))
    +    {
    +      HOST_WIDE_INT low_off = mdelta & 0xfff;
    +
    +      low_off = delta < 0 ? -low_off : low_off;
    +      emit_insn (gen_add2_insn (this_rtx, GEN_INT (low_off)));
    +      emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta - low_off)));
    +      return;
    +    }
    +
    +  /* Otherwise use generic function to handle all other situations.  */
    +  rtx scratch_rtx = gen_rtx_REG (mode, scratchreg);
    +  aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (delta), true, mode);
    +  emit_insn (gen_add2_insn (this_rtx, scratch_rtx));
    +}
    +
     static bool
     aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
     				 tree exp ATTRIBUTE_UNUSED)
    @@ -3337,44 +3378,6 @@ aarch64_final_eh_return_addr (void)
     				       - 2 * UNITS_PER_WORD));
     }
     
    -static void
    -aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
    -		      HOST_WIDE_INT delta)
    -{
    -  HOST_WIDE_INT mdelta = delta;
    -  rtx this_rtx = gen_rtx_REG (mode, regnum);
    -  rtx scratch_rtx = gen_rtx_REG (mode, scratchreg);
    -
    -  if (mdelta < 0)
    -    mdelta = -mdelta;
    -
    -  if (mdelta >= 4096 * 4096)
    -    {
    -      aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (delta), true, mode);
    -      emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
    -    }
    -  else if (mdelta > 0)
    -    {
    -      if (mdelta >= 4096)
    -	{
    -	  emit_insn (gen_rtx_SET (scratch_rtx, GEN_INT (mdelta / 4096)));
    -	  rtx shift = gen_rtx_ASHIFT (mode, scratch_rtx, GEN_INT (12));
    -	  if (delta < 0)
    -	    emit_insn (gen_rtx_SET (this_rtx,
    -				    gen_rtx_MINUS (mode, this_rtx, shift)));
    -	  else
    -	    emit_insn (gen_rtx_SET (this_rtx,
    -				    gen_rtx_PLUS (mode, this_rtx, shift)));
    -	}
    -      if (mdelta % 4096 != 0)
    -	{
    -	  scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
    -	  emit_insn (gen_rtx_SET (this_rtx,
    -				  gen_rtx_PLUS (mode, this_rtx, scratch_rtx)));
    -	}
    -    }
    -}
    -
     /* Output code to add DELTA to the first argument, and then jump
        to FUNCTION.  Used for C++ multiple inheritance.  */
     static void
    
    ^ permalink raw reply	[flat|nested] 9+ messages in thread

  • end of thread, other threads:[~2016-07-25 13:08 UTC | newest]
    
    Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
    -- links below jump to the message on this page --
         [not found] <626096f0-957f-98bd-5efa-faa0c14eb5ab@foss.arm.com>
         [not found] ` <b653851c-9587-a498-a8da-8d235b4ddbcc@foss.arm.com>
    2016-07-20 13:03   ` [AArch64][3/3] Migrate aarch64_expand_prologue/epilogue to aarch64_add_constant Jiong Wang
    2016-07-20 14:19     ` Richard Earnshaw (lists)
    2016-07-20 15:02       ` Jiong Wang
    2016-07-20 15:09         ` Richard Earnshaw (lists)
    2016-07-21 10:08         ` Richard Earnshaw (lists)
    2016-07-25  9:34           ` Jiong Wang
    2016-07-25 13:08             ` Richard Earnshaw (lists)
    2016-07-20 13:03 ` [AArch64][2/3] Optimize aarch64_add_constant to generate better addition sequences Jiong Wang
    2016-07-20 14:13   ` Richard Earnshaw (lists)
    

    This is a public inbox, see mirroring instructions
    for how to clone and mirror all data and code used for this inbox;
    as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).