public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* Ping: C-family stack check for threads
@ 2011-09-04 15:48 Thomas Klein
  2011-09-05  9:45 ` Ye Joey
  0 siblings, 1 reply; 13+ messages in thread
From: Thomas Klein @ 2011-09-04 15:48 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 2371 bytes --]

ping

references
http://gcc.gnu.org/ml/gcc-patches/2011-08/msg00216.html
http://gcc.gnu.org/ml/gcc-patches/2011-07/msg00281.html
http://gcc.gnu.org/ml/gcc-patches/2011-07/msg00149.html
http://gcc.gnu.org/ml/gcc-patches/2011-06/msg01872.html
http://gcc.gnu.org/ml/gcc-patches/2011-03/msg01226.html


gcc/ChangeLog

2011-09-04  Thomas Klein <th.r.klein@web.de>
     * opts.c (common_handle_option): introduce new parameters "direct" and
     "indirect"
     * flag-types.h (enum stack_check_type): Likewise

     * explow.c (allocate_dynamic_stack_space):
     - suppress stack probing if parameter "direct", "indirect" or if a
     stack-limit is given
     - do additional read of limit value if parameter "indirect" and a
     stack-limit symbol is given
     - emit a call to a stack_failure function [as an alternative to a trap
     call]
     (function probe_stack_range): if allowed to override the range probe,
     emit generic_limit_check_stack

     * config/arm/arm.c
     (stack_check_work_registers): new function to find possible working
     registers [only used by "stack check"]
     (emit_push_regs): add push RTL instruction without keeping regnumber
     and frame memory in mind.
     (emit_pop_regs): add pop RTL instruction to revert the above push
     (emit_stack_check_insns): new function to write RTL instructions for
     stack check at prologue stage.
     (arm_expand_prologue): stack check integration for ARM and Thumb-2
     (thumb1_expand_prologue): stack check integration for Thumb-1

     * config/arm/arm.md
     (cbranchsi4_insn): allow compare and branch using stack pointer
     register [at thumb mode]
     (arm_cmpsi_insn): allow comparing using stack pointer register [at arm]
     (probe_stack): do not emit code when parameter "direct" or "indirect"
     is given; otherwise emit the move the same way as gcc/explow.c [function
     emit_stack_probe]
     (probe_stack_done): dummy to make sure probe_stack insns are not
     optimized away
     (generic_limit_check_stack): if stack-limit and parameter "generic" is
     given use the limit the same way as in function
     allocate_dynamic_stack_space
     (stack_failure): failure call used in stack check functions
     emit_stack_check_insns, generic_limit_check_stack or
     allocate_dynamic_stack_space [similar to a trap but avoid conflict with
     builtin_trap]


[-- Attachment #2: stackCheck.diff --]
[-- Type: text/plain, Size: 18163 bytes --]

Index: gcc/opts.c
===================================================================
--- gcc/opts.c	(revision 178508)
+++ gcc/opts.c	(working copy)
@@ -1644,6 +1644,12 @@ common_handle_option (struct gcc_options *opts,
 			   : STACK_CHECK_STATIC_BUILTIN
 			     ? STATIC_BUILTIN_STACK_CHECK
 			     : GENERIC_STACK_CHECK;
+      else if (!strcmp (arg, "indirect"))
+	/* This is an other stack checking method.  */
+	opts->x_flag_stack_check = INDIRECT_STACK_CHECK;
+      else if (!strcmp (arg, "direct"))
+	/* This is an other stack checking method.  */
+	opts->x_flag_stack_check = DIRECT_STACK_CHECK;
       else
 	warning_at (loc, 0, "unknown stack check parameter \"%s\"", arg);
       break;
Index: gcc/flag-types.h
===================================================================
--- gcc/flag-types.h	(revision 178508)
+++ gcc/flag-types.h	(working copy)
@@ -153,7 +153,15 @@ enum stack_check_type
 
   /* Check the stack and entirely rely on the target configuration
      files, i.e. do not use the generic mechanism at all.  */
-  FULL_BUILTIN_STACK_CHECK
+  FULL_BUILTIN_STACK_CHECK,
+
+  /* Check the stack (if possible) before allocation of local variables at
+     each function entry. The stack limit is directly given e.g. by address
+     of a symbol */
+  DIRECT_STACK_CHECK,
+  /* Check the stack (if possible) before allocation of local variables at
+     each function entry. The stack limit is given by global variable. */
+  INDIRECT_STACK_CHECK
 };
 
 /* Names for the different levels of -Wstrict-overflow=N.  The numeric
Index: gcc/explow.c
===================================================================
--- gcc/explow.c	(revision 178508)
+++ gcc/explow.c	(working copy)
@@ -1372,7 +1372,12 @@ allocate_dynamic_stack_space (rtx size, unsigned s
 
   /* If needed, check that we have the required amount of stack.  Take into
      account what has already been checked.  */
-  if (STACK_CHECK_MOVING_SP)
+  if (  STACK_CHECK_MOVING_SP 
+#ifdef HAVE_generic_limit_check_stack    
+     || crtl->limit_stack
+#endif
+     || flag_stack_check == DIRECT_STACK_CHECK
+     || flag_stack_check == INDIRECT_STACK_CHECK)
     ;
   else if (flag_stack_check == GENERIC_STACK_CHECK)
     probe_stack_range (STACK_OLD_CHECK_PROTECT + STACK_CHECK_MAX_FRAME_SIZE,
@@ -1409,19 +1414,32 @@ allocate_dynamic_stack_space (rtx size, unsigned s
       /* Check stack bounds if necessary.  */
       if (crtl->limit_stack)
 	{
+          rtx limit_rtx;
 	  rtx available;
 	  rtx space_available = gen_label_rtx ();
+          if (  GET_CODE (stack_limit_rtx) == SYMBOL_REF
+             && flag_stack_check == INDIRECT_STACK_CHECK)
+            limit_rtx = expand_unop (Pmode, mov_optab,
+				    gen_rtx_MEM (Pmode, stack_limit_rtx),
+				    NULL_RTX, 1);
+          else
+            limit_rtx = stack_limit_rtx;
 #ifdef STACK_GROWS_DOWNWARD
 	  available = expand_binop (Pmode, sub_optab,
-				    stack_pointer_rtx, stack_limit_rtx,
+				    stack_pointer_rtx, limit_rtx,
 				    NULL_RTX, 1, OPTAB_WIDEN);
 #else
 	  available = expand_binop (Pmode, sub_optab,
-				    stack_limit_rtx, stack_pointer_rtx,
+				    limit_rtx, stack_pointer_rtx,
 				    NULL_RTX, 1, OPTAB_WIDEN);
 #endif
 	  emit_cmp_and_jump_insns (available, size, GEU, NULL_RTX, Pmode, 1,
 				   space_available);
+#ifdef HAVE_stack_failure
+	  if (HAVE_stack_failure)
+	    emit_insn (gen_stack_failure ());
+	  else
+#endif
 #ifdef HAVE_trap
 	  if (HAVE_trap)
 	    emit_insn (gen_trap ());
@@ -1568,6 +1586,13 @@ probe_stack_range (HOST_WIDE_INT first, rtx size)
 	return;
     }
 #endif
+#ifdef HAVE_generic_limit_check_stack
+  else if (HAVE_generic_limit_check_stack)
+    {
+      rtx addr = memory_address (Pmode,stack_pointer_rtx);
+      emit_insn (gen_generic_limit_check_stack (addr));
+    }
+#endif
 
   /* Otherwise we have to generate explicit probes.  If we have a constant
      small number of them to generate, that's the easy case.  */
Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c	(revision 178508)
+++ gcc/config/arm/arm.c	(working copy)
@@ -16263,6 +16263,299 @@ thumb_set_frame_pointer (arm_stack_offsets *offset
   RTX_FRAME_RELATED_P (insn) = 1;
 }
 
+/* Search for work registers for the stack-check operation at prologue.  Fill
+   WORKREG[0..1] and return how many are usable without extra push/pop.  */
+
+static int
+stack_check_work_registers (rtx *workreg)
+{
+  int reg, i, k, n, nregs;
+  
+  if (crtl->args.info.pcs_variant <= ARM_PCS_AAPCS_LOCAL)
+    {
+      nregs = crtl->args.info.aapcs_next_ncrn;
+    }
+  else
+    nregs = crtl->args.info.nregs;
+
+
+  n = 0;
+  i = 0;
+  /* Use the argument registers r0..r3 as long as they are not holding data:
+     never live, or ruled out by the argument-count tests below.  */
+  for (reg = 0; reg <= LAST_ARG_REGNUM && i < 2; reg++)
+    {
+      if (  !df_regs_ever_live_p (reg)
+         || (cfun->machine->uses_anonymous_args && crtl->args.pretend_args_size
+                  > (LAST_ARG_REGNUM - reg) * UNITS_PER_WORD)
+         || (!cfun->machine->uses_anonymous_args && nregs < reg + 1)
+         )
+        {
+          workreg[i++] = gen_rtx_REG (SImode, reg);
+          n = (reg + 1) % 4; /* next candidate for the fallback fill below */
+        }
+    }
+
+  /* Otherwise try r4..r7: live, non-fixed and not FP_REGNUM.  */
+  for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM && i < 2; reg++)
+    {
+      if (  df_regs_ever_live_p (reg)
+         && !fixed_regs[reg]
+         && reg != FP_REGNUM )
+        {
+          workreg[i++] = gen_rtx_REG (SImode, reg);
+        }
+    }
+
+  if (TARGET_32BIT)
+    {
+      /* ARM and Thumb-2 can use high regs.  */
+      for (reg = FIRST_HI_REGNUM; reg <= LAST_HI_REGNUM && i < 2; reg ++)
+        if (  df_regs_ever_live_p (reg)
+           && !fixed_regs[reg]
+           && reg != FP_REGNUM )
+          {
+            workreg[i++] = gen_rtx_REG (SImode, reg);
+          }
+    }
+
+  k = i; /* number of registers found so far, i.e. usable without a push */
+  /* If fewer than two were found that can be used without an extra push,
+     fill the remaining slots from r0..r3, starting at register N.  */
+  for ( ; i<2; i++)
+    workreg[i] = gen_rtx_REG (SImode, n++);
+
+  /* Only if K == 0 will two registers be pushed later; only in that case
+     are the registers guaranteed to be sorted.  */
+  return k;
+}
+
+/* Emit one insn that pushes REG[0..NUM_TO_PUSH-1] and moves SP down.  */
+static void
+emit_push_regs(int num_to_push, rtx *reg)
+{
+  int i;
+  rtvec tmpvec;
+  rtx par[16], dwarf, tmp, insn;
+
+  if (num_to_push > 15 || num_to_push < 0) /* out of range: emit nothing */
+    return;
+
+  tmpvec = gen_rtvec (1, reg[0]);
+  par[0] = gen_rtx_UNSPEC (BLKmode, tmpvec, UNSPEC_PUSH_MULT);
+  for (i=1; i<num_to_push; i++)
+    par[i] = gen_rtx_USE (VOIDmode, reg[i]);
+
+  tmp = plus_constant (stack_pointer_rtx, -4 * num_to_push);
+  dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp); /* SP -= 4 * n */
+  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
+  tmp = gen_frame_mem (BLKmode, tmp);
+  par[0]= gen_rtx_SET (VOIDmode, tmp, par[0]);
+  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (num_to_push, par));
+  insn = emit_insn (tmp);
+  RTX_FRAME_RELATED_P (insn) = 1;
+  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); /* SP change as note */
+  return;
+}
+
+/* Emit one insn meant to pop REG[0..NUM_TO_POP-1], reverting emit_push_regs.  */
+static void
+emit_pop_regs(const int num_to_pop, rtx *reg)
+{
+  int i;
+  rtvec tmpvec;
+  rtx par[16], dwarf, tmp, insn;
+
+  if (num_to_pop > 15 || num_to_pop < 0) /* out of range: emit nothing */
+    return;
+
+  tmpvec = gen_rtvec (1, reg[0]);
+  par[0] = gen_rtx_UNSPEC (BLKmode, tmpvec, UNSPEC_PUSH_MULT); /* NOTE(review): push unspec reused for a pop -- confirm */
+  for (i=1; i<num_to_pop; i++)
+    par[i] = gen_rtx_USE (VOIDmode, reg[i]);
+  tmp = plus_constant (stack_pointer_rtx, 4 * num_to_pop);
+  dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp); /* SP += 4 * n */
+  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
+  tmp = gen_frame_mem (BLKmode, tmp);
+  par[0] = gen_rtx_SET (VOIDmode, tmp, par[0]); /* NOTE(review): SET has the memory as destination, as in the push -- confirm */
+  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (num_to_pop, par));
+  insn = emit_insn (tmp);
+  RTX_FRAME_RELATED_P (insn) = 1;
+  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); /* SP change as note */
+  return;
+}
+
+/* Emit RTL instructions for the stack check at prologue stage.
+   AMOUNT is the byte count added to the stack limit before the limit is
+   compared with SP; LR_NOT_YET_PUSHED nonzero means LR is pushed here before
+   the call to the failure routine.  For Thumb this may look like this:
+     push {rsym,ramn}
+     ldr rsym, =symbol_addr_of(stack_limit_rtx)
+     ldr rsym, [rsym]
+     ldr ramn, =length_of(amount)
+     add rsym, rsym, ramn
+     cmp sp, rsym
+     bhs .LSPCHK0
+     push {lr}
+     bl __thumb_stack_failure
+   .LSPCHK0:
+     pop {rsym,ramn}  */
+static void
+emit_stack_check_insns (HOST_WIDE_INT amount, int lr_not_yet_pushed)
+{
+  unsigned numregs;
+  unsigned amount_needsreg;
+  bool amount_const_ok, is_non_opt_thumb2, is_thumb2_hi_reg[2];
+  bool issym=false;
+  rtx reg[2], cmp_reg, amount_rtx;
+  rtx dwarf, tmp, insn;
+  rtx jump, label;
+
+  numregs = stack_check_work_registers(reg);
+
+  if (TARGET_THUMB1)
+    amount_const_ok = (amount < 256);
+  else
+    amount_const_ok = const_ok_for_arm (amount);
+
+  if (GET_CODE (stack_limit_rtx) == SYMBOL_REF) /*stack_limit_rtx*/
+    {
+      issym = true;
+      amount_needsreg = !amount_const_ok;
+    }
+  else
+    amount_needsreg = (amount != 0);
+
+  is_non_opt_thumb2 = (TARGET_THUMB2 && !(optimize_size || optimize >= 2));
+  is_thumb2_hi_reg[0] = (TARGET_THUMB2 && REGNO (reg[0]) > 7); /* REG rtx: use REGNO, not INTVAL */
+  is_thumb2_hi_reg[1] = (TARGET_THUMB2 && REGNO (reg[1]) > 7);
+
+  /* push as many as needed */
+  if (issym && amount_needsreg) /*need two temp regs for limit and amount*/
+    {
+      if (numregs >= 2)
+        ; /*have 2 regs => no need to push*/
+      else if (numregs == 1)
+        {
+          /*have one reg but need two regs => push temp reg for amount*/
+          emit_push_regs (1, &reg[1]); /*push {reg1}*/
+          /*due to additional push try to correct amount*/
+          if (amount >= 4)
+            amount -= 4;
+        }
+      else
+        {
+          /*have no reg but need two => push temp regs for limit and amount*/
+          emit_push_regs (2, &reg[0]); /*push {reg0,reg1}*/
+          /*due to additional push try to correct amount*/
+          if (amount >= 8)
+            amount -= 8;
+        }
+    }
+  else if ((issym || amount_needsreg) && numregs == 0)
+    { /*push temp reg either for limit or amount*/
+      emit_push_regs (1, &reg[0]); /*push {reg0}*/
+      /*due to additional push try to correct amount*/
+      if (amount >= 4)
+        {
+          if (amount_const_ok)
+            {
+              if (TARGET_THUMB1 || const_ok_for_arm(amount - 4))
+                amount -= 4;
+              /*on Thumb2 or ARM may not corrected; shouldn't hurt*/
+            }
+          else /*will be loaded from pool*/
+            amount -= 4;
+        }
+    }
+
+  amount_rtx = GEN_INT (amount);
+
+  /* move limit plus amount to cmp_reg e.g. reg[0] */
+  if (issym)
+    {
+      if (is_non_opt_thumb2 || is_thumb2_hi_reg[0])
+        arm_emit_movpair(reg[0], stack_limit_rtx);
+      else
+        emit_move_insn(reg[0], stack_limit_rtx);
+
+      if (flag_stack_check == INDIRECT_STACK_CHECK)
+        emit_insn (gen_movsi (reg[0], gen_rtx_MEM (SImode, reg[0])));
+      if (amount)
+        {
+          if (amount_const_ok)
+            emit_insn(gen_addsi3(reg[0], reg[0], amount_rtx));
+          else
+            {
+              if (is_non_opt_thumb2 || is_thumb2_hi_reg[1])
+                arm_emit_movpair(reg[1], amount_rtx);
+              else
+                emit_insn (gen_movsi (reg[1], amount_rtx));
+              emit_insn(gen_addsi3(reg[0], reg[0], reg[1]));
+            }
+        }
+      cmp_reg = reg[0];
+    }
+  else if (amount)
+    {
+      if (amount_const_ok)
+        emit_move_insn(reg[0], amount_rtx);
+      else
+        {
+          if (is_non_opt_thumb2 || is_thumb2_hi_reg[0])
+            arm_emit_movpair(reg[0], amount_rtx);
+          else
+            emit_insn (gen_movsi (reg[0], amount_rtx));
+        }
+      emit_insn(gen_addsi3(reg[0], reg[0], stack_limit_rtx));
+      cmp_reg = reg[0];
+    }
+  else
+    cmp_reg = stack_limit_rtx;
+
+  /*compare and jump*/
+  emit_insn (gen_blockage ());
+  label = gen_label_rtx ();
+  do_compare_rtx_and_jump (stack_pointer_rtx, cmp_reg, GEU, 1, Pmode,
+                           NULL_RTX, NULL_RTX, label, -1);
+  jump = get_last_insn ();
+  gcc_assert (JUMP_P (jump));
+  JUMP_LABEL (jump) = label;
+  LABEL_NUSES (label)++;
+  if (lr_not_yet_pushed) /*push LR if not already done*/
+    {
+      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
+      emit_push_regs (1, &lr);
+    }
+  insn = emit_insn (gen_stack_failure ());
+  if (lr_not_yet_pushed)
+    {
+      /*the trap will not come back; but tell it has restored the stack*/
+      tmp = plus_constant (stack_pointer_rtx, 4);
+      dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
+      RTX_FRAME_RELATED_P (insn) = 1;
+      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
+    }
+  emit_label (label);
+
+  /*restore registers*/
+  if (issym && amount_needsreg) /*pop temp regs used by limit and amount*/
+    {
+      if (numregs >= 2)
+        ; /*no need to pop*/
+      else if (numregs == 1)
+        emit_pop_regs (1, &reg[1]); /*pop {reg1}*/
+      else
+        emit_pop_regs (2, &reg[0]); /*pop {reg0, reg1}*/
+    }
+  else if ((issym || amount_needsreg) && numregs == 0)
+    { /*pop temp reg used by limit or amount*/
+      emit_pop_regs (1, &reg[0]); /*pop {reg0}*/
+    }
+
+  return;
+}
+
 /* Generate the prologue instructions for entry into an ARM or Thumb-2
    function.  */
 void
@@ -16514,6 +16807,17 @@ arm_expand_prologue (void)
     current_function_static_stack_size
       = offsets->outgoing_args - offsets->saved_args;
 
+  if (  crtl->limit_stack
+     && !(IS_INTERRUPT (func_type))
+     && (  flag_stack_check == DIRECT_STACK_CHECK 
+        || flag_stack_check == INDIRECT_STACK_CHECK)
+     && (offsets->outgoing_args - offsets->saved_args) > 0
+     )
+    {
+	  emit_stack_check_insns (offsets->outgoing_args - saved_regs
+			- offsets->saved_args, !(live_regs_mask & (1<<LR_REGNUM)) );
+    }
+    
   if (offsets->outgoing_args != offsets->saved_args + saved_regs)
     {
       /* This add can produce multiple insns for a large constant, so we
@@ -21722,6 +22026,16 @@ thumb1_expand_prologue (void)
 
   amount = offsets->outgoing_args - offsets->saved_regs;
   amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
+  
+  if(  crtl->limit_stack
+    && (  flag_stack_check == DIRECT_STACK_CHECK
+       || flag_stack_check == INDIRECT_STACK_CHECK)
+    && (offsets->outgoing_args - offsets->saved_args)
+    )
+    {
+      emit_stack_check_insns (amount, !(l_mask & (1<<LR_REGNUM)));
+    }
+  
   if (amount)
     {
       if (amount < 512)
@@ -21881,6 +22195,7 @@ thumb1_output_interwork (void)
   asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
 
   return "";
+    
 }
 
 /* Handle the case of a double word load into a low register from
Index: gcc/config/arm/arm.md
===================================================================
--- gcc/config/arm/arm.md	(revision 178508)
+++ gcc/config/arm/arm.md	(working copy)
@@ -112,6 +112,7 @@
   UNSPEC_SYMBOL_OFFSET  ; The offset of the start of the symbol from
                         ; another symbolic address.
   UNSPEC_MEMORY_BARRIER ; Represent a memory barrier.
+  UNSPEC_PROBE_STACK    ; probe stack memory reference
 ])
 
 ;; UNSPEC_VOLATILE Usage:
@@ -6682,8 +6683,8 @@
 (define_insn "cbranchsi4_insn"
   [(set (pc) (if_then_else
 	      (match_operator 0 "arm_comparison_operator"
-	       [(match_operand:SI 1 "s_register_operand" "l,l*h")
-	        (match_operand:SI 2 "thumb1_cmp_operand" "lI*h,*r")])
+	       [(match_operand:SI 1 "s_register_operand" "l,l*h,k")
+	        (match_operand:SI 2 "thumb1_cmp_operand" "lI*h,*r,r")])
 	      (label_ref (match_operand 3 "" ""))
 	      (pc)))]
   "TARGET_THUMB1"
@@ -7229,17 +7230,18 @@
 
 (define_insn "*arm_cmpsi_insn"
   [(set (reg:CC CC_REGNUM)
-	(compare:CC (match_operand:SI 0 "s_register_operand" "l,r,r,r")
-		    (match_operand:SI 1 "arm_add_operand"    "Py,r,rI,L")))]
+	(compare:CC (match_operand:SI 0 "s_register_operand" "l,r,k,r,r")
+		    (match_operand:SI 1 "arm_add_operand"    "Py,r,r,rI,L")))]
   "TARGET_32BIT"
   "@
    cmp%?\\t%0, %1
    cmp%?\\t%0, %1
    cmp%?\\t%0, %1
+   cmp%?\\t%0, %1
    cmn%?\\t%0, #%n1"
   [(set_attr "conds" "set")
-   (set_attr "arch" "t2,t2,any,any")
-   (set_attr "length" "2,2,4,4")]
+   (set_attr "arch" "t2,t2,any,any,any")
+   (set_attr "length" "2,2,4,4,4")]
 )
 
 (define_insn "*cmpsi_shiftsi"
@@ -10952,6 +10954,69 @@
 
 ;;
 
+(define_expand "probe_stack"
+  [(match_operand 0 "memory_operand" "")]
+  "TARGET_EITHER"
+{
+  if (  flag_stack_check == DIRECT_STACK_CHECK
+     || flag_stack_check == INDIRECT_STACK_CHECK)
+    ;
+  else
+    {
+      emit_move_insn (operands[0], const0_rtx);
+      emit_insn (gen_probe_stack_done ());
+      emit_insn (gen_blockage ());
+    }
+  DONE;
+}
+)
+
+(define_insn "probe_stack_done"
+  [(unspec_volatile [(const_int 0)] UNSPEC_PROBE_STACK)]
+  "TARGET_EITHER"
+  {return \"@ probe stack done\";}
+  [(set_attr "type" "store1")
+   (set_attr "length" "0")]
+)
+
+(define_expand "generic_limit_check_stack"
+  [(match_operand 0 "memory_operand" "")]
+  "crtl->limit_stack 
+  && flag_stack_check != DIRECT_STACK_CHECK 
+  && flag_stack_check != INDIRECT_STACK_CHECK"
+{
+  rtx label = gen_label_rtx ();
+  rtx addr = copy_rtx (operands[0]);
+  addr = gen_rtx_fmt_ee (MINUS, Pmode, addr, GEN_INT (0));
+  addr = force_operand (addr, NULL_RTX);
+  emit_insn (gen_blockage ());
+  emit_cmp_and_jump_insns (stack_limit_rtx, addr, LEU, NULL_RTX, Pmode, 1,
+                           label);
+  emit_insn (gen_stack_failure ());
+  emit_label (label);
+  emit_insn (gen_blockage ());
+  DONE;
+}
+)
+
+(define_insn "stack_failure"
+  [(trap_if (const_int 1) (const_int 0))
+   (clobber (reg:SI LR_REGNUM))
+   (clobber (reg:CC CC_REGNUM))]
+  "TARGET_EITHER"
+  "*
+  {
+    if (TARGET_ARM)
+      output_asm_insn (\"bl\\t__arm_stack_failure\\t%@ trap call\", operands);
+    else
+      output_asm_insn (\"bl\\t__thumb_stack_failure\\t%@ trap call\", operands);
+  }
+  return \"\";
+  "
+  [(set_attr "conds" "clob")
+    (set_attr "length" "8")]
+)
+
 ;; We only care about the lower 16 bits of the constant 
 ;; being inserted into the upper 16 bits of the register.
 (define_insn "*arm_movtas_ze" 

^ permalink raw reply	[flat|nested] 13+ messages in thread
* Ping: C-family stack check for threads
@ 2011-09-20 17:09 Thomas Klein
  2011-09-20 22:07 ` Joseph S. Myers
  0 siblings, 1 reply; 13+ messages in thread
From: Thomas Klein @ 2011-09-20 17:09 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 2429 bytes --]

ping

references
http://gcc.gnu.org/ml/gcc-patches/2011-09/msg00310.html
http://gcc.gnu.org/ml/gcc-patches/2011-08/msg00216.html
http://gcc.gnu.org/ml/gcc-patches/2011-07/msg00281.html
http://gcc.gnu.org/ml/gcc-patches/2011-07/msg00149.html
http://gcc.gnu.org/ml/gcc-patches/2011-06/msg01872.html
http://gcc.gnu.org/ml/gcc-patches/2011-03/msg01226.html

gcc/ChangeLog

2011-09-20  Thomas Klein <th.r.klein@web.de>
     * opts.c (common_handle_option): introduce new parameters "direct" and
     "indirect"
     * flag-types.h (enum stack_check_type): Likewise

     * explow.c (allocate_dynamic_stack_space):
     - suppress stack probing if parameter "direct", "indirect" or if a
     stack-limit is given
     - do additional read of limit value if parameter "indirect" and a
     stack-limit symbol is given
     - emit a call to a stack_failure function [as an alternative to a trap
     call]
     (function probe_stack_range): if allowed to override the range probe,
     emit generic_limit_check_stack

     * config/arm/arm.c
     (stack_check_work_registers): new function to find possible working
     registers [only used by "stack check"]
     (emit_push_regs): add push RTL instruction without keeping regnumber
     and frame memory in mind.
     (emit_pop_regs): add pop RTL instruction to revert the above push
     (emit_stack_check_insns): new function to write RTL instructions for
     stack check at prologue stage.
     (arm_expand_prologue): stack check integration for ARM and Thumb-2
     (thumb1_output_function_prologue): stack check integration for Thumb-1

     * config/arm/arm.md
     (cbranchsi4_insn): allow compare and branch using stack pointer
     register [at thumb mode]
     (arm_cmpsi_insn): allow comparing using stack pointer register [at arm]
     (probe_stack): do not emit code when parameter "direct" or "indirect"
     is given; otherwise emit the move the same way as gcc/explow.c [function
     emit_stack_probe]
     (probe_stack_done): dummy to make sure probe_stack insns are not
     optimized away
     (generic_limit_check_stack): if stack-limit and parameter "generic" is
     given use the limit the same way as in function
     allocate_dynamic_stack_space
     (stack_failure): failure call used in stack check functions
     emit_stack_check_insns, generic_limit_check_stack or
     allocate_dynamic_stack_space [similar to a trap but avoid conflict with
     builtin_trap]



[-- Attachment #2: stackCheck.diff --]
[-- Type: text/plain, Size: 18554 bytes --]

Index: gcc/opts.c
===================================================================
--- gcc/opts.c	(revision 179007)
+++ gcc/opts.c	(working copy)
@@ -1644,6 +1644,12 @@ common_handle_option (struct gcc_options *opts,
 			   : STACK_CHECK_STATIC_BUILTIN
 			     ? STATIC_BUILTIN_STACK_CHECK
 			     : GENERIC_STACK_CHECK;
+      else if (!strcmp (arg, "indirect"))
+	/* This is an other stack checking method.  */
+	opts->x_flag_stack_check = INDIRECT_STACK_CHECK;
+      else if (!strcmp (arg, "direct"))
+	/* This is an other stack checking method.  */
+	opts->x_flag_stack_check = DIRECT_STACK_CHECK;
       else
 	warning_at (loc, 0, "unknown stack check parameter \"%s\"", arg);
       break;
Index: gcc/flag-types.h
===================================================================
--- gcc/flag-types.h	(revision 179007)
+++ gcc/flag-types.h	(working copy)
@@ -153,7 +153,15 @@ enum stack_check_type
 
   /* Check the stack and entirely rely on the target configuration
      files, i.e. do not use the generic mechanism at all.  */
-  FULL_BUILTIN_STACK_CHECK
+  FULL_BUILTIN_STACK_CHECK,
+
+  /* Check the stack (if possible) before allocation of local variables at
+     each function entry. The stack limit is directly given e.g. by address
+     of a symbol */
+  DIRECT_STACK_CHECK,
+  /* Check the stack (if possible) before allocation of local variables at
+     each function entry. The stack limit is given by global variable. */
+  INDIRECT_STACK_CHECK
 };
 
 /* Names for the different levels of -Wstrict-overflow=N.  The numeric
Index: gcc/explow.c
===================================================================
--- gcc/explow.c	(revision 179007)
+++ gcc/explow.c	(working copy)
@@ -1386,7 +1386,12 @@ allocate_dynamic_stack_space (rtx size, unsigned s
 
   /* If needed, check that we have the required amount of stack.  Take into
      account what has already been checked.  */
-  if (STACK_CHECK_MOVING_SP)
+  if (  STACK_CHECK_MOVING_SP 
+#ifdef HAVE_generic_limit_check_stack    
+     || crtl->limit_stack
+#endif
+     || flag_stack_check == DIRECT_STACK_CHECK
+     || flag_stack_check == INDIRECT_STACK_CHECK)
     ;
   else if (flag_stack_check == GENERIC_STACK_CHECK)
     probe_stack_range (STACK_OLD_CHECK_PROTECT + STACK_CHECK_MAX_FRAME_SIZE,
@@ -1423,19 +1428,32 @@ allocate_dynamic_stack_space (rtx size, unsigned s
       /* Check stack bounds if necessary.  */
       if (crtl->limit_stack)
 	{
+          rtx limit_rtx;
 	  rtx available;
 	  rtx space_available = gen_label_rtx ();
+          if (  GET_CODE (stack_limit_rtx) == SYMBOL_REF
+             && flag_stack_check == INDIRECT_STACK_CHECK)
+            limit_rtx = expand_unop (Pmode, mov_optab,
+				    gen_rtx_MEM (Pmode, stack_limit_rtx),
+				    NULL_RTX, 1);
+          else
+            limit_rtx = stack_limit_rtx;
 #ifdef STACK_GROWS_DOWNWARD
 	  available = expand_binop (Pmode, sub_optab,
-				    stack_pointer_rtx, stack_limit_rtx,
+				    stack_pointer_rtx, limit_rtx,
 				    NULL_RTX, 1, OPTAB_WIDEN);
 #else
 	  available = expand_binop (Pmode, sub_optab,
-				    stack_limit_rtx, stack_pointer_rtx,
+				    limit_rtx, stack_pointer_rtx,
 				    NULL_RTX, 1, OPTAB_WIDEN);
 #endif
 	  emit_cmp_and_jump_insns (available, size, GEU, NULL_RTX, Pmode, 1,
 				   space_available);
+#ifdef HAVE_stack_failure
+	  if (HAVE_stack_failure)
+	    emit_insn (gen_stack_failure ());
+	  else
+#endif
 #ifdef HAVE_trap
 	  if (HAVE_trap)
 	    emit_insn (gen_trap ());
@@ -1582,6 +1600,13 @@ probe_stack_range (HOST_WIDE_INT first, rtx size)
 	return;
     }
 #endif
+#ifdef HAVE_generic_limit_check_stack
+  else if (HAVE_generic_limit_check_stack)
+    {
+      rtx addr = memory_address (Pmode,stack_pointer_rtx);
+      emit_insn (gen_generic_limit_check_stack (addr));
+    }
+#endif
 
   /* Otherwise we have to generate explicit probes.  If we have a constant
      small number of them to generate, that's the easy case.  */
Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c	(revision 179007)
+++ gcc/config/arm/arm.c	(working copy)
@@ -16285,6 +16285,307 @@ thumb_set_frame_pointer (arm_stack_offsets *offset
   RTX_FRAME_RELATED_P (insn) = 1;
 }
 
+/* Select two work registers for the prologue stack check into WORKREG[0..1].
+   Return how many of them (0..2) can be used without an extra push/pop.  */
+
+static int
+stack_check_work_registers (rtx *workreg)
+{
+  int reg, i, k, n, nregs;
+  
+  if (crtl->args.info.pcs_variant <= ARM_PCS_AAPCS_LOCAL)
+    {
+      nregs = crtl->args.info.aapcs_next_ncrn;
+    }
+  else
+    {
+      nregs = crtl->args.info.nregs;
+    }
+
+
+  n = 0;
+  i = 0;
+  /* First try the argument registers r0..r3: take one if it was never live
+     or if, judging by NREGS / the pretend-args size, it holds no live data.  */
+  for (reg = 0; reg <= LAST_ARG_REGNUM && i < 2; reg++)
+    {
+      if (  !df_regs_ever_live_p (reg)
+         || (cfun->machine->uses_anonymous_args && crtl->args.pretend_args_size
+                  > (LAST_ARG_REGNUM - reg) * UNITS_PER_WORD)
+         || (!cfun->machine->uses_anonymous_args && nregs < reg + 1)
+         )
+        {
+          workreg[i++] = gen_rtx_REG (SImode, reg);
+          /* If only one register has been found so far, remember the next
+             argument register (r1, r2, r3 or, wrapping around, r0) as the
+             candidate that may have to be pushed later.  */
+          if (i<2)
+            {
+              n = (reg + 1) % NUM_ARG_REGS;
+            }
+        }
+    }
+
+  /* Otherwise try live r4..r7 (presumably already saved - TODO confirm).  */
+  for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM && i < 2; reg++)
+    {
+      if (  df_regs_ever_live_p (reg)
+         && !fixed_regs[reg]
+         && reg != FP_REGNUM )
+        {
+          workreg[i++] = gen_rtx_REG (SImode, reg);
+        }
+    }
+
+  if (TARGET_32BIT)
+    {
+      /* ARM and Thumb-2 can use high regs (same conditions as for r4..r7).  */
+      for (reg = FIRST_HI_REGNUM; reg <= LAST_HI_REGNUM && i < 2; reg ++)
+        if (  df_regs_ever_live_p (reg)
+           && !fixed_regs[reg]
+           && reg != FP_REGNUM )
+          {
+            workreg[i++] = gen_rtx_REG (SImode, reg);
+          }
+    }
+
+  k = i;
+  /* If fewer than two were found that can be used without an extra push,
+     fill the remaining slots from the argument registers, starting at N.  */
+  for ( ; i<2; i++)
+    workreg[i] = gen_rtx_REG (SImode, n++);
+
+  /* Only if k==0 will two registers be pushed later; only in that case
+     are the registers guaranteed to be sorted.  */
+  return k;
+}
+
+/* Emit one frame-related insn (UNSPEC_PUSH_MULT form) that pushes
+   REG[0..NUM_TO_PUSH-1]; emits nothing unless 1 <= NUM_TO_PUSH <= 15.  */
+static void
+emit_push_regs(int num_to_push, rtx *reg)
+{
+  int i;
+  rtvec tmpvec;
+  rtx par[16], dwarf, tmp, insn;
+
+  if (num_to_push > 15 || num_to_push < 1) /* REG[0] is read below: 0 is invalid */
+    return;
+
+  tmpvec = gen_rtvec (1, reg[0]);
+  par[0] = gen_rtx_UNSPEC (BLKmode, tmpvec, UNSPEC_PUSH_MULT);
+  for (i=1; i<num_to_push; i++)
+    par[i] = gen_rtx_USE (VOIDmode, reg[i]);
+
+  tmp = plus_constant (stack_pointer_rtx, -4 * num_to_push);
+  dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp); /* SP adjustment only */
+  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
+  tmp = gen_frame_mem (BLKmode, tmp);
+  par[0]= gen_rtx_SET (VOIDmode, tmp, par[0]);
+  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (num_to_push, par));
+  insn = emit_insn (tmp);
+  RTX_FRAME_RELATED_P (insn) = 1;
+  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
+}
+
+/* Counterpart of emit_push_regs for REG[0..NUM_TO_POP-1]; no-op unless 1..15.
+   NOTE(review): still a store built on UNSPEC_PUSH_MULT; only the SP sign differs.  */
+static void
+emit_pop_regs(const int num_to_pop, rtx *reg)
+{
+  int i;
+  rtvec tmpvec;
+  rtx par[16], dwarf, tmp, insn;
+
+  if (num_to_pop > 15 || num_to_pop < 1) /* REG[0] is read below: 0 is invalid */
+    return;
+
+  tmpvec = gen_rtvec (1, reg[0]);
+  par[0] = gen_rtx_UNSPEC (BLKmode, tmpvec, UNSPEC_PUSH_MULT);
+  for (i=1; i<num_to_pop; i++)
+    par[i] = gen_rtx_USE (VOIDmode, reg[i]);
+  tmp = plus_constant (stack_pointer_rtx, 4 * num_to_pop);
+  dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp); /* SP adjustment only */
+  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
+  tmp = gen_frame_mem (BLKmode, tmp);
+  par[0] = gen_rtx_SET (VOIDmode, tmp, par[0]);
+  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (num_to_pop, par));
+  insn = emit_insn (tmp);
+  RTX_FRAME_RELATED_P (insn) = 1;
+  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
+}
+
+/*
+ * Emit RTL instructions for stack check at prologue stage.
+ * AMOUNT is added to the stack limit before the comparison with SP; if
+ * LR_NOT_YET_PUSHED is nonzero, LR is pushed before the failure call.
+ * For Thumb this may look like this:
+ *   push {rsym,ramn}
+ *   ldr rsym, =symbol_addr_of(stack_limit_rtx)
+ *   ldr rsym, [rsym]
+ *   ldr ramn, =length_of(amount)
+ *   add rsym, rsym, ramn
+ *   cmp sp, rsym
+ *   bhs .LSPCHK0
+ *   push {lr}
+ *   bl __thumb_stack_failure
+ * .LSPCHK0:
+ *   pop {rsym,ramn}
+ */
+static void
+emit_stack_check_insns (HOST_WIDE_INT amount, int lr_not_yet_pushed)
+{
+  unsigned numregs;
+  unsigned amount_needsreg;
+  bool amount_const_ok, is_non_opt_thumb2, is_thumb2_hi_reg[2];
+  bool issym=false;
+  rtx reg[2], cmp_reg, amount_rtx;
+  rtx dwarf, tmp, insn;
+  rtx jump, label;
+
+  numregs = stack_check_work_registers(reg);
+
+  if (TARGET_THUMB1)
+    amount_const_ok = (amount < 256);
+  else
+    amount_const_ok = const_ok_for_arm (amount);
+
+  if (GET_CODE (stack_limit_rtx) == SYMBOL_REF) /*stack_limit_rtx*/
+    {
+      issym = true;
+      amount_needsreg = !amount_const_ok;
+    }
+  else
+    amount_needsreg = (amount != 0);
+  /* reg[] holds REG rtxes, so the register number is REGNO, not INTVAL.  */
+  is_non_opt_thumb2 = (TARGET_THUMB2 && !(optimize_size || optimize >= 2));
+  is_thumb2_hi_reg[0] = (TARGET_THUMB2 && REGNO (reg[0]) > LAST_LO_REGNUM);
+  is_thumb2_hi_reg[1] = (TARGET_THUMB2 && REGNO (reg[1]) > LAST_LO_REGNUM);
+
+  /* push as many as needed */
+  if (issym && amount_needsreg) /*need two temp regs for limit and amount*/
+    {
+      if (numregs >= 2)
+        ; /*have 2 regs => no need to push*/
+      else if (numregs == 1)
+        {
+          /*have one reg but need two regs => push temp reg for amount*/
+          emit_push_regs (1, &reg[1]); /*push {reg1}*/
+          /*due to additional push try to correct amount*/
+          if (amount >= 4)
+            amount -= 4;
+        }
+      else
+        {
+          /*have no reg but need two => push temp regs for limit and amount*/
+          emit_push_regs (2, &reg[0]); /*push {reg0,reg1}*/
+          /*due to additional push try to correct amount*/
+          if (amount >= 8)
+            amount -= 8;
+        }
+    }
+  else if ((issym || amount_needsreg) && numregs == 0)
+    { /*push temp reg either for limit or amount*/
+      emit_push_regs (1, &reg[0]); /*push {reg0}*/
+      /*due to additional push try to correct amount*/
+      if (amount >= 4)
+        {
+          if (amount_const_ok)
+            {
+              if (TARGET_THUMB1 || const_ok_for_arm(amount - 4))
+                amount -= 4;
+              /*on Thumb2 or ARM may not be corrected; shouldn't hurt*/
+            }
+          else /*will be loaded from pool*/
+            amount -= 4;
+        }
+    }
+
+  amount_rtx = GEN_INT (amount);
+
+  /* move limit plus amount to cmp_reg e.g. reg[0] */
+  if (issym)
+    {
+      if (is_non_opt_thumb2 || is_thumb2_hi_reg[0])
+        arm_emit_movpair(reg[0], stack_limit_rtx);
+      else
+        emit_move_insn(reg[0], stack_limit_rtx);
+
+      if (flag_stack_check == INDIRECT_STACK_CHECK)
+        emit_insn (gen_movsi (reg[0], gen_rtx_MEM (SImode, reg[0])));
+      if (amount)
+        {
+          if (amount_const_ok)
+            emit_insn(gen_addsi3(reg[0], reg[0], amount_rtx));
+          else
+            {
+              if (is_non_opt_thumb2 || is_thumb2_hi_reg[1])
+                arm_emit_movpair(reg[1], amount_rtx);
+              else
+                emit_insn (gen_movsi (reg[1], amount_rtx));
+              emit_insn(gen_addsi3(reg[0], reg[0], reg[1]));
+            }
+        }
+      cmp_reg = reg[0];
+    }
+  else if (amount)
+    {
+      if (amount_const_ok)
+        emit_move_insn(reg[0], amount_rtx);
+      else
+        {
+          if (is_non_opt_thumb2 || is_thumb2_hi_reg[0])
+            arm_emit_movpair(reg[0], amount_rtx);
+          else
+            emit_insn (gen_movsi (reg[0], amount_rtx));
+        }
+      emit_insn(gen_addsi3(reg[0], reg[0], stack_limit_rtx));
+      cmp_reg = reg[0];
+    }
+  else
+    cmp_reg = stack_limit_rtx;
+
+  /*compare and jump*/
+  emit_insn (gen_blockage ());
+  label = gen_label_rtx ();
+  do_compare_rtx_and_jump (stack_pointer_rtx, cmp_reg, GEU, 1, Pmode,
+		  NULL_RTX, NULL_RTX, label, -1);
+  jump = get_last_insn ();
+  gcc_assert (JUMP_P (jump));
+  JUMP_LABEL (jump) = label;
+  LABEL_NUSES (label)++;
+  if (lr_not_yet_pushed) /*push LR if not already done*/
+    {
+      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
+      emit_push_regs (1, &lr);
+    }
+  insn = emit_insn (gen_stack_failure ());
+  if (lr_not_yet_pushed)
+    {
+      /*the trap will not come back; but tell it has restored the stack*/
+      tmp = plus_constant (stack_pointer_rtx, 4);
+      dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
+      RTX_FRAME_RELATED_P (insn) = 1;
+      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
+    }
+  emit_label (label);
+
+  /*restore registers*/
+  if (issym && amount_needsreg) /*pop temp regs used by limit and amount*/
+    {
+      if (numregs >= 2)
+        ; /*no need to pop*/
+      else if (numregs == 1)
+        emit_pop_regs (1, &reg[1]); /*pop {reg1}*/
+      else
+        emit_pop_regs (2, &reg[0]); /*pop {reg0, reg1}*/
+    }
+  else if ((issym || amount_needsreg) && numregs == 0)
+    { /*pop temp reg used by limit or amount*/
+      emit_pop_regs (1, &reg[0]); /*pop {reg0}*/
+    }
+}
+
 /* Generate the prologue instructions for entry into an ARM or Thumb-2
    function.  */
 void
@@ -16536,6 +16837,17 @@ arm_expand_prologue (void)
     current_function_static_stack_size
       = offsets->outgoing_args - offsets->saved_args;
 
+  if (  crtl->limit_stack
+     && !(IS_INTERRUPT (func_type))
+     && (  flag_stack_check == DIRECT_STACK_CHECK 
+        || flag_stack_check == INDIRECT_STACK_CHECK)
+     && (offsets->outgoing_args - offsets->saved_args) > 0
+     )
+    {
+      emit_stack_check_insns (offsets->outgoing_args - saved_regs
+                  - offsets->saved_args, !(live_regs_mask & (1<<LR_REGNUM)) );
+    }
+    
   if (offsets->outgoing_args != offsets->saved_args + saved_regs)
     {
       /* This add can produce multiple insns for a large constant, so we
@@ -21753,6 +22065,16 @@ thumb1_expand_prologue (void)
 
   amount = offsets->outgoing_args - offsets->saved_regs;
   amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
+  
+  if(  crtl->limit_stack
+    && (  flag_stack_check == DIRECT_STACK_CHECK
+       || flag_stack_check == INDIRECT_STACK_CHECK)
+    && (offsets->outgoing_args - offsets->saved_args)
+    )
+    {
+      emit_stack_check_insns (amount, !(l_mask & (1<<LR_REGNUM)));
+    }
+  
   if (amount)
     {
       if (amount < 512)
@@ -21912,6 +22234,7 @@ thumb1_output_interwork (void)
   asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
 
   return "";
+    
 }
 
 /* Handle the case of a double word load into a low register from
Index: gcc/config/arm/arm.md
===================================================================
--- gcc/config/arm/arm.md	(revision 179007)
+++ gcc/config/arm/arm.md	(working copy)
@@ -112,6 +112,7 @@
   UNSPEC_SYMBOL_OFFSET  ; The offset of the start of the symbol from
                         ; another symbolic address.
   UNSPEC_MEMORY_BARRIER ; Represent a memory barrier.
+  UNSPEC_PROBE_STACK    ; probe stack memory reference
   UNSPEC_UNALIGNED_LOAD	; Used to represent ldr/ldrh instructions that access
 			; unaligned locations, on architectures which support
 			; that.
@@ -6908,8 +6909,8 @@
 (define_insn "cbranchsi4_insn"
   [(set (pc) (if_then_else
 	      (match_operator 0 "arm_comparison_operator"
-	       [(match_operand:SI 1 "s_register_operand" "l,l*h")
-	        (match_operand:SI 2 "thumb1_cmp_operand" "lI*h,*r")])
+	       [(match_operand:SI 1 "s_register_operand" "l,l*h,k")
+	        (match_operand:SI 2 "thumb1_cmp_operand" "lI*h,*r,r")])
 	      (label_ref (match_operand 3 "" ""))
 	      (pc)))]
   "TARGET_THUMB1"
@@ -7455,17 +7456,18 @@
 
 (define_insn "*arm_cmpsi_insn"
   [(set (reg:CC CC_REGNUM)
-	(compare:CC (match_operand:SI 0 "s_register_operand" "l,r,r,r")
-		    (match_operand:SI 1 "arm_add_operand"    "Py,r,rI,L")))]
+	(compare:CC (match_operand:SI 0 "s_register_operand" "l,r,k,r,r")
+		    (match_operand:SI 1 "arm_add_operand"    "Py,r,r,rI,L")))]
   "TARGET_32BIT"
   "@
    cmp%?\\t%0, %1
    cmp%?\\t%0, %1
    cmp%?\\t%0, %1
+   cmp%?\\t%0, %1
    cmn%?\\t%0, #%n1"
   [(set_attr "conds" "set")
-   (set_attr "arch" "t2,t2,any,any")
-   (set_attr "length" "2,2,4,4")]
+   (set_attr "arch" "t2,t2,any,any,any")
+   (set_attr "length" "2,2,4,4,4")]
 )
 
 (define_insn "*cmpsi_shiftsi"
@@ -11180,6 +11182,69 @@
 
 ;;
 
+(define_expand "probe_stack"
+  [(match_operand 0 "memory_operand" "")]
+  "TARGET_EITHER"
+{ /* Probe the stack word given as operand 0 by storing zero to it.  */
+  if (  flag_stack_check == DIRECT_STACK_CHECK
+     || flag_stack_check == INDIRECT_STACK_CHECK)
+    ; /* with DIRECT/INDIRECT checking no probe is emitted at all */
+  else
+    {
+      emit_move_insn (operands[0], const0_rtx); /* the probe store itself */
+      emit_insn (gen_probe_stack_done ()); /* zero-length marker insn */
+      emit_insn (gen_blockage ());
+    }
+  DONE;
+}
+)
+
+(define_insn "probe_stack_done" ; marker emitted by probe_stack after the store
+  [(unspec_volatile [(const_int 0)] UNSPEC_PROBE_STACK)]
+  "TARGET_EITHER"
+  {return \"@ probe stack done\";}
+  [(set_attr "type" "store1")
+   (set_attr "length" "0")] ; outputs only an assembler comment
+)
+
+(define_expand "generic_limit_check_stack"
+  [(match_operand 0 "memory_operand" "")]
+  "crtl->limit_stack 
+  && flag_stack_check != DIRECT_STACK_CHECK 
+  && flag_stack_check != INDIRECT_STACK_CHECK"
+{ /* Emit stack_failure unless stack_limit_rtx <= operand 0 (unsigned).  */
+  rtx label = gen_label_rtx ();
+  rtx addr = copy_rtx (operands[0]);
+  addr = gen_rtx_fmt_ee (MINUS, Pmode, addr, GEN_INT (0)); /* NOTE(review): subtracts 0 - placeholder offset? confirm */
+  addr = force_operand (addr, NULL_RTX);
+  emit_insn (gen_blockage ());
+  emit_cmp_and_jump_insns (stack_limit_rtx, addr, LEU, NULL_RTX, Pmode, 1,
+                           label); /* branch around the failure call when within the limit */
+  emit_insn (gen_stack_failure ());
+  emit_label (label);
+  emit_insn (gen_blockage ());
+  DONE;
+}
+)
+
+(define_insn "stack_failure" ; unconditional trap, output as a call to the failure handler
+  [(trap_if (const_int 1) (const_int 0))
+   (clobber (reg:SI LR_REGNUM)) ; the emitted bl overwrites LR
+   (clobber (reg:CC CC_REGNUM))]
+  "TARGET_EITHER"
+  "*
+  {
+    if (TARGET_ARM)
+      output_asm_insn (\"bl\\t__arm_stack_failure\\t%@ trap call\", operands);
+    else
+      output_asm_insn (\"bl\\t__thumb_stack_failure\\t%@ trap call\", operands);
+  }
+  return \"\";
+  "
+  [(set_attr "conds" "clob")
+    (set_attr "length" "8")] ; NOTE(review): a single bl is output; 8 is presumably a safe upper bound
+)
+
 ;; We only care about the lower 16 bits of the constant 
 ;; being inserted into the upper 16 bits of the register.
 (define_insn "*arm_movtas_ze" 

^ permalink raw reply	[flat|nested] 13+ messages in thread
* Re: Ping: C-family stack check for threads
@ 2011-07-04 20:28 Thomas Klein
  2011-07-05 16:11 ` Richard Henderson
  0 siblings, 1 reply; 13+ messages in thread
From: Thomas Klein @ 2011-07-04 20:28 UTC (permalink / raw)
  To: rth, gcc-patches

Richard Henderson wrote:
>  On 07/03/2011 08:06 AM, Thomas Klein wrote:
>  >  +/*
>  >  + * Write prolouge part of stack check into asm file.
>  >  + * For Thumb this may look like this:
>  >  + *   push {rsym,ramn}
>  >  + *   ldr rsym, .LSPCHK0
>  >  + *   ldr rsym, [rsym]
>  >  + *   ldr ramn, .LSPCHK0 + 4
>  >  + *   add rsym, rsym, ramn
>  >  + *   cmp sp, rsym
>  >  + *   bhs .LSPCHK1
>  >  + *   push {lr}
>  >  + *   bl __thumb_stack_failure
>  >  + * .align 2
>  >  + * .LSPCHK0:
>  >  + *   .word symbol_addr_of(stack_limit_rtx)
>  >  + *   .word lenght_of(amount)
>  >   + * .LSPCHK1:
>  >  + *   pop {rsym,ramn}
>  >  + */
>  >  +void
>  >  +stack_check_output_function (FILE *f, int reg0, int reg1, unsigned amount,
>  >  +                             unsigned numregs)
>  >  +{
>
>  Is there an exceedingly good reason you're emitting this much code
>  as text, rather than as rtl?

To me, the stack check is one coherent operation.
This is placed after an initial push, which can't be eliminated, but before a major stack adjustment.

I have had some problems with RTL at the prologue stage.
Is there a way to encapsulate an RTL sequence within the prologue?
There is an emit_multi_reg_push, but is there something like emit_multi_reg_pop, too?
Are the other operations (compare, branch, ...) still allowed?

>  In particular, you adjust the stack but not the unwind info.  So
>  if one puts a breakpoint at your __thumb_stack_failure function,
>  the unwind information will be incorrect.

Yes, if the failure function is called, the unwind info will be wrong.
If this is a major problem, do I have to add this info after every push and pop operation?
Will the RTL push/pop already do this for me?

Regards
  Thomas Klein


^ permalink raw reply	[flat|nested] 13+ messages in thread
* Re: Ping: C-family stack check for threads
@ 2011-07-03 13:09 Thomas Klein
  2011-07-03 17:56 ` Richard Henderson
  2011-07-13 12:57 ` Hans-Peter Nilsson
  0 siblings, 2 replies; 13+ messages in thread
From: Thomas Klein @ 2011-07-03 13:09 UTC (permalink / raw)
  To: gcc-patches, joey.ye.cc

Ye Joey wrote:
>  Thomas,
>
>  I think your are working on a very useful feature. I have ARM MCU
>  applications running of out stack space and resulting strange
>  behaviors silently. I'd like to try your patch and probably give
>  further comments
>
>  - Joey

Hi
Due to the conversion of the Thumb prologue to RTL, this patch needs to be modified too.

Regards
   Thomas Klein

gcc/ChangeLog
2011-07-03  Thomas Klein<th.r.klein@web.de>  <mailto:th.r.klein@web.de>

     * opts.c (common_handle_option): introduce additional stack checking
     parameters "direct" and "indirect"
     * flag-types.h (enum stack_check_type): Likewise
     * explow.c (allocate_dynamic_stack_space):
     - suppress stack probing if parameter "direct", "indirect" or if a
     stack-limit is given
     - do additional read of limit value if parameter "indirect" and a
     stack-limit symbol is given
     - emit a call to a stack_failure function [as an alternative to a trap
     call]
     (function probe_stack_range): if allowed to override the range probe
     emit generic_limit_check_stack
     * config/arm/arm.c (stack_check_output_function): new function to write
     the stack check code sequence to the assembler file (inside prologue)
     (stack_check_work_registers): new function to find possible working
     registers [only used by "stack check"]
     (arm_expand_prologue): stack check integration for ARM and Thumb-2
     (thumb1_expand_prologue): stack check integration for Thumb-1
     * config/arm/arm.md (probe_stack): do not emit code when parameters
     "direct" or "indirect" given, emit move code as in gcc/explow.c
     [function emit_stack_probe]
     (probe_stack_done): dummy to make sure probe_stack insns are not
     optimized away
     (generic_limit_check_stack): if stack-limit and parameter "generic" is
     given use the limit the same way as in function
     allocate_dynamic_stack_space
     (stack_check): ARM/Thumb-2/Thumb-1 insn to output function
     stack_check_output_function
     (stack_failure): failure call used in function
     allocate_dynamic_stack_space [similar to a trap but avoid conflict with
     builtin_trap]

Index: gcc/flag-types.h
===================================================================
--- gcc/flag-types.h	(revision 175786)
+++ gcc/flag-types.h	(working copy)
@@ -153,7 +153,15 @@ enum stack_check_type

    /* Check the stack and entirely rely on the target configuration
       files, i.e. do not use the generic mechanism at all.  */
-  FULL_BUILTIN_STACK_CHECK
+  FULL_BUILTIN_STACK_CHECK,
+
+  /* Check the stack (if possible) before allocation of local variables at
+     each function entry. The stack limit is directly given e.g. by address
+     of a symbol */
+  DIRECT_STACK_CHECK,
+  /* Check the stack (if possible) before allocation of local variables at
+     each function entry. The stack limit is given by global variable. */
+  INDIRECT_STACK_CHECK
  };

  /* Names for the different levels of -Wstrict-overflow=N.  The numeric
Index: gcc/explow.c
===================================================================
--- gcc/explow.c	(revision 175786)
+++ gcc/explow.c	(working copy)
@@ -1358,7 +1358,12 @@ allocate_dynamic_stack_space (rtx size, unsigned s

    /* If needed, check that we have the required amount of stack.  Take into
       account what has already been checked.  */
-  if (STACK_CHECK_MOVING_SP)
+  if (  STACK_CHECK_MOVING_SP
+#ifdef HAVE_generic_limit_check_stack
+     || crtl->limit_stack
+#endif
+     || flag_stack_check == DIRECT_STACK_CHECK
+     || flag_stack_check == INDIRECT_STACK_CHECK)
      ;
    else if (flag_stack_check == GENERIC_STACK_CHECK)
      probe_stack_range (STACK_OLD_CHECK_PROTECT + STACK_CHECK_MAX_FRAME_SIZE,
@@ -1392,19 +1397,32 @@ allocate_dynamic_stack_space (rtx size, unsigned s
        /* Check stack bounds if necessary.  */
        if (crtl->limit_stack)
  	{
+          rtx limit_rtx;
  	  rtx available;
  	  rtx space_available = gen_label_rtx ();
+          if (  GET_CODE (stack_limit_rtx) == SYMBOL_REF
+&&  flag_stack_check == INDIRECT_STACK_CHECK)
+            limit_rtx = expand_unop (Pmode, mov_optab,
+				    gen_rtx_MEM (Pmode, stack_limit_rtx),
+				    NULL_RTX, 1);
+          else
+            limit_rtx = stack_limit_rtx;
  #ifdef STACK_GROWS_DOWNWARD
  	  available = expand_binop (Pmode, sub_optab,
-				    stack_pointer_rtx, stack_limit_rtx,
+				    stack_pointer_rtx, limit_rtx,
  				    NULL_RTX, 1, OPTAB_WIDEN);
  #else
  	  available = expand_binop (Pmode, sub_optab,
-				    stack_limit_rtx, stack_pointer_rtx,
+				    limit_rtx, stack_pointer_rtx,
  				    NULL_RTX, 1, OPTAB_WIDEN);
  #endif
  	  emit_cmp_and_jump_insns (available, size, GEU, NULL_RTX, Pmode, 1,
  				   space_available);
+#ifdef HAVE_stack_failure
+	  if (HAVE_stack_failure)
+	    emit_insn (gen_stack_failure ());
+	  else
+#endif
  #ifdef HAVE_trap
  	  if (HAVE_trap)
  	    emit_insn (gen_trap ());
@@ -1547,6 +1565,13 @@ probe_stack_range (HOST_WIDE_INT first, rtx size)
  	return;
      }
  #endif
+#ifdef HAVE_generic_limit_check_stack
+  else if (HAVE_generic_limit_check_stack)
+    {
+      rtx addr = memory_address (Pmode,stack_pointer_rtx);
+      emit_insn (gen_generic_limit_check_stack (addr));
+    }
+#endif

    /* Otherwise we have to generate explicit probes.  If we have a constant
       small number of them to generate, that's the easy case.  */
Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c	(revision 175786)
+++ gcc/config/arm/arm.c	(working copy)
@@ -14625,6 +14625,283 @@ arm_output_function_prologue (FILE *f, HOST_WIDE_I

  }

+/*
+ * Write prologue part of stack check into asm file F (REG0/REG1: temporaries).
+ * For Thumb this may look like this:
+ *   push {rsym,ramn}
+ *   ldr rsym, .LSPCHK0
+ *   ldr rsym, [rsym]
+ *   ldr ramn, .LSPCHK0 + 4
+ *   add rsym, rsym, ramn
+ *   cmp sp, rsym
+ *   bhs .LSPCHK1
+ *   push {lr}
+ *   bl __thumb_stack_failure
+ * .align 2
+ * .LSPCHK0:
+ *   .word symbol_addr_of(stack_limit_rtx)
+ *   .word length_of(amount)
+ * .LSPCHK1:
+ *   pop {rsym,ramn}
+ */
+void
+stack_check_output_function (FILE *f, int reg0, int reg1, unsigned amount,
+                             unsigned numregs)
+{
+  unsigned amount_needsreg;
+  bool amount_const_ok, is_non_opt_thumb2, is_thumb2_hi_reg[2];
+  bool issym=false;
+  static unsigned spchk_labelno = 0;
+  char ok_lable_str[256];
+  char pool_lable_str[256];
+
+  if (TARGET_THUMB1)
+    amount_const_ok = (amount<  256);
+  else
+    amount_const_ok = const_ok_for_arm (amount);
+
+  if (GET_CODE (stack_limit_rtx) == SYMBOL_REF) /*limit is a symbol; otherwise it is used as a register below*/
+    {
+      issym = true;
+      amount_needsreg = !amount_const_ok;
+    }
+  else
+    amount_needsreg = (amount>  0);
+	
+  is_non_opt_thumb2 = (TARGET_THUMB2&&  !(optimize_size || optimize>= 2));
+  is_thumb2_hi_reg[0] = (TARGET_THUMB2&&  reg0>7);
+  is_thumb2_hi_reg[1] = (TARGET_THUMB2&&  reg1>7);
+	
+  /*build labels for later use: the pool label only if a pool entry is needed*/
+  if ( (issym&&  !(is_non_opt_thumb2 || is_thumb2_hi_reg[0]))
+     ||(amount&&  !amount_const_ok
+&&  !((issym&&  is_thumb2_hi_reg[1])
+	     || (!issym&&  is_thumb2_hi_reg[0])
+         || is_non_opt_thumb2)))
+    ASM_GENERATE_INTERNAL_LABEL (pool_lable_str, "LSPCHK", spchk_labelno++);
+  ASM_GENERATE_INTERNAL_LABEL (ok_lable_str, "LSPCHK", spchk_labelno++);
+
+  if (issym&&  amount) /*need temp regs for limit and amount*/
+    {
+      if (numregs>= 2)
+        ; /*have 2 regs =>  no need to push*/
+      else if (numregs == 1)
+        {
+          if (amount_needsreg)
+            {
+              /*have one reg but need two regs =>  push temp reg for amount*/
+              if (TARGET_ARM)
+                asm_fprintf (f, "\tstr\t%r, [%r, #-4]!\n", reg1, SP_REGNUM);
+              else
+                asm_fprintf (f, "\tpush\t{%r}\n", reg1);
+	      /*due to additional push try to correct amount*/
+	      if (amount>= 4)
+	        {
+		  if (amount_const_ok)
+		    {
+		      if (TARGET_THUMB1 || const_ok_for_arm(amount - 4))
+		        amount -= 4;
+		      /*on Thumb2 or ARM the amount may stay uncorrected; shouldn't hurt*/
+		    }
+		  else /*will be loaded from pool*/
+		    amount -= 4;
+	        }
+            }
+        }
+      else if (amount_needsreg)
+        {
+          /*have no reg but need two =>  push temp regs for limit and amount*/
+          if (TARGET_ARM)
+            asm_fprintf (f, "\tstmfd\t%r!, {%r,%r}\n", SP_REGNUM, reg0, reg1);
+          else
+            asm_fprintf (f, "\tpush\t{%r,%r}\n", reg0, reg1);
+          /*due to additional push try to correct amount*/
+          if (amount>= 8)
+            {
+              if (amount_const_ok)
+                {
+                  if (TARGET_THUMB1 || const_ok_for_arm(amount - 8))
+                    amount -= 8;
+                  /*on Thumb2 or ARM the amount may stay uncorrected; shouldn't hurt*/
+                }
+              else /*will be loaded from pool*/
+                amount -= 8;
+            }
+        }
+      else
+        {
+          /*have no reg but need one reg =>  push temp reg for limit*/
+          if (TARGET_ARM)
+            asm_fprintf (f, "\tstr\t%r, [%r, #-4]!\n", reg0, SP_REGNUM);
+          else
+            asm_fprintf (f, "\tpush\t{%r}\n", reg0);
+          /*due to additional push try to correct amount*/
+          if (amount>= 4)
+            {
+              if (amount_const_ok)
+                {
+                  if (TARGET_THUMB1 || const_ok_for_arm(amount - 4))
+                    amount -= 4;
+                  /*on Thumb2 or ARM the amount may stay uncorrected; shouldn't hurt*/
+                }
+              else /*will be loaded from pool*/
+                amount -= 4;
+            }
+        }
+    }
+  else if ((issym || amount_needsreg)&&  numregs == 0)
+    { /*push temp reg either for limit or amount*/
+      if (TARGET_ARM)
+        asm_fprintf (f, "\tstr\t%r, [%r, #-4]!\n", reg0, SP_REGNUM);
+      else
+        asm_fprintf (f, "\tpush\t{%r}\n", reg0);
+    }
+
+  if (issym)
+    {
+      if (is_non_opt_thumb2 || is_thumb2_hi_reg[0])
+        {
+          const char *str ;
+          str = (const char *) XSTR  (stack_limit_rtx, 0);
+          asm_fprintf (f, "\tmovw\t%r, #:lower16:%s\n", reg0, str);
+          asm_fprintf (f, "\tmovt\t%r, #:upper16:%s\n", reg0, str);
+        }
+      else
+        {
+          asm_fprintf (f, "\tldr\t%r, ", reg0);
+          assemble_name (f, pool_lable_str); /* =stack_limit_rtx */
+          fputs ("\n", f);
+        }
+
+      if (flag_stack_check == INDIRECT_STACK_CHECK)
+        asm_fprintf (f, "\tldr\t%r, [%r]\n", reg0, reg0);
+      if (amount)
+        {
+          if (amount_const_ok)
+            {
+              if (TARGET_32BIT)
+                asm_fprintf (f, "\tadds\t%r, %r, #%d\n", reg0, reg0, amount);
+              else
+                asm_fprintf (f, "\tadd\t%r, %r, #%d\n", reg0, reg0, amount);
+            }
+          else
+            {
+              if (is_non_opt_thumb2 || is_thumb2_hi_reg[1])
+                {
+                  asm_fprintf (f, "\tmovw\t%r, #0x%X\n", reg1, amount&0xFFFF);
+                  asm_fprintf (f, "\tmovt\t%r, #0x%X\n", reg1,
+				    (amount>>16)&0xFFFF);
+                }
+              else
+                {
+                  asm_fprintf (f, "\tldr\t%r, ", reg1);
+	              assemble_name (f, pool_lable_str); /* =amount */
+                  if (is_thumb2_hi_reg[0])
+                    fputs ("\n", f);
+                  else
+                    fputs (" + 4\n", f);
+                }
+              asm_fprintf (f, "\tadd\t%r, %r, %r\n", reg0, reg0, reg1);
+            }
+        }
+      asm_fprintf (f, "\tcmp\t%r, %r\n", SP_REGNUM, reg0);
+    }
+  else if (amount)
+    {
+      if (amount_const_ok)
+        asm_fprintf (f, "\tmov\t%r, #%d\n", reg0, amount);
+      else
+        {
+          if (is_non_opt_thumb2 || is_thumb2_hi_reg[0])
+            {
+              asm_fprintf (f, "\tmovw\t%r, #0x%X\n", reg0, amount&0xFFFF);
+              asm_fprintf (f, "\tmovt\t%r, #0x%X\n", reg0,(amount>>16)&0xFFFF);
+            }
+          else
+            {
+              asm_fprintf (f, "\tldr\t%r, ", reg0);
+              assemble_name (f, pool_lable_str); /* amount */
+              fputs ("\n", f);
+            }
+        }
+      asm_fprintf (f, "\tadd\t%r, %r, %r\n", reg0,reg0,REGNO(stack_limit_rtx));
+      asm_fprintf (f, "\tcmp\t%r, %r\n", SP_REGNUM, reg0);
+    }
+  else
+    asm_fprintf (f, "\tcmp\t%r, %r\n", SP_REGNUM, REGNO(stack_limit_rtx));
+  asm_fprintf (f, "\tbhs\t");
+  assemble_name (f, ok_lable_str);
+  fputs ("\n", f);
+
+  if (TARGET_ARM)
+    {
+      asm_fprintf (f, "\tstr\t%r, [%r, #-4]!\n", LR_REGNUM, SP_REGNUM);
+      asm_fprintf (f, "\tbl\t__arm_stack_failure\t%@ stack check\n");
+    }
+  else
+    {
+      asm_fprintf (f, "\tpush\t{%r}\n", LR_REGNUM);
+      asm_fprintf (f, "\tbl\t__thumb_stack_failure\t%@ stack check\n");
+    }
+	
+    /*literal pool holding the limit symbol and/or the amount*/
+    if ( (issym&&  !(is_non_opt_thumb2 || is_thumb2_hi_reg[0]))
+       ||(amount&&  !amount_const_ok
+&&  !(  (issym&&  is_thumb2_hi_reg[1])
+		     || (!issym&&  is_thumb2_hi_reg[0])
+	         || is_non_opt_thumb2)))
+    {
+      /*the temp regs load their values from here*/
+      if (!TARGET_ARM)
+        ASM_OUTPUT_ALIGN (f, 2);
+      ASM_OUTPUT_LABEL(f,pool_lable_str);
+	  if (issym&&  !(is_non_opt_thumb2 || is_thumb2_hi_reg[0]))
+	    assemble_aligned_integer (UNITS_PER_WORD, stack_limit_rtx);
+      if (amount&&  !amount_const_ok
+&&  !(  (issym&&  is_thumb2_hi_reg[1])
+		     || (!issym&&  is_thumb2_hi_reg[0])
+	         || is_non_opt_thumb2))
+		assemble_aligned_integer (UNITS_PER_WORD, GEN_INT (amount));
+	}
+  ASM_OUTPUT_LABEL(f,ok_lable_str);
+  if (issym&&  amount) /*pop temp regs used by limit and amount*/
+    {
+      if (numregs>= 2)
+        ; /*no need to pop*/
+      else if (numregs == 1)
+        {
+          if (amount_needsreg)
+            {
+              if (TARGET_ARM)
+                asm_fprintf (f, "\tldr\t%r, [%r, #4]!\n", reg1, SP_REGNUM);
+              else
+                asm_fprintf (f, "\tpop\t{%r}\n", reg1);
+            }
+        }
+      else if (amount_needsreg)
+        {
+          if (TARGET_ARM)
+            asm_fprintf (f, "\tldmfd\t%r!, {%r,%r}\n", SP_REGNUM, reg0, reg1);
+          else
+            asm_fprintf (f, "\tpop\t{%r,%r}\n", reg0, reg1);
+        }
+      else
+        {
+          if (TARGET_ARM)
+            asm_fprintf (f, "\tldr\t%r, [%r, #4]!\n", reg0, SP_REGNUM);
+          else
+            asm_fprintf (f, "\tpop\t{%r}\n", reg0);
+        }
+    }
+  else if ((issym || amount_needsreg)&&  numregs == 0)
+    { /*pop temp reg used by limit or amount*/
+      if (TARGET_ARM)
+        asm_fprintf (f, "\tldr\t%r, [%r, #4]!\n", reg0, SP_REGNUM);
+      else
+        asm_fprintf (f, "\tpop\t{%r}\n", reg0);
+    }
+}
+
  const char *
  arm_output_epilogue (rtx sibling)
  {
@@ -15797,6 +16074,72 @@ thumb_set_frame_pointer (arm_stack_offsets *offset
    RTX_FRAME_RELATED_P (insn) = 1;
  }

+/* Search for possible work registers for stack-check operation at prologue;
+   return the number of registers that can be used without extra push/pop.  */
+
+static int
+stack_check_work_registers (rtx *workreg)
+{
+  int reg, i, k, n, nregs;
+
+  if (crtl->args.info.pcs_variant<= ARM_PCS_AAPCS_LOCAL)
+    {
+      nregs = crtl->args.info.aapcs_next_ncrn;
+    }
+  else
+    nregs = crtl->args.info.nregs;
+
+
+  n = 0;
+  i = 0;
+  /* check if we can use one of the argument registers r0..r3 as long as they
+   * not holding data*/
+  for (reg = 0; reg<= LAST_ARG_REGNUM&&  i<  2; reg++)
+    {
+      if (  !df_regs_ever_live_p (reg)
+         || (cfun->machine->uses_anonymous_args&&  crtl->args.pretend_args_size
+>  (LAST_ARG_REGNUM - reg) * UNITS_PER_WORD)
+         || (!cfun->machine->uses_anonymous_args&&  nregs<  reg + 1)
+         )
+        {
+	  workreg[i++] = gen_rtx_REG (SImode, reg);
+	  n = (reg + 1) % 4;
+        }
+    }
+
+  /* otherwise try to use r4..r7*/
+  for (reg = LAST_ARG_REGNUM + 1; reg<= LAST_LO_REGNUM&&  i<  2; reg++)
+    {
+      if (  df_regs_ever_live_p (reg)
+&&  !fixed_regs[reg]
+&&  reg != FP_REGNUM )
+        {
+	  workreg[i++] = gen_rtx_REG (SImode, reg);
+        }
+    }
+
+  if (TARGET_32BIT)
+    {
+      /* ARM and Thumb-2 can use high regs.  */
+      for (reg = FIRST_HI_REGNUM; reg<= LAST_HI_REGNUM&&  i<  2; reg ++)
+        if (  df_regs_ever_live_p (reg)
+&&  !fixed_regs[reg]
+&&  reg != FP_REGNUM )
+          {
+	    workreg[i++] = gen_rtx_REG (SImode, reg);
+          }
+    }
+
+  k = i;
+  /* if not enough found to be uses without extra push,
+   * collect next from r0..r4*/
+  for ( ; i<2; i++)
+    workreg[i] = gen_rtx_REG (SImode, n++);
+
+  return k;
+}
+
+
  /* Generate the prologue instructions for entry into an ARM or Thumb-2
     function.  */
  void
@@ -16046,6 +16389,23 @@ arm_expand_prologue (void)
      current_function_static_stack_size
        = offsets->outgoing_args - offsets->saved_args;

+  if (  crtl->limit_stack
+&&  !(IS_INTERRUPT (func_type))
+&&  (  flag_stack_check == DIRECT_STACK_CHECK
+        || flag_stack_check == INDIRECT_STACK_CHECK)
+&&  (offsets->outgoing_args - offsets->saved_args)>  0
+     )
+    {
+      rtx reg[2], num_temp_regs;
+
+      amount = GEN_INT (offsets->outgoing_args - saved_regs
+			- offsets->saved_args);
+      num_temp_regs = GEN_INT (stack_check_work_registers(reg));
+      insn = gen_stack_check (stack_pointer_rtx, reg[0], reg[1], amount,
+                              num_temp_regs);
+      insn = emit_insn (insn);
+    }
+
    if (offsets->outgoing_args != offsets->saved_args + saved_regs)
      {
        /* This add can produce multiple insns for a large constant, so we
@@ -21247,6 +21607,22 @@ thumb1_expand_prologue (void)

    amount = offsets->outgoing_args - offsets->saved_regs;
    amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
+
+  if(  crtl->limit_stack
+&&  (  flag_stack_check == DIRECT_STACK_CHECK
+       || flag_stack_check == INDIRECT_STACK_CHECK)
+&&  (offsets->outgoing_args - offsets->saved_args)
+    )
+    {
+      rtx reg[2], num_temp_regs, tmp_amount;
+
+      tmp_amount = GEN_INT (amount);
+      num_temp_regs = GEN_INT (stack_check_work_registers(reg));
+      insn = gen_stack_check (stack_pointer_rtx, reg[0], reg[1], tmp_amount,
+                              num_temp_regs);
+      insn = emit_insn (insn);
+    }
+
    if (amount)
      {
        if (amount<  512)
@@ -21406,6 +21782,7 @@ thumb1_output_interwork (void)
    asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);

    return "";
+
  }

  /* Handle the case of a double word load into a low register from
Index: gcc/config/arm/arm.md
===================================================================
--- gcc/config/arm/arm.md	(revision 175786)
+++ gcc/config/arm/arm.md	(working copy)
@@ -105,6 +105,8 @@
    UNSPEC_SYMBOL_OFFSET  ; The offset of the start of the symbol from
                          ; another symbolic address.
    UNSPEC_MEMORY_BARRIER ; Represent a memory barrier.
+  UNSPEC_PROBE_STACK    ; probe stack memory reference
+  UNSPEC_PROLOGUE_SPCHK ; stack pointer check using the stack_limit_rtx
  ])

  ;; UNSPEC_VOLATILE Usage:
@@ -10758,6 +10760,115 @@

  ;;

+(define_expand "probe_stack"
+  [(match_operand 0 "memory_operand" "")]
+  "TARGET_EITHER"
+{
+  if (  flag_stack_check == DIRECT_STACK_CHECK
+     || flag_stack_check == INDIRECT_STACK_CHECK)
+    ;
+  else
+    {
+      emit_move_insn (operands[0], const0_rtx);
+      emit_insn (gen_probe_stack_done ());
+      emit_insn (gen_blockage ());
+    }
+  DONE;
+}
+)
+
+(define_insn "probe_stack_done"
+  [(unspec_volatile [(const_int 0)] UNSPEC_PROBE_STACK)]
+  "TARGET_EITHER"
+  {return \"@ probe stack done\";}
+  [(set_attr "type" "store1")
+   (set_attr "length" "0")]
+)
+
+(define_expand "generic_limit_check_stack"
+  [(match_operand 0 "memory_operand" "")]
+  "crtl->limit_stack
+&&  flag_stack_check != DIRECT_STACK_CHECK
+&&  flag_stack_check != INDIRECT_STACK_CHECK"
+{
+  rtx label = gen_label_rtx ();
+  rtx addr = copy_rtx (operands[0]);
+  addr = gen_rtx_fmt_ee (MINUS, Pmode, addr, GEN_INT (0));
+  addr = force_operand (addr, NULL_RTX);
+  emit_insn (gen_blockage ());
+  emit_cmp_and_jump_insns (stack_limit_rtx, addr, LEU, NULL_RTX, Pmode, 1,
+                           label);
+  emit_insn (gen_stack_failure ());
+  emit_label (label);
+  emit_insn (gen_blockage ());
+  DONE;
+}
+)
+
+(define_insn "stack_check"
+  [(set
+   (match_operand:SI 0 "register_operand" "=k")
+   (unspec:SI
+    [
+   (match_operand:SI 1 "register_operand" "r")
+   (match_operand:SI 2 "register_operand" "r")
+   (match_operand:SI 3 "general_operand"  "i")
+   (match_operand:SI 4 "general_operand"  "i")
+    ]
+   UNSPEC_PROLOGUE_SPCHK )
+   )
+   (clobber (reg:CC CC_REGNUM))
+  ]
+  "TARGET_EITHER
+&&  (GET_CODE (operands[3]) == CONST_INT)
+&&  (GET_CODE (operands[4]) == CONST_INT)"
+  "*
+  {
+    int reg0, reg1;
+    unsigned amount, numregs;
+    extern void stack_check_output_function (FILE *, int, int, unsigned,
+                                            unsigned);
+
+    reg0 = REGNO (operands[1]);
+    reg1 = REGNO (operands[2]);
+    amount = INTVAL (operands[3]);
+    numregs = INTVAL (operands[4]);
+
+    stack_check_output_function  (asm_out_file, reg0, reg1, amount, numregs);
+  }
+  return \"\";
+  "
+  [(set_attr "conds" "clob")
+   (set (attr "length")
+   (if_then_else (eq_attr "is_thumb" "yes")
+      (const_int 44)
+      (const_int 52)))]
+)
+
+(define_insn "stack_failure"
+  [(trap_if (const_int 1) (const_int 0))]
+  "TARGET_EITHER"
+  "*
+  {
+    rtx ops[2];
+
+    ops[0] = stack_pointer_rtx;
+    ops[1] = gen_rtx_REG (SImode, LR_REGNUM);
+    if (TARGET_ARM)
+      {
+        output_asm_insn (\"str\\t%1, [%0, #-4]!\", ops);
+        output_asm_insn (\"bl\\t__arm_stack_failure\\t%@ trap call\", ops);
+      }
+    else
+      {
+        output_asm_insn (\"push\\t{%1}\", ops);
+        output_asm_insn (\"bl\\t__thumb_stack_failure\\t%@ trap call\", ops);
+      }
+  }
+  return \"\";
+  "
+)
+
  ;; We only care about the lower 16 bits of the constant
  ;; being inserted into the upper 16 bits of the register.
  (define_insn "*arm_movtas_ze"



^ permalink raw reply	[flat|nested] 13+ messages in thread
* Ping: C-family stack check for threads
@ 2011-06-24 14:10 Thomas Klein
  2011-06-30  9:36 ` Ye Joey
  0 siblings, 1 reply; 13+ messages in thread
From: Thomas Klein @ 2011-06-24 14:10 UTC (permalink / raw)
  To: gcc-patches

Hi

This is a ping of 
(http://gcc.gnu.org/ml/gcc-patches/2011-03/msg01226.html).
Repeating my request.

I would like to have a stack check for threads with a small amount of 
stack space per thread.
(I'm using an ARM Cortex-M3 microcontroller with a stack size of 
1 KByte per thread.)
Each thread has its own limit address.
The thread scheduler can then calculate the limit and store this value 
inside of a global variable.
The compiler may generate code to check the stack for overflow at 
function entry.
In principle this can be done this way:
   - push registers as usual
   - figure out whether one or two work registers can be used directly 
without an extra push
   - if not enough registers are found, push the required work registers to the stack
   - load the limit address into the first work register
   - load the value at the limit address (into the same register)
   - if the stack pointer is going to be moved to extend the stack (e.g. for 
local variables),
     load this size value too (here the second work register can be used)
   - compare for overflow
   - if an overflow occurs, "call" the stack_failure function
   - pop the work registers that were pushed before
   - continue function prologue as usual e.g. extend stack pointer

The ARM target has an option "-mapcs-stack-check" but this is more or 
less not working (the implementation seems to be missing).
There are also architecture independent options like
"-fstack-check=generic", "-fstack-limit-symbol=current_stack_limit" or 
"-fstack-limit-register=r6"
that can be used.

The generic stack check does a probe at the end of the function prologue phase
(e.g. by writing 12K ahead of the current stack pointer position).
If this stack space is not available the probe may generate a fault.
This requires the CPU to have an MPU or an MMU.
For machines with small memory space an additional mechanism should be
available.

The option "-fstack-check" can be extended by the switches "direct" and 
"indirect" to emit compare code in the function prologue.
If the switch "direct" is given, the address of "-fstack-limit-symbol" 
represents the limit itself.
If the switch "indirect" is given, "-fstack-limit-symbol" is a kind of global
variable that needs to be read before the comparison.

I have added a proposal to show how this behavior can be integrated
on the ARM architecture.

The generated code looks like this
e.g. if using "-fstack-check=indirect -fstack-limit-symbol=stack_limit_var"
->   push {r0}
->   ldr r0, .LSPCHK0
->   ldr r0, [r0]
->   cmp sp, r0
->   bhs .LSPCHK1
->   push {lr}
->   bl __thumb_stack_failure
-> .align 2
-> .LSPCHK0:
-> .word stack_limit_var
-> .LSPCHK1:
->   pop {r0}

Regards
   Thomas Klein

gcc/ChangeLog

2011-06-24  Thomas Klein <th.r.klein@web.de> <mailto:th.r.klein@web.de>
     * opts.c (common_handle_option): introduce additional stack checking
     parameters "direct" and "indirect"
     * flag-types.h (enum stack_check_type): Likewise

     * explow.c (allocate_dynamic_stack_space):
     - suppress stack probing if parameter "direct", "indirect" or if a
     stack-limit is given
     - do additional read of limit value if parameter "indirect" and a
     stack-limit symbol is given
     - emit a call to a stack_failure function [as an alternative to a trap
     call]
     (function probe_stack_range): if allowed to override the range probe
     emit generic_limit_check_stack

     * config/arm/arm.c (stack_check_output_function): new function to 
write
     the stack check code sequence to the assembler file (inside prologue)
     (stack_check_work_registers): new function to find possible working
     registers [only used by "stack check"]
     (arm_expand_prologue): stack check integration for ARM and Thumb-2
     (thumb1_output_function_prologue): stack check integration for Thumb-1

     * config/arm/arm.md (probe_stack): do not emit code when parameters
     "direct" or "indirect" given, emit move code as in gcc/explow.c
     [function emit_stack_probe]
     (probe_stack_done): dummy to make sure probe_stack insns are not
     optimized away
     (generic_limit_check_stack): if stack-limit and parameter "generic" is
     given use the limit the same way as in function
     allocate_dynamic_stack_space
     (stack_check): ARM/Thumb-2 insn to output function
     stack_check_output_function
     (stack_failure): failure call used in function
     allocate_dynamic_stack_space [similar to a trap but avoid conflict 
with
     builtin_trap]

Index: gcc/opts.c
===================================================================
--- gcc/opts.c    (revision 175346)
+++ gcc/opts.c    (working copy)
@@ -1629,6 +1629,12 @@ common_handle_option (struct gcc_options *opts,
                 : STACK_CHECK_STATIC_BUILTIN
                   ? STATIC_BUILTIN_STACK_CHECK
                   : GENERIC_STACK_CHECK;
+      else if (!strcmp (arg, "indirect"))
+    /* This is an other stack checking method.  */
+    opts->x_flag_stack_check = INDIRECT_STACK_CHECK;
+      else if (!strcmp (arg, "direct"))
+    /* This is an other stack checking method.  */
+    opts->x_flag_stack_check = DIRECT_STACK_CHECK;
        else
      warning_at (loc, 0, "unknown stack check parameter \"%s\"", arg);
        break;
Index: gcc/function.c
===================================================================
--- gcc/function.c    (revision 175346)
+++ gcc/function.c    (working copy)
@@ -4810,7 +4810,9 @@ expand_function_start (tree subr)
      }

    /* If we are doing generic stack checking, the probe should go here.  */
-  if (flag_stack_check == GENERIC_STACK_CHECK)
+  if(  flag_stack_check /*== GENERIC_STACK_CHECK
+    || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
+    || flag_stack_check == FULL_BUILTIN_STACK_CHECK */)
      stack_check_probe_note = emit_note (NOTE_INSN_DELETED);

    /* Make sure there is a line number after the function entry setup 
code.  */
Index: gcc/flag-types.h
===================================================================
--- gcc/flag-types.h    (revision 175346)
+++ gcc/flag-types.h    (working copy)
@@ -153,7 +153,15 @@ enum stack_check_type

    /* Check the stack and entirely rely on the target configuration
       files, i.e. do not use the generic mechanism at all.  */
-  FULL_BUILTIN_STACK_CHECK
+  FULL_BUILTIN_STACK_CHECK,
+
+  /* Check the stack (if possible) before allocation of local variables at
+     each function entry. The stack limit is directly given e.g. by address
+     of a symbol */
+  DIRECT_STACK_CHECK,
+  /* Check the stack (if possible) before allocation of local variables at
+     each function entry. The stack limit is given by global variable. */
+  INDIRECT_STACK_CHECK
  };

  /* Names for the different levels of -Wstrict-overflow=N.  The numeric
Index: gcc/explow.c
===================================================================
--- gcc/explow.c    (revision 175346)
+++ gcc/explow.c    (working copy)
@@ -1356,7 +1356,12 @@ allocate_dynamic_stack_space (rtx size, unsigned s

    /* If needed, check that we have the required amount of stack.  Take 
into
       account what has already been checked.  */
-  if (STACK_CHECK_MOVING_SP)
+  if (  STACK_CHECK_MOVING_SP
+#ifdef HAVE_generic_limit_check_stack
+     || crtl->limit_stack
+#endif
+     || flag_stack_check == DIRECT_STACK_CHECK
+     || flag_stack_check == INDIRECT_STACK_CHECK)
      ;
    else if (flag_stack_check == GENERIC_STACK_CHECK)
      probe_stack_range (STACK_OLD_CHECK_PROTECT + 
STACK_CHECK_MAX_FRAME_SIZE,
@@ -1390,19 +1395,32 @@ allocate_dynamic_stack_space (rtx size, unsigned s
        /* Check stack bounds if necessary.  */
        if (crtl->limit_stack)
      {
+          rtx limit_rtx;
        rtx available;
        rtx space_available = gen_label_rtx ();
+          if (  GET_CODE (stack_limit_rtx) == SYMBOL_REF
+ && flag_stack_check == INDIRECT_STACK_CHECK)
+            limit_rtx = expand_unop (Pmode, mov_optab,
+                    gen_rtx_MEM (Pmode, stack_limit_rtx),
+                    NULL_RTX, 1);
+          else
+            limit_rtx = stack_limit_rtx;
  #ifdef STACK_GROWS_DOWNWARD
        available = expand_binop (Pmode, sub_optab,
-                    stack_pointer_rtx, stack_limit_rtx,
+                    stack_pointer_rtx, limit_rtx,
                      NULL_RTX, 1, OPTAB_WIDEN);
  #else
        available = expand_binop (Pmode, sub_optab,
-                    stack_limit_rtx, stack_pointer_rtx,
+                    limit_rtx, stack_pointer_rtx,
                      NULL_RTX, 1, OPTAB_WIDEN);
  #endif
        emit_cmp_and_jump_insns (available, size, GEU, NULL_RTX, Pmode, 1,
                     space_available);
+#ifdef HAVE_stack_failure
+      if (HAVE_stack_failure)
+        emit_insn (gen_stack_failure ());
+      else
+#endif
  #ifdef HAVE_trap
        if (HAVE_trap)
          emit_insn (gen_trap ());
@@ -1545,6 +1563,13 @@ probe_stack_range (HOST_WIDE_INT first, rtx size)
      return;
      }
  #endif
+#ifdef HAVE_generic_limit_check_stack
+  else if (HAVE_generic_limit_check_stack)
+    {
+      rtx addr = memory_address (Pmode,stack_pointer_rtx);
+      emit_insn (gen_generic_limit_check_stack (addr));
+    }
+#endif

    /* Otherwise we have to generate explicit probes.  If we have a constant
       small number of them to generate, that's the easy case.  */
Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c    (revision 175346)
+++ gcc/config/arm/arm.c    (working copy)
@@ -14628,6 +14628,283 @@ arm_output_function_prologue (FILE *f, HOST_WIDE_I

  }

+/*
+ * Write prologue part of stack check into asm file.
+ * For Thumb this may look like this:
+ *   push {rsym,ramn}
+ *   ldr rsym, .LSPCHK0
+ *   ldr rsym, [rsym]
+ *   ldr ramn, .LSPCHK0 + 4
+ *   add rsym, rsym, ramn
+ *   cmp sp, rsym
+ *   bhs .LSPCHK1
+ *   push {lr}
+ *   bl __thumb_stack_failure
+ * .align 2
+ * .LSPCHK0:
+ *   .word symbol_addr_of(stack_limit_rtx)
+ *   .word length_of(amount)
+ * .LSPCHK1:
+ *   pop {rsym,ramn}
+ */
+void
+stack_check_output_function (FILE *f, int reg0, int reg1, unsigned amount,
+                             unsigned numregs)
+{
+  unsigned amount_needsreg;
+  bool amount_const_ok, is_non_opt_thumb2, is_thumb2_hi_reg[2];
+  bool issym=false;
+  static unsigned spchk_labelno = 0;
+  char ok_lable_str[256];
+  char pool_lable_str[256];
+
+  if (TARGET_THUMB1)
+    amount_const_ok = (amount < 256);
+  else
+    amount_const_ok = const_ok_for_arm (amount);
+
+  if (GET_CODE (stack_limit_rtx) == SYMBOL_REF) /*stack_limit_rtx*/
+    {
+      issym = true;
+      amount_needsreg = !amount_const_ok;
+    }
+  else
+    amount_needsreg = (amount > 0);
+
+  is_non_opt_thumb2 = (TARGET_THUMB2 && !(optimize_size || optimize >= 2));
+  is_thumb2_hi_reg[0] = (TARGET_THUMB2 && reg0>7);
+  is_thumb2_hi_reg[1] = (TARGET_THUMB2 && reg1>7);
+
+  /*build labels for later use*/
+  if ( (issym && !(is_non_opt_thumb2 || is_thumb2_hi_reg[0]))
+     ||(amount && !amount_const_ok
+ && !((issym && is_thumb2_hi_reg[1])
+         || (!issym && is_thumb2_hi_reg[0])
+         || is_non_opt_thumb2)))
+    ASM_GENERATE_INTERNAL_LABEL (pool_lable_str, "LSPCHK", 
spchk_labelno++);
+  ASM_GENERATE_INTERNAL_LABEL (ok_lable_str, "LSPCHK", spchk_labelno++);
+
+  if (issym && amount) /*need temp regs for limit and amount*/
+    {
+      if (numregs >= 2)
+        ; /*have 2 regs => no need to push*/
+      else if (numregs == 1)
+        {
+          if (amount_needsreg)
+            {
+              /*have one reg but need two regs => push temp reg for 
amount*/
+              if (TARGET_ARM)
+                asm_fprintf (f, "\tstr\t%r, [%r, #-4]!\n", reg1, 
SP_REGNUM);
+              else
+                asm_fprintf (f, "\tpush\t{%r}\n", reg1);
+          /*due to additional push try to correct amount*/
+          if (amount >= 4)
+            {
+          if (amount_const_ok)
+            {
+              if (TARGET_THUMB1 || const_ok_for_arm(amount - 4))
+                amount -= 4;
+              /*on Thumb2 or ARM may not corrected; shouldn't hurt*/
+            }
+          else /*will be loaded from pool*/
+            amount -= 4;
+            }
+            }
+        }
+      else if (amount_needsreg)
+        {
+          /*have no reg but need two => push temp regs for limit and 
amount*/
+          if (TARGET_ARM)
+            asm_fprintf (f, "\tstmfd\t%r!, {%r,%r}\n", SP_REGNUM, reg0, 
reg1);
+          else
+            asm_fprintf (f, "\tpush\t{%r,%r}\n", reg0, reg1);
+          /*due to additional push try to correct amount*/
+          if (amount >= 8)
+            {
+              if (amount_const_ok)
+                {
+                  if (TARGET_THUMB1 || const_ok_for_arm(amount - 8))
+                    amount -= 8;
+                  /*on Thumb2 or ARM may not corrected; shouldn't hurt*/
+                }
+              else /*will be loaded from pool*/
+                amount -= 8;
+            }
+        }
+      else
+        {
+          /*have no reg but need one reg => push temp reg for limit*/
+          if (TARGET_ARM)
+            asm_fprintf (f, "\tstr\t%r, [%r, #-4]!\n", reg0, SP_REGNUM);
+          else
+            asm_fprintf (f, "\tpush\t{%r}\n", reg0);
+          /*due to additional push try to correct amount*/
+          if (amount >= 4)
+            {
+              if (amount_const_ok)
+                {
+                  if (TARGET_THUMB1 || const_ok_for_arm(amount - 4))
+                    amount -= 4;
+                  /*on Thumb2 or ARM may not corrected; shouldn't hurt*/
+                }
+              else /*will be loaded from pool*/
+                amount -= 4;
+            }
+        }
+    }
+  else if ((issym || amount_needsreg) && numregs == 0)
+    { /*push temp reg either for limit or amount*/
+      if (TARGET_ARM)
+        asm_fprintf (f, "\tstr\t%r, [%r, #-4]!\n", reg0, SP_REGNUM);
+      else
+        asm_fprintf (f, "\tpush\t{%r}\n", reg0);
+    }
+
+  if (issym)
+    {
+      if (is_non_opt_thumb2 || is_thumb2_hi_reg[0])
+        {
+          const char *str ;
+          str = (const char *) XSTR  (stack_limit_rtx, 0);
+          asm_fprintf (f, "\tmovw\t%r, #:lower16:%s\n", reg0, str);
+          asm_fprintf (f, "\tmovt\t%r, #:upper16:%s\n", reg0, str);
+        }
+      else
+        {
+          asm_fprintf (f, "\tldr\t%r, ", reg0);
+          assemble_name (f, pool_lable_str); /* =stack_limit_rtx */
+          fputs ("\n", f);
+        }
+
+      if (flag_stack_check == INDIRECT_STACK_CHECK)
+        asm_fprintf (f, "\tldr\t%r, [%r]\n", reg0, reg0);
+      if (amount)
+        {
+          if (amount_const_ok)
+            {
+              if (TARGET_32BIT)
+                asm_fprintf (f, "\tadds\t%r, %r, #%d\n", reg0, reg0, 
amount);
+              else
+                asm_fprintf (f, "\tadd\t%r, %r, #%d\n", reg0, reg0, 
amount);
+            }
+          else
+            {
+              if (is_non_opt_thumb2 || is_thumb2_hi_reg[1])
+                {
+                  asm_fprintf (f, "\tmovw\t%r, #0x%X\n", reg1, 
amount&0xFFFF);
+                  asm_fprintf (f, "\tmovt\t%r, #0x%X\n", reg1,
+                    (amount>>16)&0xFFFF);
+                }
+              else
+                {
+                  asm_fprintf (f, "\tldr\t%r, ", reg1);
+                  assemble_name (f, pool_lable_str); /* =amount */
+                  if (is_thumb2_hi_reg[0])
+                    fputs ("\n", f);
+                  else
+                    fputs (" + 4\n", f);
+                }
+              asm_fprintf (f, "\tadd\t%r, %r, %r\n", reg0, reg0, reg1);
+            }
+        }
+      asm_fprintf (f, "\tcmp\t%r, %r\n", SP_REGNUM, reg0);
+    }
+  else if (amount)
+    {
+      if (amount_const_ok)
+        asm_fprintf (f, "\tmov\t%r, #%d\n", reg0, amount);
+      else
+        {
+          if (is_non_opt_thumb2 || is_thumb2_hi_reg[0])
+            {
+              asm_fprintf (f, "\tmovw\t%r, #0x%X\n", reg0, amount&0xFFFF);
+              asm_fprintf (f, "\tmovt\t%r, #0x%X\n", 
reg0,(amount>>16)&0xFFFF);
+            }
+          else
+            {
+              asm_fprintf (f, "\tldr\t%r, ", reg0);
+              assemble_name (f, pool_lable_str); /* amount */
+              fputs ("\n", f);
+            }
+        }
+      asm_fprintf (f, "\tadd\t%r, %r, %r\n", 
reg0,reg0,REGNO(stack_limit_rtx));
+      asm_fprintf (f, "\tcmp\t%r, %r\n", SP_REGNUM, reg0);
+    }
+  else
+    asm_fprintf (f, "\tcmp\t%r, %r\n", SP_REGNUM, REGNO(stack_limit_rtx));
+  asm_fprintf (f, "\tbhs\t");
+  assemble_name (f, ok_lable_str);
+  fputs ("\n", f);
+
+  if (TARGET_ARM)
+    {
+      asm_fprintf (f, "\tstr\t%r, [%r, #-4]!\n", LR_REGNUM, SP_REGNUM);
+      asm_fprintf (f, "\tbl\t__arm_stack_failure\t%@ stack check\n");
+    }
+  else
+    {
+      asm_fprintf (f, "\tpush\t{%r}\n", LR_REGNUM);
+      asm_fprintf (f, "\tbl\t__thumb_stack_failure\t%@ stack check\n");
+    }
+
+    /*pool*/
+    if ( (issym && !(is_non_opt_thumb2 || is_thumb2_hi_reg[0]))
+       ||(amount && !amount_const_ok
+ && !(  (issym && is_thumb2_hi_reg[1])
+             || (!issym && is_thumb2_hi_reg[0])
+             || is_non_opt_thumb2)))
+    {
+      /*temp regs: collect values from here*/
+      if (!TARGET_ARM)
+        ASM_OUTPUT_ALIGN (f, 2);
+      ASM_OUTPUT_LABEL(f,pool_lable_str);
+      if (issym && !(is_non_opt_thumb2 || is_thumb2_hi_reg[0]))
+        assemble_aligned_integer (UNITS_PER_WORD, stack_limit_rtx);
+      if (amount && !amount_const_ok
+ && !(  (issym && is_thumb2_hi_reg[1])
+             || (!issym && is_thumb2_hi_reg[0])
+             || is_non_opt_thumb2))
+        assemble_aligned_integer (UNITS_PER_WORD, GEN_INT (amount));
+    }
+  ASM_OUTPUT_LABEL(f,ok_lable_str);
+  if (issym && amount) /*pop temp regs used by limit and amount*/
+    {
+      if (numregs >= 2)
+        ; /*no need to pop*/
+      else if (numregs == 1)
+        {
+          if (amount_needsreg)
+            {
+              if (TARGET_ARM)
+                asm_fprintf (f, "\tldr\t%r, [%r, #4]!\n", reg1, SP_REGNUM);
+              else
+                asm_fprintf (f, "\tpop\t{%r}\n", reg1);
+            }
+        }
+      else if (amount_needsreg)
+        {
+          if (TARGET_ARM)
+            asm_fprintf (f, "\tldmfd\t%r!, {%r,%r}\n", SP_REGNUM, reg0, 
reg1);
+          else
+            asm_fprintf (f, "\tpop\t{%r,%r}\n", reg0, reg1);
+        }
+      else
+        {
+          if (TARGET_ARM)
+            asm_fprintf (f, "\tldr\t%r, [%r, #4]!\n", reg0, SP_REGNUM);
+          else
+            asm_fprintf (f, "\tpop\t{%r}\n", reg0);
+        }
+    }
+  else if ((issym || amount_needsreg) && numregs == 0)
+    { /*pop temp reg used by limit or amount*/
+      if (TARGET_ARM)
+        asm_fprintf (f, "\tldr\t%r, [%r, #4]!\n", reg0, SP_REGNUM);
+      else
+        asm_fprintf (f, "\tpop\t{%r}\n", reg0);
+    }
+}
+
  const char *
  arm_output_epilogue (rtx sibling)
  {
@@ -15800,6 +16077,72 @@ thumb_set_frame_pointer (arm_stack_offsets *offset
    RTX_FRAME_RELATED_P (insn) = 1;
  }

+/* Search for possible work registers for stack-check operation at prologue;
+   return the number of registers that can be used without extra push/pop.  */
+
+static int
+stack_check_work_registers (rtx *workreg)
+{
+  int reg, i, k, n, nregs;
+
+  if (crtl->args.info.pcs_variant <= ARM_PCS_AAPCS_LOCAL)
+    {
+      nregs = crtl->args.info.aapcs_next_ncrn;
+    }
+  else
+    nregs = crtl->args.info.nregs;
+
+
+  n = 0;
+  i = 0;
+  /* check if we can use one of the argument registers r0..r3 as long 
as they
+   * not holding data*/
+  for (reg = 0; reg <= LAST_ARG_REGNUM && i < 2; reg++)
+    {
+      if (  !df_regs_ever_live_p (reg)
+         || (cfun->machine->uses_anonymous_args && 
crtl->args.pretend_args_size
+ > (LAST_ARG_REGNUM - reg) * UNITS_PER_WORD)
+         || (!cfun->machine->uses_anonymous_args && nregs < reg + 1)
+         )
+        {
+      workreg[i++] = gen_rtx_REG (SImode, reg);
+      n = (reg + 1) % 4;
+        }
+    }
+
+  /* otherwise try to use r4..r7*/
+  for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM && i < 2; reg++)
+    {
+      if (  df_regs_ever_live_p (reg)
+ && !fixed_regs[reg]
+ && reg != FP_REGNUM )
+        {
+      workreg[i++] = gen_rtx_REG (SImode, reg);
+        }
+    }
+
+  if (TARGET_32BIT)
+    {
+      /* ARM and Thumb-2 can use high regs.  */
+      for (reg = FIRST_HI_REGNUM; reg <= LAST_HI_REGNUM && i < 2; reg ++)
+        if (  df_regs_ever_live_p (reg)
+ && !fixed_regs[reg]
+ && reg != FP_REGNUM )
+          {
+        workreg[i++] = gen_rtx_REG (SImode, reg);
+          }
+    }
+
+  k = i;
+  /* if not enough found to be uses without extra push,
+   * collect next from r0..r4*/
+  for ( ; i<2; i++)
+    workreg[i] = gen_rtx_REG (SImode, n++);
+
+  return k;
+}
+
+
  /* Generate the prologue instructions for entry into an ARM or Thumb-2
     function.  */
  void
@@ -16049,6 +16392,24 @@ arm_expand_prologue (void)
      current_function_static_stack_size
        = offsets->outgoing_args - offsets->saved_args;

+  if (  crtl->limit_stack
+ && !(IS_INTERRUPT (func_type))
+ && (  flag_stack_check == DIRECT_STACK_CHECK
+        || flag_stack_check == INDIRECT_STACK_CHECK)
+ && (offsets->outgoing_args - offsets->saved_args) > 0
+     )
+    {
+      rtx reg[2], num_temp_regs;
+
+      amount = GEN_INT (offsets->outgoing_args - saved_regs
+            - offsets->saved_args);
+      num_temp_regs = GEN_INT (stack_check_work_registers(reg));
+      insn = gen_stack_check (stack_pointer_rtx,
+                              reg[0], reg[1], stack_limit_rtx,
+                              amount, num_temp_regs);
+      insn = emit_insn (insn);
+    }
+
    if (offsets->outgoing_args != offsets->saved_args + saved_regs)
      {
        /* This add can produce multiple insns for a large constant, so we
@@ -21403,6 +21764,26 @@ thumb1_output_function_prologue (FILE *f, HOST_WID
          thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
      }
      }
+
+  if(  crtl->limit_stack
+ && (  flag_stack_check == DIRECT_STACK_CHECK
+       || flag_stack_check == INDIRECT_STACK_CHECK)
+ && (offsets->outgoing_args - offsets->saved_args)
+    )
+    {
+      unsigned amount, numregs;
+      int reg0, reg1;
+      rtx reg[2];
+
+      amount = offsets->outgoing_args - offsets->saved_regs;
+      amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
+
+      numregs = stack_check_work_registers(reg);
+      reg0 = REGNO (reg[0]);
+      reg1 = REGNO (reg[1]);
+
+      stack_check_output_function  (f, reg0, reg1, amount, numregs);
+    }
  }

  /* Handle the case of a double word load into a low register from
Index: gcc/config/arm/arm.md
===================================================================
--- gcc/config/arm/arm.md    (revision 175346)
+++ gcc/config/arm/arm.md    (working copy)
@@ -105,6 +105,7 @@
    UNSPEC_SYMBOL_OFFSET  ; The offset of the start of the symbol from
                          ; another symbolic address.
    UNSPEC_MEMORY_BARRIER ; Represent a memory barrier.
+  UNSPEC_PROBE_STACK    ; probe stack memory reference
  ])

  ;; UNSPEC_VOLATILE Usage:
@@ -10741,6 +10742,113 @@

  ;;

+;; Expand a stack probe of operand 0.  Nothing is emitted when
+;; flag_stack_check is DIRECT_STACK_CHECK or INDIRECT_STACK_CHECK; otherwise
+;; zero is stored to operand 0, then probe_stack_done and a blockage follow.
+(define_expand "probe_stack"
+  [(match_operand 0 "memory_operand" "")]
+  "TARGET_EITHER"
+{
+  if (flag_stack_check != DIRECT_STACK_CHECK
+      && flag_stack_check != INDIRECT_STACK_CHECK)
+    {
+      emit_move_insn (operands[0], const0_rtx);
+      emit_insn (gen_probe_stack_done ());
+      emit_insn (gen_blockage ());
+    }
+  DONE;
+})
+
+;; Zero-length marker after a stack probe; prints only an assembler comment.
+;; NOTE(review): UNSPEC_* code used in unspec_volatile -- confirm intended.
+(define_insn "probe_stack_done"
+  [(unspec_volatile [(const_int 0)] UNSPEC_PROBE_STACK)]
+  "TARGET_EITHER"
+  "%@ probe stack done"
+  [(set_attr "type" "store1") (set_attr "length" "0")])
+
+;; Emit a check of operand 0 against stack_limit_rtx: the stack_failure insn
+;; is jumped over when stack_limit_rtx <= operand 0 (unsigned compare).
+(define_expand "generic_limit_check_stack"
+  [(match_operand 0 "memory_operand" "")]
+  "crtl->limit_stack
+ && flag_stack_check != DIRECT_STACK_CHECK
+ && flag_stack_check != INDIRECT_STACK_CHECK"
+{
+  rtx ok_label = gen_label_rtx ();
+  rtx probe = gen_rtx_MINUS (Pmode, copy_rtx (operands[0]), GEN_INT (0));
+  probe = force_operand (probe, NULL_RTX);
+  emit_insn (gen_blockage ());
+  emit_cmp_and_jump_insns (stack_limit_rtx, probe, LEU, NULL_RTX, Pmode, 1,
+                           ok_label);
+  emit_insn (gen_stack_failure ());
+  emit_label (ok_label);
+  emit_insn (gen_blockage ());
+  DONE;
+})
+
+;; Stack limit check emitted in the prologue.  The assembly text is printed
+;; by stack_check_output_function, which is declared locally below.
+;; Operands:
+;;   0  register, constraint "k"        3  must be stack_limit_rtx
+;;   1  first register passed on        2  second register passed on
+;;   4  CONST_INT passed as amount      5  CONST_INT passed as numregs
+;; The length attribute is 44 when is_thumb is "yes" and 52 otherwise.
+(define_insn "stack_check"
+  [(set (match_operand:SI 0 "register_operand" "=k")
+        (match_operand:SI 3 "general_operand"  "sr"))
+   (match_operand:SI 1 "register_operand" "r")
+   (match_operand:SI 2 "register_operand" "r")
+   (match_operand:SI 4 "general_operand"  "i")
+   (match_operand:SI 5 "general_operand"  "i")
+   (clobber (reg:CC CC_REGNUM))]
+  "TARGET_32BIT
+ && (operands[3] == stack_limit_rtx)
+ && (GET_CODE (operands[4]) == CONST_INT)
+ && (GET_CODE (operands[5]) == CONST_INT)"
+  "*
+  {
+    extern void stack_check_output_function (FILE *, int, int, unsigned,
+                                            unsigned);
+    int reg0 = REGNO (operands[1]);
+    int reg1 = REGNO (operands[2]);
+    unsigned amount = INTVAL (operands[4]);
+    unsigned numregs = INTVAL (operands[5]);
+
+    stack_check_output_function (asm_out_file, reg0, reg1, amount, numregs);
+  }
+  return \"\";
+  "
+  [(set_attr "conds" "clob")
+   (set (attr "length")
+        (if_then_else (eq_attr "is_thumb" "yes")
+                      (const_int 44)
+                      (const_int 52)))])
+
+;; Unconditional trap emitted by generic_limit_check_stack: push LR, then
+;; call __arm_stack_failure (ARM) or __thumb_stack_failure (otherwise).
+;; Two instructions are printed, so "length" is 8 bytes (the ARM size).
+(define_insn "stack_failure"
+  [(trap_if (const_int 1) (const_int 0))]
+  "TARGET_EITHER"
+  "*
+    rtx ops[2];
+    ops[0] = stack_pointer_rtx;
+    ops[1] = gen_rtx_REG (SImode, LR_REGNUM);
+    if (TARGET_ARM)
+      {
+        output_asm_insn (\"str\\t%1, [%0, #-4]!\", ops);
+        output_asm_insn (\"bl\\t__arm_stack_failure\\t%@ trap call\", ops);
+      }
+    else
+      {
+        output_asm_insn (\"push\\t{%1}\", ops);
+        output_asm_insn (\"bl\\t__thumb_stack_failure\\t%@ trap call\", ops);
+      }
+    return \"\";
+  "
+  [(set_attr "length" "8")])
+
  ;; We only care about the lower 16 bits of the constant
  ;; being inserted into the upper 16 bits of the register.
  (define_insn "*arm_movtas_ze"

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2011-09-20 20:42 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-09-04 15:48 Ping: C-family stack check for threads Thomas Klein
2011-09-05  9:45 ` Ye Joey
2011-09-05 18:25   ` Thomas Klein
  -- strict thread matches above, loose matches on Subject: below --
2011-09-20 17:09 Thomas Klein
2011-09-20 22:07 ` Joseph S. Myers
2011-07-04 20:28 Thomas Klein
2011-07-05 16:11 ` Richard Henderson
2011-08-02 17:22   ` Thomas Klein
2011-07-03 13:09 Thomas Klein
2011-07-03 17:56 ` Richard Henderson
2011-07-13 12:57 ` Hans-Peter Nilsson
2011-06-24 14:10 Thomas Klein
2011-06-30  9:36 ` Ye Joey

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).