#pragma GCC unroll support

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* #pragma GCC unroll support
@ 2014-12-22 21:51 Mike Stump
  2014-12-23 20:22 ` Andi Kleen
  2015-01-08 12:45 ` Richard Biener
  0 siblings, 2 replies; 25+ messages in thread
From: Mike Stump @ 2014-12-22 21:51 UTC (permalink / raw)
  To: gcc-patches Patches

[-- Attachment #1: Type: text/plain, Size: 1903 bytes --]

So, I have a rough cut of a new feature to add pragma GCC unroll support to gcc.  It is safe wrt the test suite and code-gen, but there are some corners that I need help or suggestions on how to round off nicely.

Things to think about, can we put this in even if we have TODO support for C++ template support?  I think that is only 2-5 lines away from completion, but one needs to play peak-a-boo with some data (the unroll count) and I wanted to let pt fingers decide where to hide the data.  See RANGE_FOR_IVDEP for how I want to do it.

Can I increase the size of all annotations to 3 from 2?  It seemed safer, easier than trying to subdivide it.

I didn’t engineer ivdeps and unroll together.  Does it sound reasonable to allow both to be used at the same time on the same loop?  If so, I can add the two other cases, presently I just handle one of them then the loop.

Fortran support is left to the fortran people, if they want to do it.  I wired it up tantalizingly close for them to complete.

See ICK in the code.  I was unsure how to resolve that code.

Does unroll 8 mean that the loop is repeated 8 times?

Can I turn on peeling in try_peel_loop by simply wanted to do it for 1 loop?

I support using -1 for a directive that says, don’t peel, don’t unroll.  As a UI issue, I think this is wrong.  I want to to be either 0 or 1, those two seem better.  But, not sure which is the right one of the two.  Which number says, don’t unroll, I’m smarter than you think.

If we have a loop that we know can only be unroll 7 times, and the user says unroll 8, should we unroll it 7 times?  Presently I do.  The other option, is to ignore the directive when we know it is non-sensicle.

Yes, I’m aware that this isn’t the right phase for this, but such are business cycles.  It would not go in until we reenter stage 1.  I see no value in trying to squeeze it in past stage 1.


[-- Attachment #2: unroll-2.diffs.txt --]
[-- Type: text/plain, Size: 39310 bytes --]

Index: ada/gcc-interface/trans.c
===================================================================
--- ada/gcc-interface/trans.c	(revision 219031)
+++ ada/gcc-interface/trans.c	(working copy)
@@ -7850,17 +7850,20 @@ gnat_gimplify_stmt (tree *stmt_p)
 	  {
 	    /* Deal with the optimization hints.  */
 	    if (LOOP_STMT_IVDEP (stmt))
-	      gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
+	      gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
 				 build_int_cst (integer_type_node,
-						annot_expr_ivdep_kind));
+						annot_expr_ivdep_kind),
+				 integer_zero_node);
 	    if (LOOP_STMT_NO_VECTOR (stmt))
-	      gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
+	      gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
 				 build_int_cst (integer_type_node,
-						annot_expr_no_vector_kind));
+						annot_expr_no_vector_kind),
+				 integer_zero_node);
 	    if (LOOP_STMT_VECTOR (stmt))
-	      gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
+	      gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
 				 build_int_cst (integer_type_node,
-						annot_expr_vector_kind));
+						annot_expr_vector_kind),
+				 integer_zero_node);
 
 	    gnu_cond
 	      = build3 (COND_EXPR, void_type_node, gnu_cond, NULL_TREE,
Index: c/c-parser.c
===================================================================
--- c/c-parser.c	(revision 219031)
+++ c/c-parser.c	(working copy)
@@ -1206,9 +1206,9 @@ static void c_parser_statement (c_parser
 static void c_parser_statement_after_labels (c_parser *);
 static void c_parser_if_statement (c_parser *);
 static void c_parser_switch_statement (c_parser *);
-static void c_parser_while_statement (c_parser *, bool);
-static void c_parser_do_statement (c_parser *, bool);
-static void c_parser_for_statement (c_parser *, bool);
+static void c_parser_while_statement (c_parser *, bool, int);
+static void c_parser_do_statement (c_parser *, bool, int);
+static void c_parser_for_statement (c_parser *, bool, int);
 static tree c_parser_asm_statement (c_parser *);
 static tree c_parser_asm_operands (c_parser *);
 static tree c_parser_asm_goto_operands (c_parser *);
@@ -4925,13 +4925,13 @@ c_parser_statement_after_labels (c_parse
 	  c_parser_switch_statement (parser);
 	  break;
 	case RID_WHILE:
-	  c_parser_while_statement (parser, false);
+	  c_parser_while_statement (parser, false, 0);
 	  break;
 	case RID_DO:
-	  c_parser_do_statement (parser, false);
+	  c_parser_do_statement (parser, false, 0);
 	  break;
 	case RID_FOR:
-	  c_parser_for_statement (parser, false);
+	  c_parser_for_statement (parser, false, 0);
 	  break;
 	case RID_CILK_FOR:
 	  if (!flag_cilkplus)
@@ -5293,7 +5293,7 @@ c_parser_switch_statement (c_parser *par
 */
 
 static void
-c_parser_while_statement (c_parser *parser, bool ivdep)
+c_parser_while_statement (c_parser *parser, bool ivdep, int unroll)
 {
   tree block, cond, body, save_break, save_cont;
   location_t loc;
@@ -5307,9 +5307,15 @@ c_parser_while_statement (c_parser *pars
 	 "%<_Cilk_spawn%> statement cannot be used as a condition for while statement"))
     cond = error_mark_node;
   if (ivdep && cond != error_mark_node)
-    cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
 		   build_int_cst (integer_type_node,
-		   annot_expr_ivdep_kind));
+				  annot_expr_ivdep_kind),
+		   integer_zero_node);
+  if (unroll && cond != error_mark_node)
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+		   build_int_cst (integer_type_node,
+				  annot_expr_unroll_kind),
+		   build_int_cst (integer_type_node, unroll));
   save_break = c_break_label;
   c_break_label = NULL_TREE;
   save_cont = c_cont_label;
@@ -5328,7 +5334,7 @@ c_parser_while_statement (c_parser *pars
 */
 
 static void
-c_parser_do_statement (c_parser *parser, bool ivdep)
+c_parser_do_statement (c_parser *parser, bool ivdep, int unroll)
 {
   tree block, cond, body, save_break, save_cont, new_break, new_cont;
   location_t loc;
@@ -5356,9 +5362,16 @@ c_parser_do_statement (c_parser *parser,
 	 "%<_Cilk_spawn%> statement cannot be used as a condition for a do-while statement"))
     cond = error_mark_node;
   if (ivdep && cond != error_mark_node)
-    cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+		   build_int_cst (integer_type_node,
+				  annot_expr_ivdep_kind),
+		   integer_zero_node);
+  if (unroll && cond != error_mark_node)
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
 		   build_int_cst (integer_type_node,
-		   annot_expr_ivdep_kind));
+				  annot_expr_unroll_kind),
+		   build_int_cst (integer_type_node,
+				  unroll));
   if (!c_parser_require (parser, CPP_SEMICOLON, "expected %<;%>"))
     c_parser_skip_to_end_of_block_or_statement (parser);
   c_finish_loop (loc, cond, NULL, body, new_break, new_cont, false);
@@ -5422,7 +5435,7 @@ c_parser_do_statement (c_parser *parser,
 */
 
 static void
-c_parser_for_statement (c_parser *parser, bool ivdep)
+c_parser_for_statement (c_parser *parser, bool ivdep, int unroll)
 {
   tree block, cond, incr, save_break, save_cont, body;
   /* The following are only used when parsing an ObjC foreach statement.  */
@@ -5540,6 +5553,12 @@ c_parser_for_statement (c_parser *parser
 				  "%<GCC ivdep%> pragma");
 		  cond = error_mark_node;
 		}
+	      else if (unroll)
+		{
+		  c_parser_error (parser, "missing loop condition in loop with "
+				  "%<GCC unroll%> pragma");
+		  cond = error_mark_node;
+		}
 	      else
 		{
 		  c_parser_consume_token (parser);
@@ -5557,9 +5576,15 @@ c_parser_for_statement (c_parser *parser
 					 "expected %<;%>");
 	    }
 	  if (ivdep && cond != error_mark_node)
-	    cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+	    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+			   build_int_cst (integer_type_node,
+					  annot_expr_ivdep_kind),
+			   integer_zero_node);
+	  if (unroll && cond != error_mark_node)
+	    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
 			   build_int_cst (integer_type_node,
-			   annot_expr_ivdep_kind));
+					  annot_expr_unroll_kind),
+			   build_int_cst (integer_type_node, unroll));
 	}
       /* Parse the increment expression (the third expression in a
 	 for-statement).  In the case of a foreach-statement, this is
@@ -9658,12 +9683,42 @@ c_parser_pragma (c_parser *parser, enum
 	  return false;
 	}
       if (c_parser_next_token_is_keyword (parser, RID_FOR))
-	c_parser_for_statement (parser, true);
+	c_parser_for_statement (parser, true, 0);
       else if (c_parser_next_token_is_keyword (parser, RID_WHILE))
-	c_parser_while_statement (parser, true);
+	c_parser_while_statement (parser, true, 0);
       else
-	c_parser_do_statement (parser, true);
+	c_parser_do_statement (parser, true, 0);
       return false;
+    case PRAGMA_UNROLL:
+      {
+	c_parser_consume_pragma (parser);
+	tree expr = c_parser_expr_no_commas (parser, NULL).value;
+	mark_exp_read (expr);
+	expr = c_fully_fold (expr, false, NULL);
+	if (!INTEGRAL_TYPE_P (TREE_TYPE (expr))
+	    || TREE_CODE (expr) != INTEGER_CST)
+	  {
+	    c_parser_error (parser, "%<#pragma GCC unroll%> requires a number");
+	    expr = integer_zero_node;
+	  }
+	int unroll = tree_to_shwi (expr);
+
+	c_parser_skip_to_pragma_eol (parser);
+	if (!c_parser_next_token_is_keyword (parser, RID_FOR)
+	    && !c_parser_next_token_is_keyword (parser, RID_WHILE)
+	    && !c_parser_next_token_is_keyword (parser, RID_DO))
+	  {
+	    c_parser_error (parser, "for, while or do statement expected");
+	    return false;
+	  }
+	if (c_parser_next_token_is_keyword (parser, RID_FOR))
+	  c_parser_for_statement (parser, false, unroll);
+	else if (c_parser_next_token_is_keyword (parser, RID_WHILE))
+	  c_parser_while_statement (parser, false, unroll);
+	else
+	  c_parser_do_statement (parser, false, unroll);
+	return false;
+      }
 
     case PRAGMA_GCC_PCH_PREPROCESS:
       c_parser_error (parser, "%<#pragma GCC pch_preprocess%> must be first");
Index: c-family/c-pragma.c
===================================================================
--- c-family/c-pragma.c	(revision 219031)
+++ c-family/c-pragma.c	(working copy)
@@ -1415,6 +1415,10 @@ init_pragma (void)
     cpp_register_deferred_pragma (parse_in, "GCC", "ivdep", PRAGMA_IVDEP, false,
 				  false);
 
+  if (!flag_preprocess_only)
+    cpp_register_deferred_pragma (parse_in, "GCC", "unroll", PRAGMA_UNROLL, false,
+				  false);
+
   if (flag_cilkplus && !flag_preprocess_only)
     cpp_register_deferred_pragma (parse_in, "cilk", "grainsize",
 				  PRAGMA_CILK_GRAINSIZE, true, false);
Index: c-family/c-pragma.h
===================================================================
--- c-family/c-pragma.h	(revision 219031)
+++ c-family/c-pragma.h	(working copy)
@@ -60,6 +60,7 @@ typedef enum pragma_kind {
 
   PRAGMA_GCC_PCH_PREPROCESS,
   PRAGMA_IVDEP,
+  PRAGMA_UNROLL,
 
   PRAGMA_FIRST_EXTERNAL
 } pragma_kind;
Index: cfgloop.h
===================================================================
--- cfgloop.h	(revision 219031)
+++ cfgloop.h	(working copy)
@@ -189,6 +189,11 @@ struct GTY ((chain_next ("%h.next"))) lo
      of the loop can be safely evaluated concurrently.  */
   int safelen;
 
+
+  /* The number of times to unroll the loop.  0, means no information
+     given, just do what we always do.  */
+  int unroll;
+
   /* True if this loop should never be vectorized.  */
   bool dont_vectorize;
 
Index: cp/cp-array-notation.c
===================================================================
--- cp/cp-array-notation.c	(revision 219031)
+++ cp/cp-array-notation.c	(working copy)
@@ -71,7 +71,7 @@ create_an_loop (tree init, tree cond, tr
   finish_expr_stmt (init);
   for_stmt = begin_for_stmt (NULL_TREE, NULL_TREE);
   finish_for_init_stmt (for_stmt);
-  finish_for_cond (cond, for_stmt, false);
+  finish_for_cond (cond, for_stmt, false, 0);
   finish_for_expr (incr, for_stmt);
   finish_expr_stmt (body);
   finish_for_stmt (for_stmt);
Index: cp/cp-tree.h
===================================================================
--- cp/cp-tree.h	(revision 219031)
+++ cp/cp-tree.h	(working copy)
@@ -5645,7 +5645,7 @@ extern tree implicitly_declare_fn
 extern bool maybe_clone_body			(tree);
 
 /* In parser.c */
-extern tree cp_convert_range_for (tree, tree, tree, bool);
+extern tree cp_convert_range_for (tree, tree, tree, bool, int);
 extern bool parsing_nsdmi (void);
 extern void inject_this_parameter (tree, cp_cv_quals);
 
@@ -5881,16 +5881,16 @@ extern void begin_else_clause			(tree);
 extern void finish_else_clause			(tree);
 extern void finish_if_stmt			(tree);
 extern tree begin_while_stmt			(void);
-extern void finish_while_stmt_cond		(tree, tree, bool);
+extern void finish_while_stmt_cond		(tree, tree, bool, int);
 extern void finish_while_stmt			(tree);
 extern tree begin_do_stmt			(void);
 extern void finish_do_body			(tree);
-extern void finish_do_stmt			(tree, tree, bool);
+extern void finish_do_stmt			(tree, tree, bool, int);
 extern tree finish_return_stmt			(tree);
 extern tree begin_for_scope			(tree *);
 extern tree begin_for_stmt			(tree, tree);
 extern void finish_for_init_stmt		(tree);
-extern void finish_for_cond			(tree, tree, bool);
+extern void finish_for_cond			(tree, tree, bool, int);
 extern void finish_for_expr			(tree, tree);
 extern void finish_for_stmt			(tree);
 extern tree begin_range_for_stmt		(tree, tree);
Index: cp/init.c
===================================================================
--- cp/init.c	(revision 219031)
+++ cp/init.c	(working copy)
@@ -3689,7 +3689,7 @@ build_vec_init (tree base, tree maxindex
       finish_for_init_stmt (for_stmt);
       finish_for_cond (build2 (NE_EXPR, boolean_type_node, iterator,
 			       build_int_cst (TREE_TYPE (iterator), -1)),
-		       for_stmt, false);
+		       for_stmt, false, 0);
       elt_init = cp_build_unary_op (PREDECREMENT_EXPR, iterator, 0,
 				    complain);
       if (elt_init == error_mark_node)
Index: cp/parser.c
===================================================================
--- cp/parser.c	(revision 219031)
+++ cp/parser.c	(working copy)
@@ -2038,15 +2038,15 @@ static tree cp_parser_selection_statemen
 static tree cp_parser_condition
   (cp_parser *);
 static tree cp_parser_iteration_statement
-  (cp_parser *, bool);
+  (cp_parser *, bool, int);
 static bool cp_parser_for_init_statement
   (cp_parser *, tree *decl);
 static tree cp_parser_for
-  (cp_parser *, bool);
+  (cp_parser *, bool, int);
 static tree cp_parser_c_for
-  (cp_parser *, tree, tree, bool);
+  (cp_parser *, tree, tree, bool, int);
 static tree cp_parser_range_for
-  (cp_parser *, tree, tree, tree, bool);
+  (cp_parser *, tree, tree, tree, bool, int);
 static void do_range_for_auto_deduction
   (tree, tree);
 static tree cp_parser_perform_range_for_lookup
@@ -9652,7 +9652,7 @@ cp_parser_statement (cp_parser* parser,
 	case RID_WHILE:
 	case RID_DO:
 	case RID_FOR:
-	  statement = cp_parser_iteration_statement (parser, false);
+	  statement = cp_parser_iteration_statement (parser, false, 0);
 	  break;
 
 	case RID_CILK_FOR:
@@ -10344,7 +10344,7 @@ cp_parser_condition (cp_parser* parser)
    not included. */
 
 static tree
-cp_parser_for (cp_parser *parser, bool ivdep)
+cp_parser_for (cp_parser *parser, bool ivdep, int unroll)
 {
   tree init, scope, decl;
   bool is_range_for;
@@ -10356,13 +10356,13 @@ cp_parser_for (cp_parser *parser, bool i
   is_range_for = cp_parser_for_init_statement (parser, &decl);
 
   if (is_range_for)
-    return cp_parser_range_for (parser, scope, init, decl, ivdep);
+    return cp_parser_range_for (parser, scope, init, decl, ivdep, unroll);
   else
-    return cp_parser_c_for (parser, scope, init, ivdep);
+    return cp_parser_c_for (parser, scope, init, ivdep, unroll);
 }
 
 static tree
-cp_parser_c_for (cp_parser *parser, tree scope, tree init, bool ivdep)
+cp_parser_c_for (cp_parser *parser, tree scope, tree init, bool ivdep, int unroll)
 {
   /* Normal for loop */
   tree condition = NULL_TREE;
@@ -10383,7 +10383,13 @@ cp_parser_c_for (cp_parser *parser, tree
 		       "%<GCC ivdep%> pragma");
       condition = error_mark_node;
     }
-  finish_for_cond (condition, stmt, ivdep);
+  else if (unroll)
+    {
+      cp_parser_error (parser, "missing loop condition in loop with "
+		       "%<GCC unroll%> pragma");
+      condition = error_mark_node;
+    }
+  finish_for_cond (condition, stmt, ivdep, unroll);
   /* Look for the `;'.  */
   cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON);
 
@@ -10407,7 +10413,7 @@ cp_parser_c_for (cp_parser *parser, tree
 
 static tree
 cp_parser_range_for (cp_parser *parser, tree scope, tree init, tree range_decl,
-		     bool ivdep)
+		     bool ivdep, int unroll)
 {
   tree stmt, range_expr;
 
@@ -10428,6 +10434,8 @@ cp_parser_range_for (cp_parser *parser,
       stmt = begin_range_for_stmt (scope, init);
       if (ivdep)
 	RANGE_FOR_IVDEP (stmt) = 1;
+      if (unroll)
+	/* TODO */(void)0;
       finish_range_for_decl (stmt, range_decl, range_expr);
       if (!type_dependent_expression_p (range_expr)
 	  /* do_auto_deduction doesn't mess with template init-lists.  */
@@ -10437,7 +10445,7 @@ cp_parser_range_for (cp_parser *parser,
   else
     {
       stmt = begin_for_stmt (scope, init);
-      stmt = cp_convert_range_for (stmt, range_decl, range_expr, ivdep);
+      stmt = cp_convert_range_for (stmt, range_decl, range_expr, ivdep, unroll);
     }
   return stmt;
 }
@@ -10529,7 +10537,7 @@ do_range_for_auto_deduction (tree decl,
 
 tree
 cp_convert_range_for (tree statement, tree range_decl, tree range_expr,
-		      bool ivdep)
+		      bool ivdep, int unroll)
 {
   tree begin, end;
   tree iter_type, begin_expr, end_expr;
@@ -10586,7 +10594,7 @@ cp_convert_range_for (tree statement, tr
 				 begin, ERROR_MARK,
 				 end, ERROR_MARK,
 				 NULL, tf_warning_or_error);
-  finish_for_cond (condition, statement, ivdep);
+  finish_for_cond (condition, statement, ivdep, unroll);
 
   /* The new increment expression.  */
   expression = finish_unary_op_expr (input_location,
@@ -10747,7 +10755,7 @@ cp_parser_range_for_member_function (tre
    Returns the new WHILE_STMT, DO_STMT, FOR_STMT or RANGE_FOR_STMT.  */
 
 static tree
-cp_parser_iteration_statement (cp_parser* parser, bool ivdep)
+cp_parser_iteration_statement (cp_parser* parser, bool ivdep, int unroll)
 {
   cp_token *token;
   enum rid keyword;
@@ -10777,7 +10785,7 @@ cp_parser_iteration_statement (cp_parser
 	cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN);
 	/* Parse the condition.  */
 	condition = cp_parser_condition (parser);
-	finish_while_stmt_cond (condition, statement, ivdep);
+	finish_while_stmt_cond (condition, statement, ivdep, unroll);
 	/* Look for the `)'.  */
 	cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN);
 	/* Parse the dependent statement.  */
@@ -10807,7 +10815,7 @@ cp_parser_iteration_statement (cp_parser
 	/* Parse the expression.  */
 	expression = cp_parser_expression (parser);
 	/* We're done with the do-statement.  */
-	finish_do_stmt (expression, statement, ivdep);
+	finish_do_stmt (expression, statement, ivdep, unroll);
 	/* Look for the `)'.  */
 	cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN);
 	/* Look for the `;'.  */
@@ -10820,7 +10828,7 @@ cp_parser_iteration_statement (cp_parser
 	/* Look for the `('.  */
 	cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN);
 
-	statement = cp_parser_for (parser, ivdep);
+	statement = cp_parser_for (parser, ivdep, unroll);
 
 	/* Look for the `)'.  */
 	cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN);
@@ -32135,7 +32143,32 @@ cp_parser_pragma (cp_parser *parser, enu
 	    cp_parser_error (parser, "for, while or do statement expected");
 	    return false;
 	  }
-	cp_parser_iteration_statement (parser, true);
+	cp_parser_iteration_statement (parser, true, 0);
+	return true;
+      }
+
+    case PRAGMA_UNROLL:
+      {
+	tree expr = cp_parser_constant_expression (parser);
+	expr = maybe_constant_value (expr);
+	cp_parser_skip_to_pragma_eol (parser, pragma_tok);
+	if (!INTEGRAL_TYPE_P (TREE_TYPE (expr))
+	    || TREE_CODE (expr) != INTEGER_CST)
+	  {
+	    cp_parser_error (parser, "%<#pragma GCC unroll%> requires a number");
+	    expr = integer_zero_node;
+	  }
+	int unroll = tree_to_shwi (expr);
+	cp_token *tok;
+	tok = cp_lexer_peek_token (the_parser->lexer);
+	if (tok->type != CPP_KEYWORD
+	    || (tok->keyword != RID_FOR && tok->keyword != RID_WHILE
+		&& tok->keyword != RID_DO))
+	  {
+	    cp_parser_error (parser, "for, while or do statement expected");
+	    return false;
+	  }
+	cp_parser_iteration_statement (parser, false, unroll);
 	return true;
       }
 
Index: cp/pt.c
===================================================================
--- cp/pt.c	(revision 219031)
+++ cp/pt.c	(working copy)
@@ -13876,7 +13876,7 @@ tsubst_expr (tree t, tree args, tsubst_f
       RECUR (FOR_INIT_STMT (t));
       finish_for_init_stmt (stmt);
       tmp = RECUR (FOR_COND (t));
-      finish_for_cond (tmp, stmt, false);
+      finish_for_cond (tmp, stmt, false, 0);
       tmp = RECUR (FOR_EXPR (t));
       finish_for_expr (tmp, stmt);
       RECUR (FOR_BODY (t));
@@ -13891,7 +13891,7 @@ tsubst_expr (tree t, tree args, tsubst_f
         decl = tsubst (decl, args, complain, in_decl);
         maybe_push_decl (decl);
         expr = RECUR (RANGE_FOR_EXPR (t));
-        stmt = cp_convert_range_for (stmt, decl, expr, RANGE_FOR_IVDEP (t));
+        stmt = cp_convert_range_for (stmt, decl, expr, RANGE_FOR_IVDEP (t), 0);
         RECUR (RANGE_FOR_BODY (t));
         finish_for_stmt (stmt);
       }
@@ -13900,7 +13900,7 @@ tsubst_expr (tree t, tree args, tsubst_f
     case WHILE_STMT:
       stmt = begin_while_stmt ();
       tmp = RECUR (WHILE_COND (t));
-      finish_while_stmt_cond (tmp, stmt, false);
+      finish_while_stmt_cond (tmp, stmt, false, 0);
       RECUR (WHILE_BODY (t));
       finish_while_stmt (stmt);
       break;
@@ -13910,7 +13910,7 @@ tsubst_expr (tree t, tree args, tsubst_f
       RECUR (DO_BODY (t));
       finish_do_body (stmt);
       tmp = RECUR (DO_COND (t));
-      finish_do_stmt (tmp, stmt, false);
+      finish_do_stmt (tmp, stmt, false, 0);
       break;
 
     case IF_STMT:
@@ -14348,8 +14348,8 @@ tsubst_expr (tree t, tree args, tsubst_f
 
     case ANNOTATE_EXPR:
       tmp = RECUR (TREE_OPERAND (t, 0));
-      RETURN (build2_loc (EXPR_LOCATION (t), ANNOTATE_EXPR,
-			  TREE_TYPE (tmp), tmp, RECUR (TREE_OPERAND (t, 1))));
+      RETURN (build3_loc (EXPR_LOCATION (t), ANNOTATE_EXPR,
+			  TREE_TYPE (tmp), tmp, RECUR (TREE_OPERAND (t, 1)), RECUR (TREE_OPERAND (t, 2))));
 
     default:
       gcc_assert (!STATEMENT_CODE_P (TREE_CODE (t)));
Index: cp/semantics.c
===================================================================
--- cp/semantics.c	(revision 219031)
+++ cp/semantics.c	(working copy)
@@ -796,7 +796,7 @@ begin_while_stmt (void)
    WHILE_STMT.  */
 
 void
-finish_while_stmt_cond (tree cond, tree while_stmt, bool ivdep)
+finish_while_stmt_cond (tree cond, tree while_stmt, bool ivdep, int unroll)
 {
   if (check_no_cilk (cond,
       "Cilk array notation cannot be used as a condition for while statement",
@@ -806,11 +806,19 @@ finish_while_stmt_cond (tree cond, tree
   finish_cond (&WHILE_COND (while_stmt), cond);
   begin_maybe_infinite_loop (cond);
   if (ivdep && cond != error_mark_node)
-    WHILE_COND (while_stmt) = build2 (ANNOTATE_EXPR,
+    WHILE_COND (while_stmt) = build3 (ANNOTATE_EXPR,
 				      TREE_TYPE (WHILE_COND (while_stmt)),
 				      WHILE_COND (while_stmt),
 				      build_int_cst (integer_type_node,
-						     annot_expr_ivdep_kind));
+						     annot_expr_ivdep_kind),
+				      integer_zero_node);
+  if (unroll && cond != error_mark_node)
+    WHILE_COND (while_stmt) = build3 (ANNOTATE_EXPR,
+				      TREE_TYPE (WHILE_COND (while_stmt)),
+				      WHILE_COND (while_stmt),
+				      build_int_cst (integer_type_node,
+						     annot_expr_unroll_kind),
+				      build_int_cst (integer_type_node, unroll));
   simplify_loop_decl_cond (&WHILE_COND (while_stmt), WHILE_BODY (while_stmt));
 }
 
@@ -855,7 +863,7 @@ finish_do_body (tree do_stmt)
    COND is as indicated.  */
 
 void
-finish_do_stmt (tree cond, tree do_stmt, bool ivdep)
+finish_do_stmt (tree cond, tree do_stmt, bool ivdep, int unroll)
 {
   if (check_no_cilk (cond,
   "Cilk array notation cannot be used as a condition for a do-while statement",
@@ -864,8 +872,13 @@ finish_do_stmt (tree cond, tree do_stmt,
   cond = maybe_convert_cond (cond);
   end_maybe_infinite_loop (cond);
   if (ivdep && cond != error_mark_node)
-    cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
-		   build_int_cst (integer_type_node, annot_expr_ivdep_kind));
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+		   build_int_cst (integer_type_node, annot_expr_ivdep_kind),
+		   integer_zero_node);
+  if (unroll && cond != error_mark_node)
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+		   build_int_cst (integer_type_node, annot_expr_unroll_kind),
+		   build_int_cst (integer_type_node, unroll));
   DO_COND (do_stmt) = cond;
 }
 
@@ -968,7 +981,7 @@ finish_for_init_stmt (tree for_stmt)
    FOR_STMT.  */
 
 void
-finish_for_cond (tree cond, tree for_stmt, bool ivdep)
+finish_for_cond (tree cond, tree for_stmt, bool ivdep, int unroll)
 {
   if (check_no_cilk (cond,
 	 "Cilk array notation cannot be used in a condition for a for-loop",
@@ -978,11 +991,20 @@ finish_for_cond (tree cond, tree for_stm
   finish_cond (&FOR_COND (for_stmt), cond);
   begin_maybe_infinite_loop (cond);
   if (ivdep && cond != error_mark_node)
-    FOR_COND (for_stmt) = build2 (ANNOTATE_EXPR,
+    FOR_COND (for_stmt) = build3 (ANNOTATE_EXPR,
 				  TREE_TYPE (FOR_COND (for_stmt)),
 				  FOR_COND (for_stmt),
 				  build_int_cst (integer_type_node,
-						 annot_expr_ivdep_kind));
+						 annot_expr_ivdep_kind),
+				  integer_zero_node);
+  if (unroll && cond != error_mark_node)
+    FOR_COND (for_stmt) = build3 (ANNOTATE_EXPR,
+				  TREE_TYPE (FOR_COND (for_stmt)),
+				  FOR_COND (for_stmt),
+				  build_int_cst (integer_type_node,
+						 annot_expr_unroll_kind),
+				  build_int_cst (integer_type_node,
+						 unroll));
   simplify_loop_decl_cond (&FOR_COND (for_stmt), FOR_BODY (for_stmt));
 }
 
Index: fortran/trans-stmt.c
===================================================================
--- fortran/trans-stmt.c	(revision 219031)
+++ fortran/trans-stmt.c	(working copy)
@@ -2790,9 +2790,10 @@ gfc_trans_forall_loop (forall_info *fora
       cond = fold_build2_loc (input_location, LE_EXPR, boolean_type_node,
 			      count, build_int_cst (TREE_TYPE (count), 0));
       if (forall_tmp->do_concurrent)
-	cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+	cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
 		       build_int_cst (integer_type_node,
-				      annot_expr_ivdep_kind));
+				      annot_expr_ivdep_kind),
+		       integer_zero_node);
 
       tmp = build1_v (GOTO_EXPR, exit_label);
       tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node,
Index: gimplify.c
===================================================================
--- gimplify.c	(revision 219031)
+++ gimplify.c	(working copy)
@@ -2888,6 +2888,7 @@ gimple_boolify (tree expr)
 	case annot_expr_ivdep_kind:
 	case annot_expr_no_vector_kind:
 	case annot_expr_vector_kind:
+	case annot_expr_unroll_kind:
 	  TREE_OPERAND (expr, 0) = gimple_boolify (TREE_OPERAND (expr, 0));
 	  if (TREE_CODE (type) != BOOLEAN_TYPE)
 	    TREE_TYPE (expr) = boolean_type_node;
@@ -7784,6 +7785,7 @@ gimplify_expr (tree *expr_p, gimple_seq
 	  {
 	    tree cond = TREE_OPERAND (*expr_p, 0);
 	    tree kind = TREE_OPERAND (*expr_p, 1);
+	    tree data = TREE_OPERAND (*expr_p, 2);
 	    tree type = TREE_TYPE (cond);
 	    if (!INTEGRAL_TYPE_P (type))
 	      {
@@ -7794,7 +7796,7 @@ gimplify_expr (tree *expr_p, gimple_seq
 	    tree tmp = create_tmp_var (type);
 	    gimplify_arg (&cond, pre_p, EXPR_LOCATION (*expr_p));
 	    gcall *call
-	      = gimple_build_call_internal (IFN_ANNOTATE, 2, cond, kind);
+	      = gimple_build_call_internal (IFN_ANNOTATE, 3, cond, kind, data);
 	    gimple_call_set_lhs (call, tmp);
 	    gimplify_seq_add_stmt (pre_p, call);
 	    *expr_p = tmp;
Index: loop-unroll.c
===================================================================
--- loop-unroll.c	(revision 219031)
+++ loop-unroll.c	(working copy)
@@ -405,6 +405,19 @@ decide_unroll_constant_iterations (struc
       return;
     }
 
+  if (loop->unroll > 0)
+    {
+      loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
+      loop->lpt_decision.times = loop->unroll - 1;
+      if (loop->lpt_decision.times > desc->niter - 2) /* ICK: see ICK below.  */
+	{
+	  /* They won't do this for us.  */
+	  loop->lpt_decision.decision = LPT_NONE;
+	  loop->lpt_decision.times = desc->niter - 2;
+	}
+      return;
+    }
+
   /* Check whether the loop rolls enough to consider.  
      Consult also loop bounds and profile; in the case the loop has more
      than one exit it may well loop less than determined maximal number
@@ -426,7 +439,7 @@ decide_unroll_constant_iterations (struc
   best_copies = 2 * nunroll + 10;
 
   i = 2 * nunroll + 2;
-  if (i - 1 >= desc->niter)
+  if (i > desc->niter - 2) /* ICK, compare the first assert in unroll_loop_constant_iterations */
     i = desc->niter - 2;
 
   for (; i >= nunroll - 1; i--)
@@ -678,6 +691,9 @@ decide_unroll_runtime_iterations (struct
   if (targetm.loop_unroll_adjust)
     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 
+  if (loop->unroll > 0)
+    nunroll = loop->unroll;
+
   /* Skip big loops.  */
   if (nunroll <= 1)
     {
@@ -707,9 +723,11 @@ decide_unroll_runtime_iterations (struct
     }
 
   /* Check whether the loop rolls.  */
-  if ((get_estimated_loop_iterations (loop, &iterations)
-       || get_max_loop_iterations (loop, &iterations))
-      && wi::ltu_p (iterations, 2 * nunroll))
+  if (loop->unroll)
+    ;
+  else if ((get_estimated_loop_iterations (loop, &iterations)
+	    || get_max_loop_iterations (loop, &iterations))
+	   && wi::ltu_p (iterations, 2 * nunroll))
     {
       if (dump_file)
 	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
@@ -1125,6 +1143,9 @@ decide_unroll_stupid (struct loop *loop,
   if (targetm.loop_unroll_adjust)
     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 
+  if (loop->unroll > 0)
+    nunroll = loop->unroll;
+
   /* Skip big loops.  */
   if (nunroll <= 1)
     {
Index: lto-streamer-in.c
===================================================================
--- lto-streamer-in.c	(revision 219031)
+++ lto-streamer-in.c	(working copy)
@@ -734,6 +734,7 @@ input_cfg (struct lto_input_block *ib, s
 
       /* Read OMP SIMD related info.  */
       loop->safelen = streamer_read_hwi (ib);
+      loop->unroll = streamer_read_hwi (ib);
       loop->dont_vectorize = streamer_read_hwi (ib);
       loop->force_vectorize = streamer_read_hwi (ib);
       loop->simduid = stream_read_tree (ib, data_in);
Index: lto-streamer-out.c
===================================================================
--- lto-streamer-out.c	(revision 219031)
+++ lto-streamer-out.c	(working copy)
@@ -1863,6 +1863,7 @@ output_cfg (struct output_block *ob, str
 
       /* Write OMP SIMD related info.  */
       streamer_write_hwi (ob, loop->safelen);
+      streamer_write_hwi (ob, loop->unroll);
       streamer_write_hwi (ob, loop->dont_vectorize);
       streamer_write_hwi (ob, loop->force_vectorize);
       stream_write_tree (ob, loop->simduid, true);
Index: tree-cfg.c
===================================================================
--- tree-cfg.c	(revision 219031)
+++ tree-cfg.c	(working copy)
@@ -299,6 +299,9 @@ replace_loop_annotate_in_block (basic_bl
 	  loop->force_vectorize = true;
 	  cfun->has_force_vectorize_loops = true;
 	  break;
+	case annot_expr_unroll_kind:
+	  loop->unroll = tree_to_shwi (gimple_call_arg (stmt, 2));
+	  break;
 	default:
 	  gcc_unreachable ();
 	}
@@ -348,6 +351,7 @@ replace_loop_annotate (void)
 	    case annot_expr_ivdep_kind:
 	    case annot_expr_no_vector_kind:
 	    case annot_expr_vector_kind:
+	    case annot_expr_unroll_kind:
 	      break;
 	    default:
 	      gcc_unreachable ();
Index: tree-core.h
===================================================================
--- tree-core.h	(revision 219031)
+++ tree-core.h	(working copy)
@@ -685,6 +685,7 @@ enum annot_expr_kind {
   annot_expr_ivdep_kind,
   annot_expr_no_vector_kind,
   annot_expr_vector_kind,
+  annot_expr_unroll_kind,
   annot_expr_kind_last
 };
 
Index: tree-pretty-print.c
===================================================================
--- tree-pretty-print.c	(revision 219031)
+++ tree-pretty-print.c	(working copy)
@@ -2165,6 +2165,10 @@ dump_generic_node (pretty_printer *pp, t
 	case annot_expr_vector_kind:
 	  pp_string (pp, ", vector");
 	  break;
+	case annot_expr_unroll_kind:
+	  pp_printf (buffer, ", unroll %ld",
+		     (long)TREE_INT_CST_LOW (TREE_OPERAND (node, 2)));
+	  break;
 	default:
 	  gcc_unreachable ();
 	}
Index: tree-ssa-loop-ivcanon.c
===================================================================
--- tree-ssa-loop-ivcanon.c	(revision 219031)
+++ tree-ssa-loop-ivcanon.c	(working copy)
@@ -341,7 +341,10 @@ tree_estimate_loop_size (struct loop *lo
 	      if (likely_eliminated || likely_eliminated_last)
 		size->last_iteration_eliminated_by_peeling += num;
 	    }
-	  if ((size->overall * 3 / 2 - size->eliminated_by_peeling
+	  /* A loop that we want to unroll is never too large.  */
+	  if (loop->unroll > 0)
+	    ;
+	  else if ((size->overall * 3 / 2 - size->eliminated_by_peeling
 	      - size->last_iteration_eliminated_by_peeling) > upper_bound)
 	    {
               free (body);
@@ -725,7 +728,8 @@ try_unroll_loop_completely (struct loop
   if (!n_unroll_found)
     return false;
 
-  if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
+  if (loop->unroll <= 0 &&
+      n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "Not unrolling loop %d "
@@ -747,11 +751,19 @@ try_unroll_loop_completely (struct loop
       if (ul == UL_SINGLE_ITER)
 	return false;
 
+      if (loop->unroll == -1)
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "Not unrolling loop %d: already peeled for the user.\n",
+		     loop->num);
+	  return false;
+	}
+
       large = tree_estimate_loop_size
 		 (loop, exit, edge_to_cancel, &size,
 		  PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
       ninsns = size.overall;
-      if (large)
+      if (loop->unroll == 0 && large)
 	{
 	  if (dump_file && (dump_flags & TDF_DETAILS))
 	    fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
@@ -767,13 +779,27 @@ try_unroll_loop_completely (struct loop
 		   (int) unr_insns);
 	}
 
-      /* If the code is going to shrink, we don't need to be extra cautious
-	 on guessing if the unrolling is going to be profitable.  */
-      if (unr_insns
-	  /* If there is IV variable that will become constant, we save
-	     one instruction in the loop prologue we do not account
-	     otherwise.  */
-	  <= ninsns + (size.constant_iv != false))
+      /* It the user asked us to do it, we don't need to be cautious.
+         Even if the user said unroll more than what the code says,
+         trust the code.  */
+      if (loop->unroll > 0 && n_unroll <= (unsigned)loop->unroll - 1)
+	n_unroll = MIN ((unsigned)loop->unroll - 1, n_unroll);
+      else if (loop->unroll)
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "Not unrolling loop %d: "
+		     "user didn't want the entire thing unrolled.\n",
+		     loop->num);
+	  return false;
+	}
+      /* If the code is going to shrink, we don't need to be extra
+	 cautious on guessing if the unrolling is going to be
+	 profitable.  */
+      else if (unr_insns
+	       /* If there is IV variable that will become constant,
+		  we save one instruction in the loop prologue we do
+		  not account otherwise.  */
+	       <= ninsns + (size.constant_iv != false))
 	;
       /* We unroll only inner loops, because we do not consider it profitable
 	 otheriwse.  We still can cancel loopback edge of not rolling loop;
@@ -965,18 +991,26 @@ try_peel_loop (struct loop *loop,
   if (TREE_CODE (niter) != INTEGER_CST)
     exit = NULL;
 
-  if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0)
+  if ((!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0)
+      && loop->unroll > 0)
     return false;
 
   /* Peel only innermost loops.  */
-  if (loop->inner)
+  if (loop->unroll == 0 && loop->inner)
     {
       if (dump_file)
         fprintf (dump_file, "Not peeling: outer loop\n");
       return false;
     }
 
-  if (!optimize_loop_for_speed_p (loop))
+  if (loop->unroll < 0)
+    {
+      if (dump_file)
+        fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
+      return false;
+    }
+
+  if (loop->unroll == 0 && !optimize_loop_for_speed_p (loop))
     {
       if (dump_file)
         fprintf (dump_file, "Not peeling: cold loop\n");
@@ -985,6 +1019,10 @@ try_peel_loop (struct loop *loop,
 
   /* Check if there is an estimate on the number of iterations.  */
   npeel = estimated_loop_iterations_int (loop);
+
+  if (loop->unroll > 0 && npeel > 0 && npeel > loop->unroll)
+    npeel = loop->unroll - 1;
+
   if (npeel < 0)
     {
       if (dump_file)
@@ -992,7 +1030,7 @@ try_peel_loop (struct loop *loop,
 	         "estimated\n");
       return false;
     }
-  if (maxiter >= 0 && maxiter <= npeel)
+  if (loop->unroll == 0 && maxiter >= 0 && maxiter <= npeel)
     {
       if (dump_file)
         fprintf (dump_file, "Not peeling: upper bound is known so can "
@@ -1003,7 +1041,8 @@ try_peel_loop (struct loop *loop,
   /* We want to peel estimated number of iterations + 1 (so we never
      enter the loop on quick path).  Check against PARAM_MAX_PEEL_TIMES
      and be sure to avoid overflows.  */
-  if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
+  if (loop->unroll == 0
+      && npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
     {
       if (dump_file)
         fprintf (dump_file, "Not peeling: rolls too much "
@@ -1015,7 +1054,9 @@ try_peel_loop (struct loop *loop,
   /* Check peeled loops size.  */
   tree_estimate_loop_size (loop, exit, NULL, &size,
 			   PARAM_VALUE (PARAM_MAX_PEELED_INSNS));
-  if ((peeled_size = estimated_peeled_sequence_size (&size, npeel))
+  if (loop->unroll > 0)
+    ;
+  else if ((peeled_size = estimated_peeled_sequence_size (&size, npeel))
       > PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
     {
       if (dump_file)
@@ -1051,6 +1092,8 @@ try_peel_loop (struct loop *loop,
       fprintf (dump_file, "Peeled loop %d, %i times.\n",
 	       loop->num, npeel);
     }
+  /* If we peeled things, don't try and expand it further later.  */
+  loop->unroll = -1;
   if (loop->any_upper_bound)
     loop->nb_iterations_upper_bound -= npeel;
   loop->nb_iterations_estimate = 0;
@@ -1296,7 +1339,9 @@ tree_unroll_loops_completely_1 (bool may
   if (!loop_father)
     return false;
 
-  if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
+  if (loop->unroll > 0)
+    ul = UL_ALL;
+  else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
       /* Unroll outermost loops only if asked to do so or they do
 	 not cause code growth.  */
       && (unroll_outer || loop_outer (loop_father)))
Index: tree.def
===================================================================
--- tree.def	(revision 219031)
+++ tree.def	(working copy)
@@ -1315,8 +1315,9 @@ DEFTREECODE (TARGET_OPTION_NODE, "target
 
 /* ANNOTATE_EXPR.
    Operand 0 is the expression to be annotated.
-   Operand 1 is the annotation kind.  */
-DEFTREECODE (ANNOTATE_EXPR, "annotate_expr", tcc_expression, 2)
+   Operand 1 is the annotation kind.
+   Operand 2 is optional data.  */
+DEFTREECODE (ANNOTATE_EXPR, "annotate_expr", tcc_expression, 3)
 
 /* Cilk spawn statement
    Operand 0 is the CALL_EXPR.  */

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: #pragma GCC unroll support
  2014-12-22 21:51 #pragma GCC unroll support Mike Stump
@ 2014-12-23 20:22 ` Andi Kleen
  2015-01-08 12:45 ` Richard Biener
  1 sibling, 0 replies; 25+ messages in thread
From: Andi Kleen @ 2014-12-23 20:22 UTC (permalink / raw)
  To: Mike Stump; +Cc: gcc-patches Patches

Mike Stump <mikestump@comcast.net> writes:

Thanks for doing that. I missed this pragma several times.

> I didn’t engineer ivdeps and unroll together.  Does it sound
> reasonable to allow both to be used at the same time on the same loop?
> If so, I can add the two other cases, presently I just handle one of
> them then the loop.

Yes it would be useful, in theory this could allow more vectorizing
(although I'm not sure the BB vectorizer would handle it right now)

>
> I support using -1 for a directive that says, don’t peel, don’t
> unroll.  As a UI issue, I think this is wrong.  I want to to be either
> 0 or 1, those two seem better.  But, not sure which is the right one
> of the two.  Which number says, don’t unroll, I’m smarter than you
> think.

Define a new pragma for these cases?

>
> If we have a loop that we know can only be unroll 7 times, and the
> user says unroll 8, should we unroll it 7 times?  Presently I do.  The
> other option, is to ignore the directive when we know it is
> non-sensicle.

I think it's fine to only unroll 7 times, better than not unrolling.

Patch looks ok to me from a quick read except for the missing
documentation (but I cannot approve anything)

-Andi

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: #pragma GCC unroll support
  2014-12-22 21:51 #pragma GCC unroll support Mike Stump
  2014-12-23 20:22 ` Andi Kleen
@ 2015-01-08 12:45 ` Richard Biener
  2015-01-26 21:13   ` Mike Stump
  1 sibling, 1 reply; 25+ messages in thread
From: Richard Biener @ 2015-01-08 12:45 UTC (permalink / raw)
  To: Mike Stump; +Cc: gcc-patches Patches

On Mon, Dec 22, 2014 at 10:13 PM, Mike Stump <mikestump@comcast.net> wrote:
> So, I have a rough cut of a new feature to add pragma GCC unroll support to gcc.  It is safe wrt the test suite and code-gen, but there are some corners that I need help or suggestions on how to round off nicely.
>
> Things to think about, can we put this in even if we have TODO support for C++ template support?  I think that is only 2-5 lines away from completion, but one needs to play peak-a-boo with some data (the unroll count) and I wanted to let pt fingers decide where to hide the data.  See RANGE_FOR_IVDEP for how I want to do it.
>
> Can I increase the size of all annotations to 3 from 2?  It seemed safer, easier than trying to subdivide it.

Sure.  But I'd make the 2nd operand optional (thus use NULL_TREE,
not integer_zero_node for existing builds).

You also need to check that 'unroll' fits in the target integer_type_node
(consider AVR!) and otherwise diagnose it.

The 'unroll' member in struct loop should be unsigned (or you need to
document what negative values are supposed to mean).  It also
should be smaller than int (struct loop may be heavily used), possibly
short or even (un)signed char(?).

@@ -341,7 +341,10 @@ tree_estimate_loop_size (struct loop *lo
              if (likely_eliminated || likely_eliminated_last)
                size->last_iteration_eliminated_by_peeling += num;
            }
-         if ((size->overall * 3 / 2 - size->eliminated_by_peeling
+         /* A loop that we want to unroll is never too large.  */
+         if (loop->unroll > 0)
+           ;

but if we end up unrolling more than loop->unroll then it may be too large?
That is this check should be in the caller, not here.

I think you miss updating copy_loop_info (and places where we don't
use that but still copy loops somehow).

> I didn’t engineer ivdeps and unroll together.  Does it sound reasonable to allow both to be used at the same time on the same loop?  If so, I can add the two other cases, presently I just handle one of them then the loop.

Yes.

> Fortran support is left to the fortran people, if they want to do it.  I wired it up tantalizingly close for them to complete.
>
> See ICK in the code.  I was unsure how to resolve that code.
>
> Does unroll 8 mean that the loop is repeated 8 times?

Up to you to define - what do other compilers do for #pragma unroll 0
and #pragma unroll 1?

> Can I turn on peeling in try_peel_loop by simply wanted to do it for 1 loop?

?

> I support using -1 for a directive that says, don’t peel, don’t unroll.  As a UI issue, I think this is wrong.  I want to to be either 0 or 1, those two seem better.  But, not sure which is the right one of the two.  Which number says, don’t unroll, I’m smarter than you think.

#pragma unroll 0

or

#pragma nounroll

what do other compilers do?

> If we have a loop that we know can only be unroll 7 times, and the user says unroll 8, should we unroll it 7 times?  Presently I do.  The other option, is to ignore the directive when we know it is non-sensicle.

Yes, unroll 7 times.

> Yes, I’m aware that this isn’t the right phase for this, but such are business cycles.  It would not go in until we reenter stage 1.  I see no value in trying to squeeze it in past stage 1.

The interesting parts are mostly frontend stuff, the middle-end bits look
fine apart from the above issues (it feels like you need to add too many
checks for loop->unroll in the peeler...)

Richard.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: #pragma GCC unroll support
  2015-01-08 12:45 ` Richard Biener
@ 2015-01-26 21:13   ` Mike Stump
  2015-01-26 21:18     ` Mike Stump
  2015-01-30  2:54     ` Joseph Myers
  0 siblings, 2 replies; 25+ messages in thread
From: Mike Stump @ 2015-01-26 21:13 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches Patches, Jason Merrill, Joseph S. Myers

[-- Attachment #1: Type: text/plain, Size: 4369 bytes --]

Jason, Joseph, this is stage 1 material (unless someone else wants to try and make an argument for it sooner), if you could review the parser (frontend) bits, that would be wonderful.  The mid-end, and back-end bits Richard was reviewing.

On Jan 8, 2015, at 4:45 AM, Richard Biener <richard.guenther@gmail.com> wrote:
> But I'd make the 2nd operand optional (thus use NULL_TREE,
> not integer_zero_node for existing builds).

Fixed.

> You also need to check that 'unroll' fits in the target integer_type_node
> (consider AVR!) and otherwise diagnose it.

Yeah, I’ve tightened this down.

> The 'unroll' member in struct loop should be unsigned

Fixed.

> (or you need to document what negative values are supposed to mean).

Fixed.

> It also should be smaller than int (struct loop may be heavily used), possibly
> short or even (un)signed char(?).

We talked around here, and we felt unsigned short would be a good choice.  Seems to be more in the GNU spirit as well.  Cray’s documentation has a limit of 64.

> @@ -341,7 +341,10 @@ tree_estimate_loop_size (struct loop *lo
>              if (likely_eliminated || likely_eliminated_last)
>                size->last_iteration_eliminated_by_peeling += num;
>            }
> -         if ((size->overall * 3 / 2 - size->eliminated_by_peeling
> +         /* A loop that we want to unroll is never too large.  */
> +         if (loop->unroll > 0)
> +           ;
> 
> but if we end up unrolling more than loop->unroll then it may be too large?
> That is this check should be in the caller, not here.

Fixed.

> I think you miss updating copy_loop_info (and places where we don't
> use that but still copy loops somehow).

I’ve fixed up copy_loop_info.  I searched for other routines based upon that other members, and found two, first one is:

/* Allocates and returns new loop structure.  */

struct loop *
alloc_loop (void)

and I thought about adding code to initialize it:

  loop->unroll = 0;

but after tracing to ensure that ggc_cleared_alloc meant what I thought it mean, that would be redundant, so I did not.

I missed print_loop, which I added:

+  if (loop->unroll)
+    fprintf (file, ", unroll = %d", loop->unroll);

I didn’t find any other hits.

>> I didn’t engineer ivdeps and unroll together.  Does it sound reasonable to allow both to be used at the same time on the same loop?  If so, I can add the two other cases, presently I just handle one of them then the loop.
> 
> Yes.

Fixed.

>> Does unroll 8 mean that the loop is repeated 8 times?
> 
> Up to you to define - what do other compilers do for #pragma unroll 0
> and #pragma unroll 1?

I googled for cray pragma unroll and found it.  They have a pragma unroll and it takes a numeric argument.  They have 0 and 1 mean don’t unroll.  That mirrors roughly with what I was thinking those two numbers meant, so I pushed the code in that direction and got rid of the -1.  I don’t see any compelling reason to deviate from what they did, and this should make the compiler more predictable by people in this space.  I’m happier now having read the Cray documentation about the mapping.

>> Can I turn on peeling in try_peel_loop by simply wanted to do it for 1 loop?
> 
> ?

So, after fixing up all the other code to unroll, it seems sufficient to rely upon it and not have the peeler do anything.  This allows me to not use the peeler, so this question is now moot.

> #pragma unroll 0
> 
> or
> 
> #pragma nounroll
> 
> what do other compilers do?

Good question.  I googled for a hypothetical pragma unroll and found it.  :-)  They support 0 and 1 as don’t unroll the loop.  I’ve adjusted the patch to do the same.

>> Yes, I’m aware that this isn’t the right phase for this, but such are business cycles.  It would not go in until we reenter stage 1.  I see no value in trying to squeeze it in past stage 1.

> the middle-end bits look fine apart from the above issues (it feels like you need to add too many
> checks for loop->unroll in the peeler…)

I did a cleanup pass to collapse down what tests I could from the entire patch set.  I also was able to engineer out the peeler from doing the unrolling.  Should be nicer now.

Tested on x86_64-unknown-linux-gnu.

Back-end/mid-end bits Ok?

C front-end bits Ok?

C++ front-end bits Ok?


[-- Attachment #2: unroll-3e.diffs.txt --]
[-- Type: text/plain, Size: 59623 bytes --]

Index: gcc/ada/gcc-interface/trans.c
===================================================================
--- gcc/ada/gcc-interface/trans.c	(revision 220084)
+++ gcc/ada/gcc-interface/trans.c	(working copy)
@@ -7870,17 +7870,20 @@ gnat_gimplify_stmt (tree *stmt_p)
 	  {
 	    /* Deal with the optimization hints.  */
 	    if (LOOP_STMT_IVDEP (stmt))
-	      gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
+	      gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
 				 build_int_cst (integer_type_node,
-						annot_expr_ivdep_kind));
+						annot_expr_ivdep_kind),
+				 NULL_TREE);
 	    if (LOOP_STMT_NO_VECTOR (stmt))
-	      gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
+	      gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
 				 build_int_cst (integer_type_node,
-						annot_expr_no_vector_kind));
+						annot_expr_no_vector_kind),
+				 NULL_TREE);
 	    if (LOOP_STMT_VECTOR (stmt))
-	      gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
+	      gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
 				 build_int_cst (integer_type_node,
-						annot_expr_vector_kind));
+						annot_expr_vector_kind),
+				 NULL_TREE);
 
 	    gnu_cond
 	      = build3 (COND_EXPR, void_type_node, gnu_cond, NULL_TREE,
Index: gcc/c/c-parser.c
===================================================================
--- gcc/c/c-parser.c	(revision 220084)
+++ gcc/c/c-parser.c	(working copy)
@@ -1217,9 +1217,9 @@ static void c_parser_statement (c_parser
 static void c_parser_statement_after_labels (c_parser *);
 static void c_parser_if_statement (c_parser *);
 static void c_parser_switch_statement (c_parser *);
-static void c_parser_while_statement (c_parser *, bool);
-static void c_parser_do_statement (c_parser *, bool);
-static void c_parser_for_statement (c_parser *, bool);
+static void c_parser_while_statement (c_parser *, bool, unsigned short);
+static void c_parser_do_statement (c_parser *, bool, unsigned short);
+static void c_parser_for_statement (c_parser *, bool, unsigned short);
 static tree c_parser_asm_statement (c_parser *);
 static tree c_parser_asm_operands (c_parser *);
 static tree c_parser_asm_goto_operands (c_parser *);
@@ -4972,13 +4972,13 @@ c_parser_statement_after_labels (c_parse
 	  c_parser_switch_statement (parser);
 	  break;
 	case RID_WHILE:
-	  c_parser_while_statement (parser, false);
+	  c_parser_while_statement (parser, false, 0);
 	  break;
 	case RID_DO:
-	  c_parser_do_statement (parser, false);
+	  c_parser_do_statement (parser, false, 0);
 	  break;
 	case RID_FOR:
-	  c_parser_for_statement (parser, false);
+	  c_parser_for_statement (parser, false, 0);
 	  break;
 	case RID_CILK_FOR:
 	  if (!flag_cilkplus)
@@ -5340,7 +5340,7 @@ c_parser_switch_statement (c_parser *par
 */
 
 static void
-c_parser_while_statement (c_parser *parser, bool ivdep)
+c_parser_while_statement (c_parser *parser, bool ivdep, unsigned short unroll)
 {
   tree block, cond, body, save_break, save_cont;
   location_t loc;
@@ -5354,9 +5354,15 @@ c_parser_while_statement (c_parser *pars
 	 "%<_Cilk_spawn%> statement cannot be used as a condition for while statement"))
     cond = error_mark_node;
   if (ivdep && cond != error_mark_node)
-    cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
 		   build_int_cst (integer_type_node,
-		   annot_expr_ivdep_kind));
+				  annot_expr_ivdep_kind),
+		   NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+		   build_int_cst (integer_type_node,
+				  annot_expr_unroll_kind),
+		   build_int_cst (integer_type_node, unroll));
   save_break = c_break_label;
   c_break_label = NULL_TREE;
   save_cont = c_cont_label;
@@ -5375,7 +5381,7 @@ c_parser_while_statement (c_parser *pars
 */
 
 static void
-c_parser_do_statement (c_parser *parser, bool ivdep)
+c_parser_do_statement (c_parser *parser, bool ivdep, unsigned short unroll)
 {
   tree block, cond, body, save_break, save_cont, new_break, new_cont;
   location_t loc;
@@ -5403,9 +5409,16 @@ c_parser_do_statement (c_parser *parser,
 	 "%<_Cilk_spawn%> statement cannot be used as a condition for a do-while statement"))
     cond = error_mark_node;
   if (ivdep && cond != error_mark_node)
-    cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+		   build_int_cst (integer_type_node,
+				  annot_expr_ivdep_kind),
+		   NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
 		   build_int_cst (integer_type_node,
-		   annot_expr_ivdep_kind));
+				  annot_expr_unroll_kind),
+		   build_int_cst (integer_type_node,
+				  unroll));
   if (!c_parser_require (parser, CPP_SEMICOLON, "expected %<;%>"))
     c_parser_skip_to_end_of_block_or_statement (parser);
   c_finish_loop (loc, cond, NULL, body, new_break, new_cont, false);
@@ -5469,7 +5482,7 @@ c_parser_do_statement (c_parser *parser,
 */
 
 static void
-c_parser_for_statement (c_parser *parser, bool ivdep)
+c_parser_for_statement (c_parser *parser, bool ivdep, unsigned short unroll)
 {
   tree block, cond, incr, save_break, save_cont, body;
   /* The following are only used when parsing an ObjC foreach statement.  */
@@ -5587,6 +5600,12 @@ c_parser_for_statement (c_parser *parser
 				  "%<GCC ivdep%> pragma");
 		  cond = error_mark_node;
 		}
+	      else if (unroll)
+		{
+		  c_parser_error (parser, "missing loop condition in loop with "
+				  "%<GCC unroll%> pragma");
+		  cond = error_mark_node;
+		}
 	      else
 		{
 		  c_parser_consume_token (parser);
@@ -5604,9 +5623,15 @@ c_parser_for_statement (c_parser *parser
 					 "expected %<;%>");
 	    }
 	  if (ivdep && cond != error_mark_node)
-	    cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+	    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+			   build_int_cst (integer_type_node,
+					  annot_expr_ivdep_kind),
+			   NULL_TREE);
+	  if (unroll && cond != error_mark_node)
+	    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
 			   build_int_cst (integer_type_node,
-			   annot_expr_ivdep_kind));
+					  annot_expr_unroll_kind),
+			   build_int_cst (integer_type_node, unroll));
 	}
       /* Parse the increment expression (the third expression in a
 	 for-statement).  In the case of a foreach-statement, this is
@@ -9592,6 +9617,42 @@ c_parser_objc_at_dynamic_declaration (c_
 }
 
 \f
+static bool
+c_parse_pragma_ivdep (c_parser *parser)
+{
+  c_parser_consume_pragma (parser);
+  c_parser_skip_to_pragma_eol (parser);
+  return true;
+}
+
+static unsigned short
+c_parser_pragma_unroll (c_parser *parser)
+{
+  unsigned short unroll;
+  c_parser_consume_pragma (parser);
+  tree expr = c_parser_expr_no_commas (parser, NULL).value;
+  mark_exp_read (expr);
+  expr = c_fully_fold (expr, false, NULL);
+  HOST_WIDE_INT lunroll = 0;
+  if (!INTEGRAL_TYPE_P (TREE_TYPE (expr))
+      || TREE_CODE (expr) != INTEGER_CST
+      || (lunroll = tree_to_shwi (expr)) < 0
+      || lunroll > USHRT_MAX)
+    {
+      c_parser_error (parser, "%<#pragma GCC unroll%> requires a non-negative number");
+      unroll = 0;
+    }
+  else
+    {
+      unroll = (unsigned short)lunroll;
+      if (unroll == 0)
+	unroll = 1;
+    }
+
+  c_parser_skip_to_pragma_eol (parser);
+  return unroll;
+}
+
 /* Handle pragmas.  Some OpenMP pragmas are associated with, and therefore
    should be considered, statements.  ALLOW_STMT is true if we're within
    the context of a function and such pragmas are to be allowed.  Returns
@@ -9714,21 +9775,46 @@ c_parser_pragma (c_parser *parser, enum
       c_parser_omp_declare (parser, context);
       return false;
     case PRAGMA_IVDEP:
-      c_parser_consume_pragma (parser);
-      c_parser_skip_to_pragma_eol (parser);
-      if (!c_parser_next_token_is_keyword (parser, RID_FOR)
-	  && !c_parser_next_token_is_keyword (parser, RID_WHILE)
-	  && !c_parser_next_token_is_keyword (parser, RID_DO))
-	{
-	  c_parser_error (parser, "for, while or do statement expected");
-	  return false;
-	}
-      if (c_parser_next_token_is_keyword (parser, RID_FOR))
-	c_parser_for_statement (parser, true);
-      else if (c_parser_next_token_is_keyword (parser, RID_WHILE))
-	c_parser_while_statement (parser, true);
-      else
-	c_parser_do_statement (parser, true);
+      {
+	bool ivdep = c_parse_pragma_ivdep (parser);
+	unsigned short unroll = 0;
+	if (c_parser_peek_token (parser)->pragma_kind == PRAGMA_UNROLL)
+	  unroll = c_parser_pragma_unroll (parser);
+	if (!c_parser_next_token_is_keyword (parser, RID_FOR)
+	    && !c_parser_next_token_is_keyword (parser, RID_WHILE)
+	    && !c_parser_next_token_is_keyword (parser, RID_DO))
+	  {
+	    c_parser_error (parser, "for, while or do statement expected");
+	    return false;
+	  }
+	if (c_parser_next_token_is_keyword (parser, RID_FOR))
+	  c_parser_for_statement (parser, ivdep, unroll);
+	else if (c_parser_next_token_is_keyword (parser, RID_WHILE))
+	  c_parser_while_statement (parser, ivdep, unroll);
+	else
+	  c_parser_do_statement (parser, ivdep, unroll);
+      }
+      return false;
+    case PRAGMA_UNROLL:
+      {
+	unsigned short unroll = c_parser_pragma_unroll (parser);
+	bool ivdep = false;
+	if (c_parser_peek_token (parser)->pragma_kind == PRAGMA_IVDEP)
+	  ivdep = c_parse_pragma_ivdep (parser);
+	if (!c_parser_next_token_is_keyword (parser, RID_FOR)
+	    && !c_parser_next_token_is_keyword (parser, RID_WHILE)
+	    && !c_parser_next_token_is_keyword (parser, RID_DO))
+	  {
+	    c_parser_error (parser, "for, while or do statement expected");
+	    return false;
+	  }
+	if (c_parser_next_token_is_keyword (parser, RID_FOR))
+	  c_parser_for_statement (parser, ivdep, unroll);
+	else if (c_parser_next_token_is_keyword (parser, RID_WHILE))
+	  c_parser_while_statement (parser, ivdep, unroll);
+	else
+	  c_parser_do_statement (parser, ivdep, unroll);
+      }
       return false;
 
     case PRAGMA_GCC_PCH_PREPROCESS:
Index: gcc/c-family/c-pragma.c
===================================================================
--- gcc/c-family/c-pragma.c	(revision 220084)
+++ gcc/c-family/c-pragma.c	(working copy)
@@ -1456,6 +1456,10 @@ init_pragma (void)
     cpp_register_deferred_pragma (parse_in, "GCC", "ivdep", PRAGMA_IVDEP, false,
 				  false);
 
+  if (!flag_preprocess_only)
+    cpp_register_deferred_pragma (parse_in, "GCC", "unroll", PRAGMA_UNROLL, false,
+				  false);
+
   if (flag_cilkplus && !flag_preprocess_only)
     cpp_register_deferred_pragma (parse_in, "cilk", "grainsize",
 				  PRAGMA_CILK_GRAINSIZE, true, false);
Index: gcc/c-family/c-pragma.h
===================================================================
--- gcc/c-family/c-pragma.h	(revision 220084)
+++ gcc/c-family/c-pragma.h	(working copy)
@@ -69,6 +69,7 @@ typedef enum pragma_kind {
 
   PRAGMA_GCC_PCH_PREPROCESS,
   PRAGMA_IVDEP,
+  PRAGMA_UNROLL,
 
   PRAGMA_FIRST_EXTERNAL
 } pragma_kind;
Index: gcc/cfgloop.h
===================================================================
--- gcc/cfgloop.h	(revision 220084)
+++ gcc/cfgloop.h	(working copy)
@@ -189,6 +189,11 @@ struct GTY ((chain_next ("%h.next"))) lo
      of the loop can be safely evaluated concurrently.  */
   int safelen;
 
+  /* The number of times to unroll the loop.  0, means no information
+     given, just do what we always do.  A value of 1, means don't unroll
+     the loop.  */
+  unsigned short unroll;
+
   /* True if this loop should never be vectorized.  */
   bool dont_vectorize;
 
Index: gcc/cfgloopmanip.c
===================================================================
--- gcc/cfgloopmanip.c	(revision 220084)
+++ gcc/cfgloopmanip.c	(working copy)
@@ -1038,6 +1038,7 @@ copy_loop_info (struct loop *loop, struc
   target->estimate_state = loop->estimate_state;
   target->warned_aggressive_loop_optimizations
     |= loop->warned_aggressive_loop_optimizations;
+  target->unroll = loop->unroll;
 }
 
 /* Copies copy of LOOP as subloop of TARGET loop, placing newly
Index: gcc/cp/cp-array-notation.c
===================================================================
--- gcc/cp/cp-array-notation.c	(revision 220084)
+++ gcc/cp/cp-array-notation.c	(working copy)
@@ -81,7 +81,7 @@ create_an_loop (tree init, tree cond, tr
   finish_expr_stmt (init);
   for_stmt = begin_for_stmt (NULL_TREE, NULL_TREE);
   finish_for_init_stmt (for_stmt);
-  finish_for_cond (cond, for_stmt, false);
+  finish_for_cond (cond, for_stmt, false, 0);
   finish_for_expr (incr, for_stmt);
   finish_expr_stmt (body);
   finish_for_stmt (for_stmt);
Index: gcc/cp/cp-tree.h
===================================================================
--- gcc/cp/cp-tree.h	(revision 220084)
+++ gcc/cp/cp-tree.h	(working copy)
@@ -5644,7 +5644,7 @@ extern tree implicitly_declare_fn
 extern bool maybe_clone_body			(tree);
 
 /* In parser.c */
-extern tree cp_convert_range_for (tree, tree, tree, bool);
+extern tree cp_convert_range_for (tree, tree, tree, bool, unsigned short);
 extern bool parsing_nsdmi (void);
 extern void inject_this_parameter (tree, cp_cv_quals);
 
@@ -5880,16 +5880,16 @@ extern void begin_else_clause			(tree);
 extern void finish_else_clause			(tree);
 extern void finish_if_stmt			(tree);
 extern tree begin_while_stmt			(void);
-extern void finish_while_stmt_cond		(tree, tree, bool);
+extern void finish_while_stmt_cond		(tree, tree, bool, unsigned short);
 extern void finish_while_stmt			(tree);
 extern tree begin_do_stmt			(void);
 extern void finish_do_body			(tree);
-extern void finish_do_stmt			(tree, tree, bool);
+extern void finish_do_stmt			(tree, tree, bool, unsigned short);
 extern tree finish_return_stmt			(tree);
 extern tree begin_for_scope			(tree *);
 extern tree begin_for_stmt			(tree, tree);
 extern void finish_for_init_stmt		(tree);
-extern void finish_for_cond			(tree, tree, bool);
+extern void finish_for_cond			(tree, tree, bool, unsigned short);
 extern void finish_for_expr			(tree, tree);
 extern void finish_for_stmt			(tree);
 extern tree begin_range_for_stmt		(tree, tree);
Index: gcc/cp/init.c
===================================================================
--- gcc/cp/init.c	(revision 220084)
+++ gcc/cp/init.c	(working copy)
@@ -3693,7 +3693,7 @@ build_vec_init (tree base, tree maxindex
       finish_for_init_stmt (for_stmt);
       finish_for_cond (build2 (NE_EXPR, boolean_type_node, iterator,
 			       build_int_cst (TREE_TYPE (iterator), -1)),
-		       for_stmt, false);
+		       for_stmt, false, 0);
       elt_init = cp_build_unary_op (PREDECREMENT_EXPR, iterator, 0,
 				    complain);
       if (elt_init == error_mark_node)
Index: gcc/cp/parser.c
===================================================================
--- gcc/cp/parser.c	(revision 220084)
+++ gcc/cp/parser.c	(working copy)
@@ -2044,15 +2044,15 @@ static tree cp_parser_selection_statemen
 static tree cp_parser_condition
   (cp_parser *);
 static tree cp_parser_iteration_statement
-  (cp_parser *, bool);
+  (cp_parser *, bool, unsigned short);
 static bool cp_parser_for_init_statement
   (cp_parser *, tree *decl);
 static tree cp_parser_for
-  (cp_parser *, bool);
+  (cp_parser *, bool, unsigned short);
 static tree cp_parser_c_for
-  (cp_parser *, tree, tree, bool);
+  (cp_parser *, tree, tree, bool, unsigned short);
 static tree cp_parser_range_for
-  (cp_parser *, tree, tree, tree, bool);
+  (cp_parser *, tree, tree, tree, bool, unsigned short);
 static void do_range_for_auto_deduction
   (tree, tree);
 static tree cp_parser_perform_range_for_lookup
@@ -9698,7 +9698,7 @@ cp_parser_statement (cp_parser* parser,
 	case RID_WHILE:
 	case RID_DO:
 	case RID_FOR:
-	  statement = cp_parser_iteration_statement (parser, false);
+	  statement = cp_parser_iteration_statement (parser, false, 0);
 	  break;
 
 	case RID_CILK_FOR:
@@ -10390,7 +10390,7 @@ cp_parser_condition (cp_parser* parser)
    not included. */
 
 static tree
-cp_parser_for (cp_parser *parser, bool ivdep)
+cp_parser_for (cp_parser *parser, bool ivdep, unsigned short unroll)
 {
   tree init, scope, decl;
   bool is_range_for;
@@ -10402,13 +10402,14 @@ cp_parser_for (cp_parser *parser, bool i
   is_range_for = cp_parser_for_init_statement (parser, &decl);
 
   if (is_range_for)
-    return cp_parser_range_for (parser, scope, init, decl, ivdep);
+    return cp_parser_range_for (parser, scope, init, decl, ivdep, unroll);
   else
-    return cp_parser_c_for (parser, scope, init, ivdep);
+    return cp_parser_c_for (parser, scope, init, ivdep, unroll);
 }
 
 static tree
-cp_parser_c_for (cp_parser *parser, tree scope, tree init, bool ivdep)
+cp_parser_c_for (cp_parser *parser, tree scope, tree init, bool ivdep,
+		 unsigned short unroll)
 {
   /* Normal for loop */
   tree condition = NULL_TREE;
@@ -10429,7 +10430,13 @@ cp_parser_c_for (cp_parser *parser, tree
 		       "%<GCC ivdep%> pragma");
       condition = error_mark_node;
     }
-  finish_for_cond (condition, stmt, ivdep);
+  else if (unroll)
+    {
+      cp_parser_error (parser, "missing loop condition in loop with "
+		       "%<GCC unroll%> pragma");
+      condition = error_mark_node;
+    }
+  finish_for_cond (condition, stmt, ivdep, unroll);
   /* Look for the `;'.  */
   cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON);
 
@@ -10453,7 +10460,7 @@ cp_parser_c_for (cp_parser *parser, tree
 
 static tree
 cp_parser_range_for (cp_parser *parser, tree scope, tree init, tree range_decl,
-		     bool ivdep)
+		     bool ivdep, unsigned short unroll)
 {
   tree stmt, range_expr;
 
@@ -10474,6 +10481,8 @@ cp_parser_range_for (cp_parser *parser,
       stmt = begin_range_for_stmt (scope, init);
       if (ivdep)
 	RANGE_FOR_IVDEP (stmt) = 1;
+      if (unroll)
+	/* TODO */(void)0;
       finish_range_for_decl (stmt, range_decl, range_expr);
       if (!type_dependent_expression_p (range_expr)
 	  /* do_auto_deduction doesn't mess with template init-lists.  */
@@ -10483,7 +10492,7 @@ cp_parser_range_for (cp_parser *parser,
   else
     {
       stmt = begin_for_stmt (scope, init);
-      stmt = cp_convert_range_for (stmt, range_decl, range_expr, ivdep);
+      stmt = cp_convert_range_for (stmt, range_decl, range_expr, ivdep, unroll);
     }
   return stmt;
 }
@@ -10575,7 +10584,7 @@ do_range_for_auto_deduction (tree decl,
 
 tree
 cp_convert_range_for (tree statement, tree range_decl, tree range_expr,
-		      bool ivdep)
+		      bool ivdep, unsigned short unroll)
 {
   tree begin, end;
   tree iter_type, begin_expr, end_expr;
@@ -10632,7 +10641,7 @@ cp_convert_range_for (tree statement, tr
 				 begin, ERROR_MARK,
 				 end, ERROR_MARK,
 				 NULL, tf_warning_or_error);
-  finish_for_cond (condition, statement, ivdep);
+  finish_for_cond (condition, statement, ivdep, unroll);
 
   /* The new increment expression.  */
   expression = finish_unary_op_expr (input_location,
@@ -10793,7 +10802,8 @@ cp_parser_range_for_member_function (tre
    Returns the new WHILE_STMT, DO_STMT, FOR_STMT or RANGE_FOR_STMT.  */
 
 static tree
-cp_parser_iteration_statement (cp_parser* parser, bool ivdep)
+cp_parser_iteration_statement (cp_parser* parser, bool ivdep,
+			       unsigned short unroll)
 {
   cp_token *token;
   enum rid keyword;
@@ -10823,7 +10833,7 @@ cp_parser_iteration_statement (cp_parser
 	cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN);
 	/* Parse the condition.  */
 	condition = cp_parser_condition (parser);
-	finish_while_stmt_cond (condition, statement, ivdep);
+	finish_while_stmt_cond (condition, statement, ivdep, unroll);
 	/* Look for the `)'.  */
 	cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN);
 	/* Parse the dependent statement.  */
@@ -10853,7 +10863,7 @@ cp_parser_iteration_statement (cp_parser
 	/* Parse the expression.  */
 	expression = cp_parser_expression (parser);
 	/* We're done with the do-statement.  */
-	finish_do_stmt (expression, statement, ivdep);
+	finish_do_stmt (expression, statement, ivdep, unroll);
 	/* Look for the `)'.  */
 	cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN);
 	/* Look for the `;'.  */
@@ -10866,7 +10876,7 @@ cp_parser_iteration_statement (cp_parser
 	/* Look for the `('.  */
 	cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN);
 
-	statement = cp_parser_for (parser, ivdep);
+	statement = cp_parser_for (parser, ivdep, unroll);
 
 	/* Look for the `)'.  */
 	cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN);
@@ -32901,6 +32911,39 @@ cp_parser_cilk_grainsize (cp_parser *par
   cp_parser_skip_to_pragma_eol (parser, pragma_tok);
 }
 
+static bool
+cp_parser_pragma_ivdep (cp_parser *parser, cp_token *pragma_tok)
+{
+  cp_parser_skip_to_pragma_eol (parser, pragma_tok);
+  return true;
+}
+
+static unsigned short
+cp_parser_pragma_unroll (cp_parser *parser, cp_token *pragma_tok)
+{
+  tree expr = cp_parser_constant_expression (parser);
+  unsigned short unroll;
+  expr = maybe_constant_value (expr);
+  cp_parser_skip_to_pragma_eol (parser, pragma_tok);
+  HOST_WIDE_INT lunroll = 0;
+  if (!INTEGRAL_TYPE_P (TREE_TYPE (expr))
+      || TREE_CODE (expr) != INTEGER_CST
+      || (lunroll = tree_to_shwi (expr)) < 0
+      || lunroll > USHRT_MAX)
+    {
+      cp_parser_error (parser,
+        "%<#pragma GCC unroll%> requires a non-negative number");
+      unroll = 0;
+    }
+  else
+    {
+      unroll = (unsigned short)lunroll;
+      if (unroll == 0)
+	unroll = 1;
+    }
+  return unroll;
+}
+
 /* Normal parsing of a pragma token.  Here we can (and must) use the
    regular lexer.  */
 
@@ -33068,9 +33111,39 @@ cp_parser_pragma (cp_parser *parser, enu
 
     case PRAGMA_IVDEP:
       {
-	cp_parser_skip_to_pragma_eol (parser, pragma_tok);
+	bool ivdep = cp_parser_pragma_ivdep (parser, pragma_tok);
+	unsigned short unroll = 0;
 	cp_token *tok;
 	tok = cp_lexer_peek_token (the_parser->lexer);
+	if (tok->type == CPP_PRAGMA &&
+	    tok->pragma_kind == PRAGMA_UNROLL)
+	  {
+	    unroll = cp_parser_pragma_unroll (parser, pragma_tok);
+	    tok = cp_lexer_peek_token (the_parser->lexer);
+	  }
+	if (tok->type != CPP_KEYWORD
+	    || (tok->keyword != RID_FOR && tok->keyword != RID_WHILE
+		&& tok->keyword != RID_DO))
+	  {
+	    cp_parser_error (parser, "for, while or do statement expected");
+	    return false;
+	  }
+	cp_parser_iteration_statement (parser, ivdep, unroll);
+	return true;
+      }
+
+    case PRAGMA_UNROLL:
+      {
+	unsigned short unroll = cp_parser_pragma_unroll (parser, pragma_tok);
+	bool ivdep = false;
+	cp_token *tok;
+	tok = cp_lexer_peek_token (the_parser->lexer);
+	if (tok->type == CPP_PRAGMA &&
+	    tok->pragma_kind == PRAGMA_IVDEP)
+	  {
+	    ivdep = cp_parser_pragma_ivdep (parser, tok);
+	    tok = cp_lexer_peek_token (the_parser->lexer);
+	  }
 	if (tok->type != CPP_KEYWORD
 	    || (tok->keyword != RID_FOR && tok->keyword != RID_WHILE
 		&& tok->keyword != RID_DO))
@@ -33078,7 +33151,7 @@ cp_parser_pragma (cp_parser *parser, enu
 	    cp_parser_error (parser, "for, while or do statement expected");
 	    return false;
 	  }
-	cp_parser_iteration_statement (parser, true);
+	cp_parser_iteration_statement (parser, ivdep, unroll);
 	return true;
       }
 
Index: gcc/cp/pt.c
===================================================================
--- gcc/cp/pt.c	(revision 220084)
+++ gcc/cp/pt.c	(working copy)
@@ -13886,7 +13886,7 @@ tsubst_expr (tree t, tree args, tsubst_f
       RECUR (FOR_INIT_STMT (t));
       finish_for_init_stmt (stmt);
       tmp = RECUR (FOR_COND (t));
-      finish_for_cond (tmp, stmt, false);
+      finish_for_cond (tmp, stmt, false, 0);
       tmp = RECUR (FOR_EXPR (t));
       finish_for_expr (tmp, stmt);
       RECUR (FOR_BODY (t));
@@ -13901,7 +13901,7 @@ tsubst_expr (tree t, tree args, tsubst_f
         decl = tsubst (decl, args, complain, in_decl);
         maybe_push_decl (decl);
         expr = RECUR (RANGE_FOR_EXPR (t));
-        stmt = cp_convert_range_for (stmt, decl, expr, RANGE_FOR_IVDEP (t));
+        stmt = cp_convert_range_for (stmt, decl, expr, RANGE_FOR_IVDEP (t), 0);
         RECUR (RANGE_FOR_BODY (t));
         finish_for_stmt (stmt);
       }
@@ -13910,7 +13910,7 @@ tsubst_expr (tree t, tree args, tsubst_f
     case WHILE_STMT:
       stmt = begin_while_stmt ();
       tmp = RECUR (WHILE_COND (t));
-      finish_while_stmt_cond (tmp, stmt, false);
+      finish_while_stmt_cond (tmp, stmt, false, 0);
       RECUR (WHILE_BODY (t));
       finish_while_stmt (stmt);
       break;
@@ -13920,7 +13920,7 @@ tsubst_expr (tree t, tree args, tsubst_f
       RECUR (DO_BODY (t));
       finish_do_body (stmt);
       tmp = RECUR (DO_COND (t));
-      finish_do_stmt (tmp, stmt, false);
+      finish_do_stmt (tmp, stmt, false, 0);
       break;
 
     case IF_STMT:
@@ -14358,8 +14358,10 @@ tsubst_expr (tree t, tree args, tsubst_f
 
     case ANNOTATE_EXPR:
       tmp = RECUR (TREE_OPERAND (t, 0));
-      RETURN (build2_loc (EXPR_LOCATION (t), ANNOTATE_EXPR,
-			  TREE_TYPE (tmp), tmp, RECUR (TREE_OPERAND (t, 1))));
+      RETURN (build3_loc (EXPR_LOCATION (t), ANNOTATE_EXPR,
+			  TREE_TYPE (tmp), tmp,
+			  RECUR (TREE_OPERAND (t, 1)),
+			  RECUR (TREE_OPERAND (t, 2))));
 
     default:
       gcc_assert (!STATEMENT_CODE_P (TREE_CODE (t)));
Index: gcc/cp/semantics.c
===================================================================
--- gcc/cp/semantics.c	(revision 220084)
+++ gcc/cp/semantics.c	(working copy)
@@ -802,7 +802,8 @@ begin_while_stmt (void)
    WHILE_STMT.  */
 
 void
-finish_while_stmt_cond (tree cond, tree while_stmt, bool ivdep)
+finish_while_stmt_cond (tree cond, tree while_stmt, bool ivdep,
+			unsigned short unroll)
 {
   if (check_no_cilk (cond,
       "Cilk array notation cannot be used as a condition for while statement",
@@ -812,11 +813,19 @@ finish_while_stmt_cond (tree cond, tree
   finish_cond (&WHILE_COND (while_stmt), cond);
   begin_maybe_infinite_loop (cond);
   if (ivdep && cond != error_mark_node)
-    WHILE_COND (while_stmt) = build2 (ANNOTATE_EXPR,
+    WHILE_COND (while_stmt) = build3 (ANNOTATE_EXPR,
 				      TREE_TYPE (WHILE_COND (while_stmt)),
 				      WHILE_COND (while_stmt),
 				      build_int_cst (integer_type_node,
-						     annot_expr_ivdep_kind));
+						     annot_expr_ivdep_kind),
+				      NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    WHILE_COND (while_stmt) = build3 (ANNOTATE_EXPR,
+				      TREE_TYPE (WHILE_COND (while_stmt)),
+				      WHILE_COND (while_stmt),
+				      build_int_cst (integer_type_node,
+						     annot_expr_unroll_kind),
+				      build_int_cst (integer_type_node, unroll));
   simplify_loop_decl_cond (&WHILE_COND (while_stmt), WHILE_BODY (while_stmt));
 }
 
@@ -861,7 +870,7 @@ finish_do_body (tree do_stmt)
    COND is as indicated.  */
 
 void
-finish_do_stmt (tree cond, tree do_stmt, bool ivdep)
+finish_do_stmt (tree cond, tree do_stmt, bool ivdep, unsigned short unroll)
 {
   if (check_no_cilk (cond,
   "Cilk array notation cannot be used as a condition for a do-while statement",
@@ -870,8 +879,13 @@ finish_do_stmt (tree cond, tree do_stmt,
   cond = maybe_convert_cond (cond);
   end_maybe_infinite_loop (cond);
   if (ivdep && cond != error_mark_node)
-    cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
-		   build_int_cst (integer_type_node, annot_expr_ivdep_kind));
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+		   build_int_cst (integer_type_node, annot_expr_ivdep_kind),
+		   NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+		   build_int_cst (integer_type_node, annot_expr_unroll_kind),
+		   build_int_cst (integer_type_node, unroll));
   DO_COND (do_stmt) = cond;
 }
 
@@ -974,7 +988,7 @@ finish_for_init_stmt (tree for_stmt)
    FOR_STMT.  */
 
 void
-finish_for_cond (tree cond, tree for_stmt, bool ivdep)
+finish_for_cond (tree cond, tree for_stmt, bool ivdep, unsigned short unroll)
 {
   if (check_no_cilk (cond,
 	 "Cilk array notation cannot be used in a condition for a for-loop",
@@ -984,11 +998,20 @@ finish_for_cond (tree cond, tree for_stm
   finish_cond (&FOR_COND (for_stmt), cond);
   begin_maybe_infinite_loop (cond);
   if (ivdep && cond != error_mark_node)
-    FOR_COND (for_stmt) = build2 (ANNOTATE_EXPR,
+    FOR_COND (for_stmt) = build3 (ANNOTATE_EXPR,
 				  TREE_TYPE (FOR_COND (for_stmt)),
 				  FOR_COND (for_stmt),
 				  build_int_cst (integer_type_node,
-						 annot_expr_ivdep_kind));
+						 annot_expr_ivdep_kind),
+				  NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    FOR_COND (for_stmt) = build3 (ANNOTATE_EXPR,
+				  TREE_TYPE (FOR_COND (for_stmt)),
+				  FOR_COND (for_stmt),
+				  build_int_cst (integer_type_node,
+						 annot_expr_unroll_kind),
+				  build_int_cst (integer_type_node,
+						 unroll));
   simplify_loop_decl_cond (&FOR_COND (for_stmt), FOR_BODY (for_stmt));
 }
 
Index: gcc/fortran/trans-stmt.c
===================================================================
--- gcc/fortran/trans-stmt.c	(revision 220084)
+++ gcc/fortran/trans-stmt.c	(working copy)
@@ -2876,9 +2876,10 @@ gfc_trans_forall_loop (forall_info *fora
       cond = fold_build2_loc (input_location, LE_EXPR, boolean_type_node,
 			      count, build_int_cst (TREE_TYPE (count), 0));
       if (forall_tmp->do_concurrent)
-	cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+	cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
 		       build_int_cst (integer_type_node,
-				      annot_expr_ivdep_kind));
+				      annot_expr_ivdep_kind),
+		       NULL_TREE);
 
       tmp = build1_v (GOTO_EXPR, exit_label);
       tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node,
Index: gcc/function.h
===================================================================
--- gcc/function.h	(revision 220084)
+++ gcc/function.h	(working copy)
@@ -670,6 +670,10 @@ struct GTY(()) function {
 
   /* Set when the tail call has been identified.  */
   unsigned int tail_call_marked : 1;
+
+  /* Set when #pragma unroll has been used in the body.  Used by rtl
+     unrolling to know when to perform unrolling in the function.  */
+  unsigned int has_unroll : 1;
 };
 
 /* Add the decl D to the local_decls list of FUN.  */
Index: gcc/gimple-low.c
===================================================================
--- gcc/gimple-low.c	(revision 220084)
+++ gcc/gimple-low.c	(working copy)
@@ -347,7 +347,7 @@ lower_stmt (gimple_stmt_iterator *gsi, s
 	for (i = 0; i < gimple_call_num_args (stmt); i++)
 	  {
 	    tree arg = gimple_call_arg (stmt, i);
-	    if (EXPR_P (arg))
+	    if (arg && EXPR_P (arg))
 	      TREE_SET_BLOCK (arg, data->block);
 	  }
 
Index: gcc/gimple-walk.c
===================================================================
--- gcc/gimple-walk.c	(revision 220084)
+++ gcc/gimple-walk.c	(working copy)
@@ -261,7 +261,7 @@ walk_gimple_op (gimple stmt, walk_tree_f
 
       for (i = 0; i < gimple_call_num_args (stmt); i++)
 	{
-	  if (wi)
+	  if (wi && gimple_call_arg (stmt, i))
 	    wi->val_only
 	      = is_gimple_reg_type (TREE_TYPE (gimple_call_arg (stmt, i)));
 	  ret = walk_tree (gimple_call_arg_ptr (stmt, i), callback_op, wi,
Index: gcc/gimplify.c
===================================================================
--- gcc/gimplify.c	(revision 220084)
+++ gcc/gimplify.c	(working copy)
@@ -2908,6 +2908,9 @@ gimple_boolify (tree expr)
     case ANNOTATE_EXPR:
       switch ((enum annot_expr_kind) TREE_INT_CST_LOW (TREE_OPERAND (expr, 1)))
 	{
+	case annot_expr_unroll_kind:
+	  cfun->has_unroll = 1;
+	  /* fall-through */
 	case annot_expr_ivdep_kind:
 	case annot_expr_no_vector_kind:
 	case annot_expr_vector_kind:
@@ -7947,6 +7950,7 @@ gimplify_expr (tree *expr_p, gimple_seq
 	  {
 	    tree cond = TREE_OPERAND (*expr_p, 0);
 	    tree kind = TREE_OPERAND (*expr_p, 1);
+	    tree data = TREE_OPERAND (*expr_p, 2);
 	    tree type = TREE_TYPE (cond);
 	    if (!INTEGRAL_TYPE_P (type))
 	      {
@@ -7957,7 +7961,7 @@ gimplify_expr (tree *expr_p, gimple_seq
 	    tree tmp = create_tmp_var (type);
 	    gimplify_arg (&cond, pre_p, EXPR_LOCATION (*expr_p));
 	    gcall *call
-	      = gimple_build_call_internal (IFN_ANNOTATE, 2, cond, kind);
+	      = gimple_build_call_internal (IFN_ANNOTATE, 3, cond, kind, data);
 	    gimple_call_set_lhs (call, tmp);
 	    gimplify_seq_add_stmt (pre_p, call);
 	    *expr_p = tmp;
Index: gcc/loop-init.c
===================================================================
--- gcc/loop-init.c	(revision 220084)
+++ gcc/loop-init.c	(working copy)
@@ -375,6 +375,7 @@ pass_loop2::gate (function *fun)
       && (flag_move_loop_invariants
 	  || flag_unswitch_loops
 	  || flag_unroll_loops
+	  || cfun->has_unroll
 #ifdef HAVE_doloop_end
 	  || (flag_branch_on_count_reg && HAVE_doloop_end)
 #endif
@@ -576,7 +577,8 @@ public:
   /* opt_pass methods: */
   virtual bool gate (function *)
     {
-      return (flag_peel_loops || flag_unroll_loops || flag_unroll_all_loops);
+      return (flag_peel_loops || flag_unroll_loops || flag_unroll_all_loops
+	      || cfun->has_unroll);
     }
 
   virtual unsigned int execute (function *);
Index: gcc/loop-unroll.c
===================================================================
--- gcc/loop-unroll.c	(revision 220084)
+++ gcc/loop-unroll.c	(working copy)
@@ -243,16 +243,26 @@ report_unroll (struct loop *loop, locati
 
 /* Decide whether unroll loops and how much.  */
 static void
-decide_unrolling (int flags)
+decide_unrolling (int base_flags)
 {
   struct loop *loop;
 
   /* Scan the loops, inner ones first.  */
   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
     {
+      int flags = base_flags;
+      if (loop->unroll > 1)
+	flags = UAP_UNROLL | UAP_UNROLL_ALL;
       loop->lpt_decision.decision = LPT_NONE;
       location_t locus = get_loop_location (loop);
 
+      if (loop->unroll == 1)
+	{
+	  dump_printf_loc (TDF_RTL, locus,
+			   "not unrolling loop, user didn't want it unrolled\n");
+	  continue;
+	}
+
       if (dump_enabled_p ())
 	dump_printf_loc (TDF_RTL, locus,
                          ";; *** Considering loop %d at BB %d for "
@@ -422,6 +432,19 @@ decide_unroll_constant_iterations (struc
       return;
     }
 
+  if (loop->unroll)
+    {
+      loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
+      loop->lpt_decision.times = loop->unroll - 1;
+      if (loop->lpt_decision.times > desc->niter - 2)
+	{
+	  /* They won't do this for us.  */
+	  loop->lpt_decision.decision = LPT_NONE;
+	  loop->lpt_decision.times = desc->niter - 2;
+	}
+      return;
+    }
+
   /* Check whether the loop rolls enough to consider.  
      Consult also loop bounds and profile; in the case the loop has more
      than one exit it may well loop less than determined maximal number
@@ -443,7 +466,7 @@ decide_unroll_constant_iterations (struc
   best_copies = 2 * nunroll + 10;
 
   i = 2 * nunroll + 2;
-  if (i - 1 >= desc->niter)
+  if (i > desc->niter - 2)
     i = desc->niter - 2;
 
   for (; i >= nunroll - 1; i--)
@@ -695,6 +718,9 @@ decide_unroll_runtime_iterations (struct
   if (targetm.loop_unroll_adjust)
     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 
+  if (loop->unroll)
+    nunroll = loop->unroll;
+
   /* Skip big loops.  */
   if (nunroll <= 1)
     {
@@ -733,8 +759,9 @@ decide_unroll_runtime_iterations (struct
       return;
     }
 
-  /* Success; now force nunroll to be power of 2, as we are unable to
-     cope with overflows in computation of number of iterations.  */
+  /* Success; now force nunroll to be power of 2, as code-gen
+     requires it, we are unable to cope with overflows in
+     computation of number of iterations.  */
   for (i = 1; 2 * i <= nunroll; i *= 2)
     continue;
 
@@ -843,9 +870,10 @@ compare_and_jump_seq (rtx op0, rtx op1,
   return seq;
 }
 
-/* Unroll LOOP for which we are able to count number of iterations in runtime
-   LOOP->LPT_DECISION.TIMES times.  The transformation does this (with some
-   extra care for case n < 0):
+/* Unroll LOOP for which we are able to count number of iterations in
+   runtime LOOP->LPT_DECISION.TIMES times.  The times value must be a
+   power of two.  The transformation does this (with some extra care
+   for case n < 0):
 
    for (i = 0; i < n; i++)
      body;
@@ -1142,6 +1170,9 @@ decide_unroll_stupid (struct loop *loop,
   if (targetm.loop_unroll_adjust)
     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 
+  if (loop->unroll)
+    nunroll = loop->unroll;
+
   /* Skip big loops.  */
   if (nunroll <= 1)
     {
Index: gcc/lto-streamer-in.c
===================================================================
--- gcc/lto-streamer-in.c	(revision 220084)
+++ gcc/lto-streamer-in.c	(working copy)
@@ -751,6 +751,7 @@ input_cfg (struct lto_input_block *ib, s
 
       /* Read OMP SIMD related info.  */
       loop->safelen = streamer_read_hwi (ib);
+      loop->unroll = streamer_read_hwi (ib);
       loop->dont_vectorize = streamer_read_hwi (ib);
       loop->force_vectorize = streamer_read_hwi (ib);
       loop->simduid = stream_read_tree (ib, data_in);
Index: gcc/lto-streamer-out.c
===================================================================
--- gcc/lto-streamer-out.c	(revision 220084)
+++ gcc/lto-streamer-out.c	(working copy)
@@ -1884,6 +1884,7 @@ output_cfg (struct output_block *ob, str
 
       /* Write OMP SIMD related info.  */
       streamer_write_hwi (ob, loop->safelen);
+      streamer_write_hwi (ob, loop->unroll);
       streamer_write_hwi (ob, loop->dont_vectorize);
       streamer_write_hwi (ob, loop->force_vectorize);
       stream_write_tree (ob, loop->simduid, true);
Index: gcc/testsuite/gcc.dg/unroll-7.c
===================================================================
--- gcc/testsuite/gcc.dg/unroll-7.c	(revision 0)
+++ gcc/testsuite/gcc.dg/unroll-7.c	(working copy)
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdisable-tree-cunroll -fdump-rtl-loop2_unroll -fdump-tree-cunrolli-details" } */
+
+void bar(int);
+
+int j;
+
+void foo()
+{
+  unsigned long m = j;
+  unsigned long i;
+
+    /* { dg-final { scan-tree-dump "loop with 9 iterations completely unrolled" "cunrolli" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "21:5: note: loop unrolled 7 times" "loop2_unroll" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= j; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "26:5: note: loop unrolled 3 times" "loop2_unroll" } } */
+  #pragma GCC unroll 7
+  for (unsigned long i = 1; i <= j; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "33:3: note: loop unrolled 2 times" "loop2_unroll" } } */
+  i = 0;
+  #pragma GCC unroll 3
+  do {
+    bar(i);
+  } while (++i < 9);
+}
+
+/* { dXg-final { cleanup-rtl-dump "loop2_unroll" } } */
Index: gcc/testsuite/gcc.dg/unroll-8.c
===================================================================
--- gcc/testsuite/gcc.dg/unroll-8.c	(revision 0)
+++ gcc/testsuite/gcc.dg/unroll-8.c	(working copy)
@@ -0,0 +1,72 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-loop2_unroll -fdump-tree-cunrolli-details" } */
+
+void bar(int);
+
+int j;
+
+void foo()
+{
+  unsigned long m = j;
+  unsigned long i;
+
+  /* { dg-final { scan-tree-dump "15:3: note: loop turned into non-loop; it never loops"  "cunrolli" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "21:5: note: loop unrolled 7 times" "loop2_unroll" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= j; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "26:5: note: loop unrolled 3 times" "loop2_unroll" } } */
+  #pragma GCC unroll 7
+  for (unsigned long i = 1; i <= j; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "33:3: note: loop unrolled 2 times" "loop2_unroll" } } */
+  i = 0;
+  #pragma GCC unroll 3
+  do {
+    bar(i);
+  } while (++i < 9);
+}
+
+void test2 () {
+  unsigned long m = j;
+  unsigned long i;
+
+  /* { dg-final { scan-tree-dump "42:3: note: loop turned into non-loop; it never loops" "cunrolli" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= 7; ++i)
+    bar(i);
+
+  /* { dg-final { scan-tree-dump "47:3: note: loop turned into non-loop; it never loops" "cunrolli" } } */
+  #pragma GCC unroll 9
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "52:3: note: loop unrolled 3 times" "loop2_unroll" } } */
+  #pragma GCC unroll 4
+  for (unsigned long i = 1; i <= 15; ++i)
+    bar(i);
+
+  /* { dg-final { scan-tree-dump "57:3: note: loop turned into non-loop; it never loops" "cunrolli" } } */
+  #pragma GCC unroll 709
+  for (unsigned long i = 1; i <= 709; ++i)
+    bar(i);
+
+  /* { dg-final { scan-tree-dump "62:3: note: not unrolling loop, user didn't want it unrolled completely" "cunrolli" } } */
+  #pragma GCC unroll 0
+  for (unsigned long i = 1; i <= 3; ++i)
+    bar(i);
+}
+
+ void test3 () {
+  unsigned long m = j;
+  unsigned long i;
+}
+
+/* { dXg-final { cleanup-tree-dump "cunrolli" } } */
+/* { dXg-final { cleanup-rtl-dump "loop2_unroll" } } */
Index: gcc/testsuite/gcc.dg/unroll-9.c
===================================================================
--- gcc/testsuite/gcc.dg/unroll-9.c	(revision 0)
+++ gcc/testsuite/gcc.dg/unroll-9.c	(working copy)
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -funroll-all-loops -fdump-rtl-loop2_unroll -fdump-tree-cunrolli-details" } */
+
+void bar(int);
+
+int j;
+
+void test1()
+{
+  unsigned long m = j;
+  unsigned long i;
+
+  /* { dg-final { scan-tree-dump "16:3: note: not unrolling loop, user didn't want it unrolled completely" "cunrolli" } } */
+  /* { dg-final { scan-rtl-dump "16:3: note: not unrolling loop, user didn't want it unrolled" "loop2_unroll" } } */
+  #pragma GCC unroll 0
+  for (unsigned long i = 1; i <= 3; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "21:3: note: not unrolling loop, user didn't want it unrolled" "loop2_unroll" } } */
+  #pragma GCC unroll 0
+  for (unsigned long i = 1; i <= m; ++i)
+    bar(i);
+}
+
+/* { dXg-final { cleanup-tree-dump "cunrolli" } } */
+/* { dXg-final { cleanup-rtl-dump "loop2_unroll" } } */
Index: gcc/tree-cfg.c
===================================================================
--- gcc/tree-cfg.c	(revision 220084)
+++ gcc/tree-cfg.c	(working copy)
@@ -316,6 +316,10 @@ replace_loop_annotate_in_block (basic_bl
 	  loop->force_vectorize = true;
 	  cfun->has_force_vectorize_loops = true;
 	  break;
+	case annot_expr_unroll_kind:
+	  loop->unroll = (unsigned short)tree_to_shwi (gimple_call_arg (stmt,
+									2));
+	  break;
 	default:
 	  gcc_unreachable ();
 	}
@@ -365,6 +369,7 @@ replace_loop_annotate (void)
 	    case annot_expr_ivdep_kind:
 	    case annot_expr_no_vector_kind:
 	    case annot_expr_vector_kind:
+	    case annot_expr_unroll_kind:
 	      break;
 	    default:
 	      gcc_unreachable ();
@@ -3385,6 +3390,8 @@ verify_gimple_call (gcall *stmt)
   for (i = 0; i < gimple_call_num_args (stmt); ++i)
     {
       tree arg = gimple_call_arg (stmt, i);
+      if (! arg)
+	continue;
       if ((is_gimple_reg_type (TREE_TYPE (arg))
 	   && !is_gimple_val (arg))
 	  || (!is_gimple_reg_type (TREE_TYPE (arg))
@@ -7512,6 +7519,8 @@ print_loop (FILE *file, struct loop *loo
       fprintf (file, ", estimate = ");
       print_decu (loop->nb_iterations_estimate, file);
     }
+  if (loop->unroll)
+    fprintf (file, ", unroll = %d", loop->unroll);
   fprintf (file, ")\n");
 
   /* Print loop's body.  */
Index: gcc/tree-core.h
===================================================================
--- gcc/tree-core.h	(revision 220084)
+++ gcc/tree-core.h	(working copy)
@@ -725,6 +725,7 @@ enum annot_expr_kind {
   annot_expr_ivdep_kind,
   annot_expr_no_vector_kind,
   annot_expr_vector_kind,
+  annot_expr_unroll_kind,
   annot_expr_kind_last
 };
 
Index: gcc/tree-pretty-print.c
===================================================================
--- gcc/tree-pretty-print.c	(revision 220084)
+++ gcc/tree-pretty-print.c	(working copy)
@@ -2313,6 +2313,10 @@ dump_generic_node (pretty_printer *pp, t
 	case annot_expr_vector_kind:
 	  pp_string (pp, ", vector");
 	  break;
+	case annot_expr_unroll_kind:
+	  pp_printf (pp, ", unroll %d",
+		     (int)TREE_INT_CST_LOW (TREE_OPERAND (node, 2)));
+	  break;
 	default:
 	  gcc_unreachable ();
 	}
Index: gcc/tree-ssa-loop-ivcanon.c
===================================================================
--- gcc/tree-ssa-loop-ivcanon.c	(revision 220084)
+++ gcc/tree-ssa-loop-ivcanon.c	(working copy)
@@ -686,8 +686,7 @@ try_unroll_loop_completely (struct loop
 			    HOST_WIDE_INT maxiter,
 			    location_t locus)
 {
-  unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns;
-  struct loop_size size;
+  unsigned HOST_WIDE_INT n_unroll = 0;
   bool n_unroll_found = false;
   edge edge_to_cancel = NULL;
   int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;
@@ -731,7 +730,8 @@ try_unroll_loop_completely (struct loop
   if (!n_unroll_found)
     return false;
 
-  if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
+  if (loop->unroll == 0 &&
+      n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "Not unrolling loop %d "
@@ -753,107 +753,130 @@ try_unroll_loop_completely (struct loop
       if (ul == UL_SINGLE_ITER)
 	return false;
 
-      large = tree_estimate_loop_size
-		 (loop, exit, edge_to_cancel, &size,
-		  PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
-      ninsns = size.overall;
-      if (large)
+      if (loop->unroll)
 	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
-		     loop->num);
-	  return false;
+	  /* If they wanted to unroll more than we want, don't unroll
+	     it completely.  */
+	  if (n_unroll > (unsigned)loop->unroll)
+	    {
+	      dump_printf_loc (report_flags, locus,
+	        "not unrolling loop, "
+		"user didn't want it unrolled completely.\n");
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file,
+		  "Not unrolling loop %d: "
+		  "user didn't want it unrolled completely.\n",
+			 loop->num);
+	      return false;
+	    }
 	}
-
-      unr_insns = estimated_unrolled_size (&size, n_unroll);
-      if (dump_file && (dump_flags & TDF_DETAILS))
+      else
 	{
-	  fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
-	  fprintf (dump_file, "  Estimated size after unrolling: %d\n",
-		   (int) unr_insns);
-	}
+	  struct loop_size size;
+	  large = tree_estimate_loop_size
+	            (loop, exit, edge_to_cancel, &size,
+		     PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
+	  unsigned HOST_WIDE_INT ninsns = size.overall;
+	  if (large)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
+			 loop->num);
+	      return false;
+	    }
 
-      /* If the code is going to shrink, we don't need to be extra cautious
-	 on guessing if the unrolling is going to be profitable.  */
-      if (unr_insns
-	  /* If there is IV variable that will become constant, we save
-	     one instruction in the loop prologue we do not account
-	     otherwise.  */
-	  <= ninsns + (size.constant_iv != false))
-	;
-      /* We unroll only inner loops, because we do not consider it profitable
-	 otheriwse.  We still can cancel loopback edge of not rolling loop;
-	 this is always a good idea.  */
-      else if (ul == UL_NO_GROWTH)
-	{
+	  unsigned HOST_WIDE_INT unr_insns
+	    = estimated_unrolled_size (&size, n_unroll);
 	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
-		     loop->num);
-	  return false;
-	}
-      /* Outer loops tend to be less interesting candidates for complete
-	 unrolling unless we can do a lot of propagation into the inner loop
-	 body.  For now we disable outer loop unrolling when the code would
-	 grow.  */
-      else if (loop->inner)
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "it is not innermost and code would grow.\n",
-		     loop->num);
-	  return false;
-	}
-      /* If there is call on a hot path through the loop, then
-	 there is most probably not much to optimize.  */
-      else if (size.num_non_pure_calls_on_hot_path)
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "contains call and code would grow.\n",
-		     loop->num);
-	  return false;
-	}
-      /* If there is pure/const call in the function, then we
-	 can still optimize the unrolled loop body if it contains
-	 some other interesting code than the calls and code
-	 storing or cumulating the return value.  */
-      else if (size.num_pure_calls_on_hot_path
-	       /* One IV increment, one test, one ivtmp store
-		  and one useful stmt.  That is about minimal loop
-		  doing pure call.  */
-	       && (size.non_call_stmts_on_hot_path
-		   <= 3 + size.num_pure_calls_on_hot_path))
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "contains just pure calls and code would grow.\n",
-		     loop->num);
-	  return false;
-	}
-      /* Complette unrolling is major win when control flow is removed and
-	 one big basic block is created.  If the loop contains control flow
-	 the optimization may still be a win because of eliminating the loop
-	 overhead but it also may blow the branch predictor tables.
-	 Limit number of branches on the hot path through the peeled
-	 sequence.  */
-      else if (size.num_branches_on_hot_path * (int)n_unroll
-	       > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     " number of branches on hot path in the unrolled sequence"
-		     " reach --param max-peel-branches limit.\n",
-		     loop->num);
-	  return false;
-	}
-      else if (unr_insns
-	       > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "(--param max-completely-peeled-insns limit reached).\n",
-		     loop->num);
-	  return false;
+	    {
+	      fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
+	      fprintf (dump_file, "  Estimated size after unrolling: %d\n",
+		       (int) unr_insns);
+	    }
+
+	  /* If the code is going to shrink, we don't need to be extra
+	     cautious on guessing if the unrolling is going to be
+	     profitable.  */
+	  if (unr_insns
+	      /* If there is IV variable that will become constant, we
+		 save one instruction in the loop prologue we do not
+		 account otherwise.  */
+	      <= ninsns + (size.constant_iv != false))
+	    ;
+	  /* We unroll only inner loops, because we do not consider it
+	     profitable otheriwse.  We still can cancel loopback edge
+	     of not rolling loop; this is always a good idea.  */
+	  else if (ul == UL_NO_GROWTH)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* Outer loops tend to be less interesting candidates for
+	     complete unrolling unless we can do a lot of propagation
+	     into the inner loop body.  For now we disable outer loop
+	     unrolling when the code would grow.  */
+	  else if (loop->inner)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "it is not innermost and code would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* If there is call on a hot path through the loop, then
+	     there is most probably not much to optimize.  */
+	  else if (size.num_non_pure_calls_on_hot_path)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "contains call and code would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* If there is pure/const call in the function, then we can
+	     still optimize the unrolled loop body if it contains some
+	     other interesting code than the calls and code storing or
+	     cumulating the return value.  */
+	  else if (size.num_pure_calls_on_hot_path
+		   /* One IV increment, one test, one ivtmp store and
+		      one useful stmt.  That is about minimal loop
+		      doing pure call.  */
+		   && (size.non_call_stmts_on_hot_path
+		       <= 3 + size.num_pure_calls_on_hot_path))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "contains just pure calls and code would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* Complete unrolling is major win when control flow is
+	     removed and one big basic block is created.  If the loop
+	     contains control flow the optimization may still be a win
+	     because of eliminating the loop overhead but it also may
+	     blow the branch predictor tables.  Limit number of
+	     branches on the hot path through the peeled sequence.  */
+	  else if (size.num_branches_on_hot_path * (int)n_unroll
+		   > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 " number of branches on hot path in the unrolled sequence"
+			 " reach --param max-peel-branches limit.\n",
+			 loop->num);
+	      return false;
+	    }
+	  else if (unr_insns
+		   > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "(--param max-completely-peeled-insns limit reached).\n",
+			 loop->num);
+	      return false;
+	    }
 	}
       dump_printf_loc (report_flags, locus,
                        "loop turned into non-loop; it never loops.\n");
@@ -897,8 +920,9 @@ try_unroll_loop_completely (struct loop
       else
 	gimple_cond_make_true (cond);
       update_stmt (cond);
-      /* Do not remove the path. Doing so may remove outer loop
-	 and confuse bookkeeping code in tree_unroll_loops_completelly.  */
+      /* Do not remove the path. Doing so may remove outer loop and
+	 confuse bookkeeping code in
+	 tree_unroll_loops_completelly.  */
     }
 
   /* Store the loop for later unlooping and exit removal.  */
@@ -974,23 +998,33 @@ try_peel_loop (struct loop *loop,
   if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0)
     return false;
 
+  /* We don't peel loops that will be unrolled as this can duplicate a
+     loop more times than the user requested.  */
+  if (loop->unroll)
+    {
+      if (dump_file)
+        fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
+      return false;
+    }
+
   /* Peel only innermost loops.  */
   if (loop->inner)
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: outer loop\n");
+	fprintf (dump_file, "Not peeling: outer loop\n");
       return false;
     }
 
   if (!optimize_loop_for_speed_p (loop))
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: cold loop\n");
+	fprintf (dump_file, "Not peeling: cold loop\n");
       return false;
     }
 
   /* Check if there is an estimate on the number of iterations.  */
   npeel = estimated_loop_iterations_int (loop);
+
   if (npeel < 0)
     {
       if (dump_file)
@@ -998,10 +1032,11 @@ try_peel_loop (struct loop *loop,
 	         "estimated\n");
       return false;
     }
+
   if (maxiter >= 0 && maxiter <= npeel)
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: upper bound is known so can "
+	fprintf (dump_file, "Not peeling: upper bound is known so can "
 		 "unroll completely\n");
       return false;
     }
@@ -1012,7 +1047,7 @@ try_peel_loop (struct loop *loop,
   if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: rolls too much "
+	fprintf (dump_file, "Not peeling: rolls too much "
 		 "(%i + 1 > --param max-peel-times)\n", npeel);
       return false;
     }
@@ -1025,7 +1060,7 @@ try_peel_loop (struct loop *loop,
       > PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: peeled sequence size is too large "
+	fprintf (dump_file, "Not peeling: peeled sequence size is too large "
 		 "(%i insns > --param max-peel-insns)", peeled_size);
       return false;
     }
@@ -1302,7 +1337,9 @@ tree_unroll_loops_completely_1 (bool may
   if (!loop_father)
     return false;
 
-  if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
+  if (loop->unroll > 1)
+    ul = UL_ALL;
+  else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
       /* Unroll outermost loops only if asked to do so or they do
 	 not cause code growth.  */
       && (unroll_outer || loop_outer (loop_father)))
@@ -1539,7 +1576,9 @@ public:
   {}
 
   /* opt_pass methods: */
-  virtual bool gate (function *) { return optimize >= 2; }
+  virtual bool gate (function *) {
+    return optimize >= 2 || cfun->has_unroll;
+  }
   virtual unsigned int execute (function *);
 
 }; // class pass_complete_unrolli
Index: gcc/tree.def
===================================================================
--- gcc/tree.def	(revision 220084)
+++ gcc/tree.def	(working copy)
@@ -1365,8 +1365,9 @@ DEFTREECODE (TARGET_OPTION_NODE, "target
 
 /* ANNOTATE_EXPR.
    Operand 0 is the expression to be annotated.
-   Operand 1 is the annotation kind.  */
-DEFTREECODE (ANNOTATE_EXPR, "annotate_expr", tcc_expression, 2)
+   Operand 1 is the annotation kind.
+   Operand 2 is optional data.  */
+DEFTREECODE (ANNOTATE_EXPR, "annotate_expr", tcc_expression, 3)
 
 /* Cilk spawn statement
    Operand 0 is the CALL_EXPR.  */

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: #pragma GCC unroll support
  2015-01-26 21:13   ` Mike Stump
@ 2015-01-26 21:18     ` Mike Stump
  2015-01-30  2:51       ` Joseph Myers
  2015-01-30  2:54     ` Joseph Myers
  1 sibling, 1 reply; 25+ messages in thread
From: Mike Stump @ 2015-01-26 21:18 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches Patches, Jason Merrill, Joseph S. Myers

[-- Attachment #1: Type: text/plain, Size: 79 bytes --]

I missed including the documentation patch in the last set.  :-(  Here it is:


[-- Attachment #2: unroll-3e-doc.diffs.txt --]
[-- Type: text/plain, Size: 748 bytes --]

Index: extend.texi
===================================================================
--- extend.texi	(revision 220084)
+++ extend.texi	(working copy)
@@ -17881,6 +17881,17 @@ void ignore_vec_dep (int *a, int k, int
 @}
 @end smallexample
 
+@table @code
+@item #pragma GCC unroll @var{"n"}
+@cindex pragma GCC unroll @var{"n"}
+
+With this pragma, the programmer informs the optimizer how many times
+a loop should be unrolled.  A 0 or 1 informs the compiler to not
+perform any loop unrolling.  The pragma must be immediately before
+@samp{#pragma ivdep} or a @code{for}, @code{while} or @code{do} loop
+and applies only to the loop that follows.
+
+@end table
 
 @node Unnamed Fields
 @section Unnamed struct/union fields within structs/unions

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: #pragma GCC unroll support
  2015-01-26 21:18     ` Mike Stump
@ 2015-01-30  2:51       ` Joseph Myers
  2015-01-30  6:12         ` Mike Stump
  0 siblings, 1 reply; 25+ messages in thread
From: Joseph Myers @ 2015-01-30  2:51 UTC (permalink / raw)
  To: Mike Stump; +Cc: Richard Biener, gcc-patches Patches, Jason Merrill

On Mon, 26 Jan 2015, Mike Stump wrote:

> +@item #pragma GCC unroll @var{"n"}
> +@cindex pragma GCC unroll @var{"n"}

@var contains the name of a metasyntactic variable; it doesn't make sense 
for quotes to be included in that name.  And as far as I can tell, the 
quotes aren't part of the syntax either.  That is, you should just have 
@var{n} there, and state explicitly in the text what @var{n} is (an 
assignment-expression that evaluates to an integer constant, it looks 
like).

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: #pragma GCC unroll support
  2015-01-26 21:13   ` Mike Stump
  2015-01-26 21:18     ` Mike Stump
@ 2015-01-30  2:54     ` Joseph Myers
  2015-01-30  6:57       ` Mike Stump
  1 sibling, 1 reply; 25+ messages in thread
From: Joseph Myers @ 2015-01-30  2:54 UTC (permalink / raw)
  To: Mike Stump; +Cc: Richard Biener, gcc-patches Patches, Jason Merrill

On Mon, 26 Jan 2015, Mike Stump wrote:

> +  if (!INTEGRAL_TYPE_P (TREE_TYPE (expr))
> +      || TREE_CODE (expr) != INTEGER_CST
> +      || (lunroll = tree_to_shwi (expr)) < 0
> +      || lunroll > USHRT_MAX)
> +    {
> +      c_parser_error (parser, "%<#pragma GCC unroll%> requires a non-negative number");

That error seems misleading for the case where a number above USHRT_MAX is 
passed.

There should also be tests for the error cases.

> +      unroll = (unsigned short)lunroll;

Casts should have spaces in, "(unsigned short) lunroll"; see 
codingconventions.html.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: #pragma GCC unroll support
  2015-01-30  2:51       ` Joseph Myers
@ 2015-01-30  6:12         ` Mike Stump
  0 siblings, 0 replies; 25+ messages in thread
From: Mike Stump @ 2015-01-30  6:12 UTC (permalink / raw)
  To: Joseph Myers; +Cc: Richard Biener, gcc-patches Patches, Jason Merrill

On Jan 29, 2015, at 3:06 PM, Joseph Myers <joseph@codesourcery.com> wrote:
> On Mon, 26 Jan 2015, Mike Stump wrote:
> 
>> +@item #pragma GCC unroll @var{"n"}
>> +@cindex pragma GCC unroll @var{"n"}
> 
> @var contains the name of a metasyntactic variable; it doesn't make sense 
> for quotes to be included in that name.  And as far as I can tell, the 
> quotes aren't part of the syntax either.  That is, you should just have 
> @var{n} there, and state explicitly in the text what @var{n} is (an 
> assignment-expression that evaluates to an integer constant, it looks 
> like).

Fixed.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: #pragma GCC unroll support
  2015-01-30  2:54     ` Joseph Myers
@ 2015-01-30  6:57       ` Mike Stump
  2015-01-30 17:46         ` Joseph Myers
  0 siblings, 1 reply; 25+ messages in thread
From: Mike Stump @ 2015-01-30  6:57 UTC (permalink / raw)
  To: Joseph Myers; +Cc: Richard Biener, gcc-patches Patches, Jason Merrill

[-- Attachment #1: Type: text/plain, Size: 726 bytes --]

On Jan 29, 2015, at 3:12 PM, Joseph Myers <joseph@codesourcery.com> wrote:
> On Mon, 26 Jan 2015, Mike Stump wrote:
> 
>> +  if (!INTEGRAL_TYPE_P (TREE_TYPE (expr))
>> +      || TREE_CODE (expr) != INTEGER_CST
>> +      || (lunroll = tree_to_shwi (expr)) < 0
>> +      || lunroll > USHRT_MAX)
>> +    {
>> +      c_parser_error (parser, "%<#pragma GCC unroll%> requires a non-negative number");
> 
> That error seems misleading for the case where a number above USHRT_MAX is 
> passed.

Fixed.

> There should also be tests for the error cases.

Fixed.

>> +      unroll = (unsigned short)lunroll;
> 
> Casts should have spaces in, "(unsigned short) lunroll"; see 
> codingconventions.html.

Fixed.


[-- Attachment #2: unroll-4.diffs.txt --]
[-- Type: text/plain, Size: 62689 bytes --]

Index: ada/gcc-interface/trans.c
===================================================================
--- ada/gcc-interface/trans.c	(revision 220084)
+++ ada/gcc-interface/trans.c	(working copy)
@@ -7870,17 +7870,20 @@ gnat_gimplify_stmt (tree *stmt_p)
 	  {
 	    /* Deal with the optimization hints.  */
 	    if (LOOP_STMT_IVDEP (stmt))
-	      gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
+	      gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
 				 build_int_cst (integer_type_node,
-						annot_expr_ivdep_kind));
+						annot_expr_ivdep_kind),
+				 NULL_TREE);
 	    if (LOOP_STMT_NO_VECTOR (stmt))
-	      gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
+	      gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
 				 build_int_cst (integer_type_node,
-						annot_expr_no_vector_kind));
+						annot_expr_no_vector_kind),
+				 NULL_TREE);
 	    if (LOOP_STMT_VECTOR (stmt))
-	      gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
+	      gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
 				 build_int_cst (integer_type_node,
-						annot_expr_vector_kind));
+						annot_expr_vector_kind),
+				 NULL_TREE);
 
 	    gnu_cond
 	      = build3 (COND_EXPR, void_type_node, gnu_cond, NULL_TREE,
Index: c/c-parser.c
===================================================================
--- c/c-parser.c	(revision 220084)
+++ c/c-parser.c	(working copy)
@@ -1217,9 +1217,9 @@ static void c_parser_statement (c_parser
 static void c_parser_statement_after_labels (c_parser *);
 static void c_parser_if_statement (c_parser *);
 static void c_parser_switch_statement (c_parser *);
-static void c_parser_while_statement (c_parser *, bool);
-static void c_parser_do_statement (c_parser *, bool);
-static void c_parser_for_statement (c_parser *, bool);
+static void c_parser_while_statement (c_parser *, bool, unsigned short);
+static void c_parser_do_statement (c_parser *, bool, unsigned short);
+static void c_parser_for_statement (c_parser *, bool, unsigned short);
 static tree c_parser_asm_statement (c_parser *);
 static tree c_parser_asm_operands (c_parser *);
 static tree c_parser_asm_goto_operands (c_parser *);
@@ -4972,13 +4972,13 @@ c_parser_statement_after_labels (c_parse
 	  c_parser_switch_statement (parser);
 	  break;
 	case RID_WHILE:
-	  c_parser_while_statement (parser, false);
+	  c_parser_while_statement (parser, false, 0);
 	  break;
 	case RID_DO:
-	  c_parser_do_statement (parser, false);
+	  c_parser_do_statement (parser, false, 0);
 	  break;
 	case RID_FOR:
-	  c_parser_for_statement (parser, false);
+	  c_parser_for_statement (parser, false, 0);
 	  break;
 	case RID_CILK_FOR:
 	  if (!flag_cilkplus)
@@ -5340,7 +5340,7 @@ c_parser_switch_statement (c_parser *par
 */
 
 static void
-c_parser_while_statement (c_parser *parser, bool ivdep)
+c_parser_while_statement (c_parser *parser, bool ivdep, unsigned short unroll)
 {
   tree block, cond, body, save_break, save_cont;
   location_t loc;
@@ -5354,9 +5354,15 @@ c_parser_while_statement (c_parser *pars
 	 "%<_Cilk_spawn%> statement cannot be used as a condition for while statement"))
     cond = error_mark_node;
   if (ivdep && cond != error_mark_node)
-    cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
 		   build_int_cst (integer_type_node,
-		   annot_expr_ivdep_kind));
+				  annot_expr_ivdep_kind),
+		   NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+		   build_int_cst (integer_type_node,
+				  annot_expr_unroll_kind),
+		   build_int_cst (integer_type_node, unroll));
   save_break = c_break_label;
   c_break_label = NULL_TREE;
   save_cont = c_cont_label;
@@ -5375,7 +5381,7 @@ c_parser_while_statement (c_parser *pars
 */
 
 static void
-c_parser_do_statement (c_parser *parser, bool ivdep)
+c_parser_do_statement (c_parser *parser, bool ivdep, unsigned short unroll)
 {
   tree block, cond, body, save_break, save_cont, new_break, new_cont;
   location_t loc;
@@ -5403,9 +5409,16 @@ c_parser_do_statement (c_parser *parser,
 	 "%<_Cilk_spawn%> statement cannot be used as a condition for a do-while statement"))
     cond = error_mark_node;
   if (ivdep && cond != error_mark_node)
-    cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+		   build_int_cst (integer_type_node,
+				  annot_expr_ivdep_kind),
+		   NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
 		   build_int_cst (integer_type_node,
-		   annot_expr_ivdep_kind));
+				  annot_expr_unroll_kind),
+		   build_int_cst (integer_type_node,
+				  unroll));
   if (!c_parser_require (parser, CPP_SEMICOLON, "expected %<;%>"))
     c_parser_skip_to_end_of_block_or_statement (parser);
   c_finish_loop (loc, cond, NULL, body, new_break, new_cont, false);
@@ -5469,7 +5482,7 @@ c_parser_do_statement (c_parser *parser,
 */
 
 static void
-c_parser_for_statement (c_parser *parser, bool ivdep)
+c_parser_for_statement (c_parser *parser, bool ivdep, unsigned short unroll)
 {
   tree block, cond, incr, save_break, save_cont, body;
   /* The following are only used when parsing an ObjC foreach statement.  */
@@ -5587,6 +5600,12 @@ c_parser_for_statement (c_parser *parser
 				  "%<GCC ivdep%> pragma");
 		  cond = error_mark_node;
 		}
+	      else if (unroll)
+		{
+		  c_parser_error (parser, "missing loop condition in loop with "
+				  "%<GCC unroll%> pragma");
+		  cond = error_mark_node;
+		}
 	      else
 		{
 		  c_parser_consume_token (parser);
@@ -5604,9 +5623,15 @@ c_parser_for_statement (c_parser *parser
 					 "expected %<;%>");
 	    }
 	  if (ivdep && cond != error_mark_node)
-	    cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+	    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+			   build_int_cst (integer_type_node,
+					  annot_expr_ivdep_kind),
+			   NULL_TREE);
+	  if (unroll && cond != error_mark_node)
+	    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
 			   build_int_cst (integer_type_node,
-			   annot_expr_ivdep_kind));
+					  annot_expr_unroll_kind),
+			   build_int_cst (integer_type_node, unroll));
 	}
       /* Parse the increment expression (the third expression in a
 	 for-statement).  In the case of a foreach-statement, this is
@@ -9592,6 +9617,49 @@ c_parser_objc_at_dynamic_declaration (c_
 }
 
 \f
+static bool
+c_parse_pragma_ivdep (c_parser *parser)
+{
+  c_parser_consume_pragma (parser);
+  c_parser_skip_to_pragma_eol (parser);
+  return true;
+}
+
+static unsigned short
+c_parser_pragma_unroll (c_parser *parser)
+{
+  unsigned short unroll;
+  c_parser_consume_pragma (parser);
+  tree expr = c_parser_expr_no_commas (parser, NULL).value;
+  mark_exp_read (expr);
+  expr = c_fully_fold (expr, false, NULL);
+  HOST_WIDE_INT lunroll = 0;
+  if (!INTEGRAL_TYPE_P (TREE_TYPE (expr))
+      || TREE_CODE (expr) != INTEGER_CST
+      || (lunroll = tree_to_shwi (expr)) < 0
+      || lunroll > USHRT_MAX)
+    {
+      char msgid[150];
+      char num[10];
+      strcpy (msgid, "%<#pragma GCC unroll%> requires an assignment-expression"
+	      " that evaluates to a non-negative integral constant less than"
+	      " or equal to ");
+      sprintf (num, "%u", USHRT_MAX);
+      strcat (msgid, num);
+      c_parser_error (parser, msgid);
+      unroll = 0;
+    }
+  else
+    {
+      unroll = (unsigned short) lunroll;
+      if (unroll == 0)
+	unroll = 1;
+    }
+
+  c_parser_skip_to_pragma_eol (parser);
+  return unroll;
+}
+
 /* Handle pragmas.  Some OpenMP pragmas are associated with, and therefore
    should be considered, statements.  ALLOW_STMT is true if we're within
    the context of a function and such pragmas are to be allowed.  Returns
@@ -9714,21 +9782,46 @@ c_parser_pragma (c_parser *parser, enum
       c_parser_omp_declare (parser, context);
       return false;
     case PRAGMA_IVDEP:
-      c_parser_consume_pragma (parser);
-      c_parser_skip_to_pragma_eol (parser);
-      if (!c_parser_next_token_is_keyword (parser, RID_FOR)
-	  && !c_parser_next_token_is_keyword (parser, RID_WHILE)
-	  && !c_parser_next_token_is_keyword (parser, RID_DO))
-	{
-	  c_parser_error (parser, "for, while or do statement expected");
-	  return false;
-	}
-      if (c_parser_next_token_is_keyword (parser, RID_FOR))
-	c_parser_for_statement (parser, true);
-      else if (c_parser_next_token_is_keyword (parser, RID_WHILE))
-	c_parser_while_statement (parser, true);
-      else
-	c_parser_do_statement (parser, true);
+      {
+	bool ivdep = c_parse_pragma_ivdep (parser);
+	unsigned short unroll = 0;
+	if (c_parser_peek_token (parser)->pragma_kind == PRAGMA_UNROLL)
+	  unroll = c_parser_pragma_unroll (parser);
+	if (!c_parser_next_token_is_keyword (parser, RID_FOR)
+	    && !c_parser_next_token_is_keyword (parser, RID_WHILE)
+	    && !c_parser_next_token_is_keyword (parser, RID_DO))
+	  {
+	    c_parser_error (parser, "for, while or do statement expected");
+	    return false;
+	  }
+	if (c_parser_next_token_is_keyword (parser, RID_FOR))
+	  c_parser_for_statement (parser, ivdep, unroll);
+	else if (c_parser_next_token_is_keyword (parser, RID_WHILE))
+	  c_parser_while_statement (parser, ivdep, unroll);
+	else
+	  c_parser_do_statement (parser, ivdep, unroll);
+      }
+      return false;
+    case PRAGMA_UNROLL:
+      {
+	unsigned short unroll = c_parser_pragma_unroll (parser);
+	bool ivdep = false;
+	if (c_parser_peek_token (parser)->pragma_kind == PRAGMA_IVDEP)
+	  ivdep = c_parse_pragma_ivdep (parser);
+	if (!c_parser_next_token_is_keyword (parser, RID_FOR)
+	    && !c_parser_next_token_is_keyword (parser, RID_WHILE)
+	    && !c_parser_next_token_is_keyword (parser, RID_DO))
+	  {
+	    c_parser_error (parser, "for, while or do statement expected");
+	    return false;
+	  }
+	if (c_parser_next_token_is_keyword (parser, RID_FOR))
+	  c_parser_for_statement (parser, ivdep, unroll);
+	else if (c_parser_next_token_is_keyword (parser, RID_WHILE))
+	  c_parser_while_statement (parser, ivdep, unroll);
+	else
+	  c_parser_do_statement (parser, ivdep, unroll);
+      }
       return false;
 
     case PRAGMA_GCC_PCH_PREPROCESS:
Index: c-family/c-pragma.c
===================================================================
--- c-family/c-pragma.c	(revision 220084)
+++ c-family/c-pragma.c	(working copy)
@@ -1456,6 +1456,10 @@ init_pragma (void)
     cpp_register_deferred_pragma (parse_in, "GCC", "ivdep", PRAGMA_IVDEP, false,
 				  false);
 
+  if (!flag_preprocess_only)
+    cpp_register_deferred_pragma (parse_in, "GCC", "unroll", PRAGMA_UNROLL, false,
+				  false);
+
   if (flag_cilkplus && !flag_preprocess_only)
     cpp_register_deferred_pragma (parse_in, "cilk", "grainsize",
 				  PRAGMA_CILK_GRAINSIZE, true, false);
Index: c-family/c-pragma.h
===================================================================
--- c-family/c-pragma.h	(revision 220084)
+++ c-family/c-pragma.h	(working copy)
@@ -69,6 +69,7 @@ typedef enum pragma_kind {
 
   PRAGMA_GCC_PCH_PREPROCESS,
   PRAGMA_IVDEP,
+  PRAGMA_UNROLL,
 
   PRAGMA_FIRST_EXTERNAL
 } pragma_kind;
Index: cfgloop.h
===================================================================
--- cfgloop.h	(revision 220084)
+++ cfgloop.h	(working copy)
@@ -189,6 +189,11 @@ struct GTY ((chain_next ("%h.next"))) lo
      of the loop can be safely evaluated concurrently.  */
   int safelen;
 
+  /* The number of times to unroll the loop.  0, means no information
+     given, just do what we always do.  A value of 1, means don't unroll
+     the loop.  */
+  unsigned short unroll;
+
   /* True if this loop should never be vectorized.  */
   bool dont_vectorize;
 
Index: cfgloopmanip.c
===================================================================
--- cfgloopmanip.c	(revision 220084)
+++ cfgloopmanip.c	(working copy)
@@ -1038,6 +1038,7 @@ copy_loop_info (struct loop *loop, struc
   target->estimate_state = loop->estimate_state;
   target->warned_aggressive_loop_optimizations
     |= loop->warned_aggressive_loop_optimizations;
+  target->unroll = loop->unroll;
 }
 
 /* Copies copy of LOOP as subloop of TARGET loop, placing newly
Index: cp/cp-array-notation.c
===================================================================
--- cp/cp-array-notation.c	(revision 220084)
+++ cp/cp-array-notation.c	(working copy)
@@ -81,7 +81,7 @@ create_an_loop (tree init, tree cond, tr
   finish_expr_stmt (init);
   for_stmt = begin_for_stmt (NULL_TREE, NULL_TREE);
   finish_for_init_stmt (for_stmt);
-  finish_for_cond (cond, for_stmt, false);
+  finish_for_cond (cond, for_stmt, false, 0);
   finish_for_expr (incr, for_stmt);
   finish_expr_stmt (body);
   finish_for_stmt (for_stmt);
Index: cp/cp-tree.h
===================================================================
--- cp/cp-tree.h	(revision 220084)
+++ cp/cp-tree.h	(working copy)
@@ -5644,7 +5644,7 @@ extern tree implicitly_declare_fn
 extern bool maybe_clone_body			(tree);
 
 /* In parser.c */
-extern tree cp_convert_range_for (tree, tree, tree, bool);
+extern tree cp_convert_range_for (tree, tree, tree, bool, unsigned short);
 extern bool parsing_nsdmi (void);
 extern void inject_this_parameter (tree, cp_cv_quals);
 
@@ -5880,16 +5880,16 @@ extern void begin_else_clause			(tree);
 extern void finish_else_clause			(tree);
 extern void finish_if_stmt			(tree);
 extern tree begin_while_stmt			(void);
-extern void finish_while_stmt_cond		(tree, tree, bool);
+extern void finish_while_stmt_cond		(tree, tree, bool, unsigned short);
 extern void finish_while_stmt			(tree);
 extern tree begin_do_stmt			(void);
 extern void finish_do_body			(tree);
-extern void finish_do_stmt			(tree, tree, bool);
+extern void finish_do_stmt			(tree, tree, bool, unsigned short);
 extern tree finish_return_stmt			(tree);
 extern tree begin_for_scope			(tree *);
 extern tree begin_for_stmt			(tree, tree);
 extern void finish_for_init_stmt		(tree);
-extern void finish_for_cond			(tree, tree, bool);
+extern void finish_for_cond			(tree, tree, bool, unsigned short);
 extern void finish_for_expr			(tree, tree);
 extern void finish_for_stmt			(tree);
 extern tree begin_range_for_stmt		(tree, tree);
Index: cp/init.c
===================================================================
--- cp/init.c	(revision 220084)
+++ cp/init.c	(working copy)
@@ -3693,7 +3693,7 @@ build_vec_init (tree base, tree maxindex
       finish_for_init_stmt (for_stmt);
       finish_for_cond (build2 (NE_EXPR, boolean_type_node, iterator,
 			       build_int_cst (TREE_TYPE (iterator), -1)),
-		       for_stmt, false);
+		       for_stmt, false, 0);
       elt_init = cp_build_unary_op (PREDECREMENT_EXPR, iterator, 0,
 				    complain);
       if (elt_init == error_mark_node)
Index: cp/parser.c
===================================================================
--- cp/parser.c	(revision 220084)
+++ cp/parser.c	(working copy)
@@ -2044,15 +2044,15 @@ static tree cp_parser_selection_statemen
 static tree cp_parser_condition
   (cp_parser *);
 static tree cp_parser_iteration_statement
-  (cp_parser *, bool);
+  (cp_parser *, bool, unsigned short);
 static bool cp_parser_for_init_statement
   (cp_parser *, tree *decl);
 static tree cp_parser_for
-  (cp_parser *, bool);
+  (cp_parser *, bool, unsigned short);
 static tree cp_parser_c_for
-  (cp_parser *, tree, tree, bool);
+  (cp_parser *, tree, tree, bool, unsigned short);
 static tree cp_parser_range_for
-  (cp_parser *, tree, tree, tree, bool);
+  (cp_parser *, tree, tree, tree, bool, unsigned short);
 static void do_range_for_auto_deduction
   (tree, tree);
 static tree cp_parser_perform_range_for_lookup
@@ -9698,7 +9698,7 @@ cp_parser_statement (cp_parser* parser,
 	case RID_WHILE:
 	case RID_DO:
 	case RID_FOR:
-	  statement = cp_parser_iteration_statement (parser, false);
+	  statement = cp_parser_iteration_statement (parser, false, 0);
 	  break;
 
 	case RID_CILK_FOR:
@@ -10390,7 +10390,7 @@ cp_parser_condition (cp_parser* parser)
    not included. */
 
 static tree
-cp_parser_for (cp_parser *parser, bool ivdep)
+cp_parser_for (cp_parser *parser, bool ivdep, unsigned short unroll)
 {
   tree init, scope, decl;
   bool is_range_for;
@@ -10402,13 +10402,14 @@ cp_parser_for (cp_parser *parser, bool i
   is_range_for = cp_parser_for_init_statement (parser, &decl);
 
   if (is_range_for)
-    return cp_parser_range_for (parser, scope, init, decl, ivdep);
+    return cp_parser_range_for (parser, scope, init, decl, ivdep, unroll);
   else
-    return cp_parser_c_for (parser, scope, init, ivdep);
+    return cp_parser_c_for (parser, scope, init, ivdep, unroll);
 }
 
 static tree
-cp_parser_c_for (cp_parser *parser, tree scope, tree init, bool ivdep)
+cp_parser_c_for (cp_parser *parser, tree scope, tree init, bool ivdep,
+		 unsigned short unroll)
 {
   /* Normal for loop */
   tree condition = NULL_TREE;
@@ -10429,7 +10430,13 @@ cp_parser_c_for (cp_parser *parser, tree
 		       "%<GCC ivdep%> pragma");
       condition = error_mark_node;
     }
-  finish_for_cond (condition, stmt, ivdep);
+  else if (unroll)
+    {
+      cp_parser_error (parser, "missing loop condition in loop with "
+		       "%<GCC unroll%> pragma");
+      condition = error_mark_node;
+    }
+  finish_for_cond (condition, stmt, ivdep, unroll);
   /* Look for the `;'.  */
   cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON);
 
@@ -10453,7 +10460,7 @@ cp_parser_c_for (cp_parser *parser, tree
 
 static tree
 cp_parser_range_for (cp_parser *parser, tree scope, tree init, tree range_decl,
-		     bool ivdep)
+		     bool ivdep, unsigned short unroll)
 {
   tree stmt, range_expr;
 
@@ -10474,6 +10481,8 @@ cp_parser_range_for (cp_parser *parser,
       stmt = begin_range_for_stmt (scope, init);
       if (ivdep)
 	RANGE_FOR_IVDEP (stmt) = 1;
+      if (unroll)
+	/* TODO */(void)0;
       finish_range_for_decl (stmt, range_decl, range_expr);
       if (!type_dependent_expression_p (range_expr)
 	  /* do_auto_deduction doesn't mess with template init-lists.  */
@@ -10483,7 +10492,7 @@ cp_parser_range_for (cp_parser *parser,
   else
     {
       stmt = begin_for_stmt (scope, init);
-      stmt = cp_convert_range_for (stmt, range_decl, range_expr, ivdep);
+      stmt = cp_convert_range_for (stmt, range_decl, range_expr, ivdep, unroll);
     }
   return stmt;
 }
@@ -10575,7 +10584,7 @@ do_range_for_auto_deduction (tree decl,
 
 tree
 cp_convert_range_for (tree statement, tree range_decl, tree range_expr,
-		      bool ivdep)
+		      bool ivdep, unsigned short unroll)
 {
   tree begin, end;
   tree iter_type, begin_expr, end_expr;
@@ -10632,7 +10641,7 @@ cp_convert_range_for (tree statement, tr
 				 begin, ERROR_MARK,
 				 end, ERROR_MARK,
 				 NULL, tf_warning_or_error);
-  finish_for_cond (condition, statement, ivdep);
+  finish_for_cond (condition, statement, ivdep, unroll);
 
   /* The new increment expression.  */
   expression = finish_unary_op_expr (input_location,
@@ -10793,7 +10802,8 @@ cp_parser_range_for_member_function (tre
    Returns the new WHILE_STMT, DO_STMT, FOR_STMT or RANGE_FOR_STMT.  */
 
 static tree
-cp_parser_iteration_statement (cp_parser* parser, bool ivdep)
+cp_parser_iteration_statement (cp_parser* parser, bool ivdep,
+			       unsigned short unroll)
 {
   cp_token *token;
   enum rid keyword;
@@ -10823,7 +10833,7 @@ cp_parser_iteration_statement (cp_parser
 	cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN);
 	/* Parse the condition.  */
 	condition = cp_parser_condition (parser);
-	finish_while_stmt_cond (condition, statement, ivdep);
+	finish_while_stmt_cond (condition, statement, ivdep, unroll);
 	/* Look for the `)'.  */
 	cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN);
 	/* Parse the dependent statement.  */
@@ -10853,7 +10863,7 @@ cp_parser_iteration_statement (cp_parser
 	/* Parse the expression.  */
 	expression = cp_parser_expression (parser);
 	/* We're done with the do-statement.  */
-	finish_do_stmt (expression, statement, ivdep);
+	finish_do_stmt (expression, statement, ivdep, unroll);
 	/* Look for the `)'.  */
 	cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN);
 	/* Look for the `;'.  */
@@ -10866,7 +10876,7 @@ cp_parser_iteration_statement (cp_parser
 	/* Look for the `('.  */
 	cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN);
 
-	statement = cp_parser_for (parser, ivdep);
+	statement = cp_parser_for (parser, ivdep, unroll);
 
 	/* Look for the `)'.  */
 	cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN);
@@ -32901,6 +32911,42 @@ cp_parser_cilk_grainsize (cp_parser *par
   cp_parser_skip_to_pragma_eol (parser, pragma_tok);
 }
 
+static bool
+cp_parser_pragma_ivdep (cp_parser *parser, cp_token *pragma_tok)
+{
+  cp_parser_skip_to_pragma_eol (parser, pragma_tok);
+  return true;
+}
+
+static unsigned short
+cp_parser_pragma_unroll (cp_parser *parser, cp_token *pragma_tok)
+{
+  location_t location = cp_lexer_peek_token (parser->lexer)->location;
+  tree expr = cp_parser_constant_expression (parser);
+  unsigned short unroll;
+  expr = maybe_constant_value (expr);
+  cp_parser_require_pragma_eol (parser, pragma_tok);
+  HOST_WIDE_INT lunroll = 0;
+  if (!INTEGRAL_TYPE_P (TREE_TYPE (expr))
+      || TREE_CODE (expr) != INTEGER_CST
+      || (lunroll = tree_to_shwi (expr)) < 0
+      || lunroll > USHRT_MAX)
+    {
+      error_at (location,
+		"%<#pragma GCC unroll%> requires an assignment-expression"
+		" that evaluates to a non-negative integral constant less"
+		" than or equal to %d", USHRT_MAX);
+      unroll = 0;
+    }
+  else
+    {
+      unroll = (unsigned short) lunroll;
+      if (unroll == 0)
+	unroll = 1;
+    }
+  return unroll;
+}
+
 /* Normal parsing of a pragma token.  Here we can (and must) use the
    regular lexer.  */
 
@@ -33068,9 +33114,39 @@ cp_parser_pragma (cp_parser *parser, enu
 
     case PRAGMA_IVDEP:
       {
-	cp_parser_skip_to_pragma_eol (parser, pragma_tok);
+	bool ivdep = cp_parser_pragma_ivdep (parser, pragma_tok);
+	unsigned short unroll = 0;
 	cp_token *tok;
 	tok = cp_lexer_peek_token (the_parser->lexer);
+	if (tok->type == CPP_PRAGMA &&
+	    tok->pragma_kind == PRAGMA_UNROLL)
+	  {
+	    unroll = cp_parser_pragma_unroll (parser, pragma_tok);
+	    tok = cp_lexer_peek_token (the_parser->lexer);
+	  }
+	if (tok->type != CPP_KEYWORD
+	    || (tok->keyword != RID_FOR && tok->keyword != RID_WHILE
+		&& tok->keyword != RID_DO))
+	  {
+	    cp_parser_error (parser, "for, while or do statement expected");
+	    return false;
+	  }
+	cp_parser_iteration_statement (parser, ivdep, unroll);
+	return true;
+      }
+
+    case PRAGMA_UNROLL:
+      {
+	unsigned short unroll = cp_parser_pragma_unroll (parser, pragma_tok);
+	bool ivdep = false;
+	cp_token *tok;
+	tok = cp_lexer_peek_token (the_parser->lexer);
+	if (tok->type == CPP_PRAGMA &&
+	    tok->pragma_kind == PRAGMA_IVDEP)
+	  {
+	    ivdep = cp_parser_pragma_ivdep (parser, tok);
+	    tok = cp_lexer_peek_token (the_parser->lexer);
+	  }
 	if (tok->type != CPP_KEYWORD
 	    || (tok->keyword != RID_FOR && tok->keyword != RID_WHILE
 		&& tok->keyword != RID_DO))
@@ -33078,7 +33154,7 @@ cp_parser_pragma (cp_parser *parser, enu
 	    cp_parser_error (parser, "for, while or do statement expected");
 	    return false;
 	  }
-	cp_parser_iteration_statement (parser, true);
+	cp_parser_iteration_statement (parser, ivdep, unroll);
 	return true;
       }
 
Index: cp/pt.c
===================================================================
--- cp/pt.c	(revision 220084)
+++ cp/pt.c	(working copy)
@@ -13886,7 +13886,7 @@ tsubst_expr (tree t, tree args, tsubst_f
       RECUR (FOR_INIT_STMT (t));
       finish_for_init_stmt (stmt);
       tmp = RECUR (FOR_COND (t));
-      finish_for_cond (tmp, stmt, false);
+      finish_for_cond (tmp, stmt, false, 0);
       tmp = RECUR (FOR_EXPR (t));
       finish_for_expr (tmp, stmt);
       RECUR (FOR_BODY (t));
@@ -13901,7 +13901,7 @@ tsubst_expr (tree t, tree args, tsubst_f
         decl = tsubst (decl, args, complain, in_decl);
         maybe_push_decl (decl);
         expr = RECUR (RANGE_FOR_EXPR (t));
-        stmt = cp_convert_range_for (stmt, decl, expr, RANGE_FOR_IVDEP (t));
+        stmt = cp_convert_range_for (stmt, decl, expr, RANGE_FOR_IVDEP (t), 0);
         RECUR (RANGE_FOR_BODY (t));
         finish_for_stmt (stmt);
       }
@@ -13910,7 +13910,7 @@ tsubst_expr (tree t, tree args, tsubst_f
     case WHILE_STMT:
       stmt = begin_while_stmt ();
       tmp = RECUR (WHILE_COND (t));
-      finish_while_stmt_cond (tmp, stmt, false);
+      finish_while_stmt_cond (tmp, stmt, false, 0);
       RECUR (WHILE_BODY (t));
       finish_while_stmt (stmt);
       break;
@@ -13920,7 +13920,7 @@ tsubst_expr (tree t, tree args, tsubst_f
       RECUR (DO_BODY (t));
       finish_do_body (stmt);
       tmp = RECUR (DO_COND (t));
-      finish_do_stmt (tmp, stmt, false);
+      finish_do_stmt (tmp, stmt, false, 0);
       break;
 
     case IF_STMT:
@@ -14358,8 +14358,10 @@ tsubst_expr (tree t, tree args, tsubst_f
 
     case ANNOTATE_EXPR:
       tmp = RECUR (TREE_OPERAND (t, 0));
-      RETURN (build2_loc (EXPR_LOCATION (t), ANNOTATE_EXPR,
-			  TREE_TYPE (tmp), tmp, RECUR (TREE_OPERAND (t, 1))));
+      RETURN (build3_loc (EXPR_LOCATION (t), ANNOTATE_EXPR,
+			  TREE_TYPE (tmp), tmp,
+			  RECUR (TREE_OPERAND (t, 1)),
+			  RECUR (TREE_OPERAND (t, 2))));
 
     default:
       gcc_assert (!STATEMENT_CODE_P (TREE_CODE (t)));
Index: cp/semantics.c
===================================================================
--- cp/semantics.c	(revision 220084)
+++ cp/semantics.c	(working copy)
@@ -802,7 +802,8 @@ begin_while_stmt (void)
    WHILE_STMT.  */
 
 void
-finish_while_stmt_cond (tree cond, tree while_stmt, bool ivdep)
+finish_while_stmt_cond (tree cond, tree while_stmt, bool ivdep,
+			unsigned short unroll)
 {
   if (check_no_cilk (cond,
       "Cilk array notation cannot be used as a condition for while statement",
@@ -812,11 +813,19 @@ finish_while_stmt_cond (tree cond, tree
   finish_cond (&WHILE_COND (while_stmt), cond);
   begin_maybe_infinite_loop (cond);
   if (ivdep && cond != error_mark_node)
-    WHILE_COND (while_stmt) = build2 (ANNOTATE_EXPR,
+    WHILE_COND (while_stmt) = build3 (ANNOTATE_EXPR,
 				      TREE_TYPE (WHILE_COND (while_stmt)),
 				      WHILE_COND (while_stmt),
 				      build_int_cst (integer_type_node,
-						     annot_expr_ivdep_kind));
+						     annot_expr_ivdep_kind),
+				      NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    WHILE_COND (while_stmt) = build3 (ANNOTATE_EXPR,
+				      TREE_TYPE (WHILE_COND (while_stmt)),
+				      WHILE_COND (while_stmt),
+				      build_int_cst (integer_type_node,
+						     annot_expr_unroll_kind),
+				      build_int_cst (integer_type_node, unroll));
   simplify_loop_decl_cond (&WHILE_COND (while_stmt), WHILE_BODY (while_stmt));
 }
 
@@ -861,7 +870,7 @@ finish_do_body (tree do_stmt)
    COND is as indicated.  */
 
 void
-finish_do_stmt (tree cond, tree do_stmt, bool ivdep)
+finish_do_stmt (tree cond, tree do_stmt, bool ivdep, unsigned short unroll)
 {
   if (check_no_cilk (cond,
   "Cilk array notation cannot be used as a condition for a do-while statement",
@@ -870,8 +879,13 @@ finish_do_stmt (tree cond, tree do_stmt,
   cond = maybe_convert_cond (cond);
   end_maybe_infinite_loop (cond);
   if (ivdep && cond != error_mark_node)
-    cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
-		   build_int_cst (integer_type_node, annot_expr_ivdep_kind));
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+		   build_int_cst (integer_type_node, annot_expr_ivdep_kind),
+		   NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+		   build_int_cst (integer_type_node, annot_expr_unroll_kind),
+		   build_int_cst (integer_type_node, unroll));
   DO_COND (do_stmt) = cond;
 }
 
@@ -974,7 +988,7 @@ finish_for_init_stmt (tree for_stmt)
    FOR_STMT.  */
 
 void
-finish_for_cond (tree cond, tree for_stmt, bool ivdep)
+finish_for_cond (tree cond, tree for_stmt, bool ivdep, unsigned short unroll)
 {
   if (check_no_cilk (cond,
 	 "Cilk array notation cannot be used in a condition for a for-loop",
@@ -984,11 +998,20 @@ finish_for_cond (tree cond, tree for_stm
   finish_cond (&FOR_COND (for_stmt), cond);
   begin_maybe_infinite_loop (cond);
   if (ivdep && cond != error_mark_node)
-    FOR_COND (for_stmt) = build2 (ANNOTATE_EXPR,
+    FOR_COND (for_stmt) = build3 (ANNOTATE_EXPR,
 				  TREE_TYPE (FOR_COND (for_stmt)),
 				  FOR_COND (for_stmt),
 				  build_int_cst (integer_type_node,
-						 annot_expr_ivdep_kind));
+						 annot_expr_ivdep_kind),
+				  NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    FOR_COND (for_stmt) = build3 (ANNOTATE_EXPR,
+				  TREE_TYPE (FOR_COND (for_stmt)),
+				  FOR_COND (for_stmt),
+				  build_int_cst (integer_type_node,
+						 annot_expr_unroll_kind),
+				  build_int_cst (integer_type_node,
+						 unroll));
   simplify_loop_decl_cond (&FOR_COND (for_stmt), FOR_BODY (for_stmt));
 }
 
Index: doc/extend.texi
===================================================================
--- doc/extend.texi	(revision 220084)
+++ doc/extend.texi	(working copy)
@@ -17881,6 +17881,18 @@ void ignore_vec_dep (int *a, int k, int
 @}
 @end smallexample
 
+@table @code
+@item #pragma GCC unroll @var{n}
+@cindex pragma GCC unroll @var{n}
+
+With this pragma, the programmer informs the optimizer how many times
+a loop should be unrolled.  A 0 or 1 informs the compiler to not
+perform any loop unrolling.  The pragma must be immediately before
+@samp{#pragma ivdep} or a @code{for}, @code{while} or @code{do} loop
+and applies only to the loop that follows.  @var{n} is an
+assignment-expression that evaluates to an integer constant.
+
+@end table
 
 @node Unnamed Fields
 @section Unnamed struct/union fields within structs/unions
Index: fortran/trans-stmt.c
===================================================================
--- fortran/trans-stmt.c	(revision 220084)
+++ fortran/trans-stmt.c	(working copy)
@@ -2876,9 +2876,10 @@ gfc_trans_forall_loop (forall_info *fora
       cond = fold_build2_loc (input_location, LE_EXPR, boolean_type_node,
 			      count, build_int_cst (TREE_TYPE (count), 0));
       if (forall_tmp->do_concurrent)
-	cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+	cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
 		       build_int_cst (integer_type_node,
-				      annot_expr_ivdep_kind));
+				      annot_expr_ivdep_kind),
+		       NULL_TREE);
 
       tmp = build1_v (GOTO_EXPR, exit_label);
       tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node,
Index: function.h
===================================================================
--- function.h	(revision 220084)
+++ function.h	(working copy)
@@ -670,6 +670,10 @@ struct GTY(()) function {
 
   /* Set when the tail call has been identified.  */
   unsigned int tail_call_marked : 1;
+
+  /* Set when #pragma unroll has been used in the body.  Used by rtl
+     unrolling to know when to perform unrolling in the function.  */
+  unsigned int has_unroll : 1;
 };
 
 /* Add the decl D to the local_decls list of FUN.  */
Index: gimple-low.c
===================================================================
--- gimple-low.c	(revision 220084)
+++ gimple-low.c	(working copy)
@@ -347,7 +347,7 @@ lower_stmt (gimple_stmt_iterator *gsi, s
 	for (i = 0; i < gimple_call_num_args (stmt); i++)
 	  {
 	    tree arg = gimple_call_arg (stmt, i);
-	    if (EXPR_P (arg))
+	    if (arg && EXPR_P (arg))
 	      TREE_SET_BLOCK (arg, data->block);
 	  }
 
Index: gimple-walk.c
===================================================================
--- gimple-walk.c	(revision 220084)
+++ gimple-walk.c	(working copy)
@@ -261,7 +261,7 @@ walk_gimple_op (gimple stmt, walk_tree_f
 
       for (i = 0; i < gimple_call_num_args (stmt); i++)
 	{
-	  if (wi)
+	  if (wi && gimple_call_arg (stmt, i))
 	    wi->val_only
 	      = is_gimple_reg_type (TREE_TYPE (gimple_call_arg (stmt, i)));
 	  ret = walk_tree (gimple_call_arg_ptr (stmt, i), callback_op, wi,
Index: gimplify.c
===================================================================
--- gimplify.c	(revision 220084)
+++ gimplify.c	(working copy)
@@ -2908,6 +2908,9 @@ gimple_boolify (tree expr)
     case ANNOTATE_EXPR:
       switch ((enum annot_expr_kind) TREE_INT_CST_LOW (TREE_OPERAND (expr, 1)))
 	{
+	case annot_expr_unroll_kind:
+	  cfun->has_unroll = 1;
+	  /* fall-through */
 	case annot_expr_ivdep_kind:
 	case annot_expr_no_vector_kind:
 	case annot_expr_vector_kind:
@@ -7947,6 +7950,7 @@ gimplify_expr (tree *expr_p, gimple_seq
 	  {
 	    tree cond = TREE_OPERAND (*expr_p, 0);
 	    tree kind = TREE_OPERAND (*expr_p, 1);
+	    tree data = TREE_OPERAND (*expr_p, 2);
 	    tree type = TREE_TYPE (cond);
 	    if (!INTEGRAL_TYPE_P (type))
 	      {
@@ -7957,7 +7961,7 @@ gimplify_expr (tree *expr_p, gimple_seq
 	    tree tmp = create_tmp_var (type);
 	    gimplify_arg (&cond, pre_p, EXPR_LOCATION (*expr_p));
 	    gcall *call
-	      = gimple_build_call_internal (IFN_ANNOTATE, 2, cond, kind);
+	      = gimple_build_call_internal (IFN_ANNOTATE, 3, cond, kind, data);
 	    gimple_call_set_lhs (call, tmp);
 	    gimplify_seq_add_stmt (pre_p, call);
 	    *expr_p = tmp;
Index: loop-init.c
===================================================================
--- loop-init.c	(revision 220084)
+++ loop-init.c	(working copy)
@@ -375,6 +375,7 @@ pass_loop2::gate (function *fun)
       && (flag_move_loop_invariants
 	  || flag_unswitch_loops
 	  || flag_unroll_loops
+	  || cfun->has_unroll
 #ifdef HAVE_doloop_end
 	  || (flag_branch_on_count_reg && HAVE_doloop_end)
 #endif
@@ -576,7 +577,8 @@ public:
   /* opt_pass methods: */
   virtual bool gate (function *)
     {
-      return (flag_peel_loops || flag_unroll_loops || flag_unroll_all_loops);
+      return (flag_peel_loops || flag_unroll_loops || flag_unroll_all_loops
+	      || cfun->has_unroll);
     }
 
   virtual unsigned int execute (function *);
Index: loop-unroll.c
===================================================================
--- loop-unroll.c	(revision 220084)
+++ loop-unroll.c	(working copy)
@@ -243,16 +243,26 @@ report_unroll (struct loop *loop, locati
 
 /* Decide whether unroll loops and how much.  */
 static void
-decide_unrolling (int flags)
+decide_unrolling (int base_flags)
 {
   struct loop *loop;
 
   /* Scan the loops, inner ones first.  */
   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
     {
+      int flags = base_flags;
+      if (loop->unroll > 1)
+	flags = UAP_UNROLL | UAP_UNROLL_ALL;
       loop->lpt_decision.decision = LPT_NONE;
       location_t locus = get_loop_location (loop);
 
+      if (loop->unroll == 1)
+	{
+	  dump_printf_loc (TDF_RTL, locus,
+			   "not unrolling loop, user didn't want it unrolled\n");
+	  continue;
+	}
+
       if (dump_enabled_p ())
 	dump_printf_loc (TDF_RTL, locus,
                          ";; *** Considering loop %d at BB %d for "
@@ -422,6 +432,19 @@ decide_unroll_constant_iterations (struc
       return;
     }
 
+  if (loop->unroll)
+    {
+      loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
+      loop->lpt_decision.times = loop->unroll - 1;
+      if (loop->lpt_decision.times > desc->niter - 2)
+	{
+	  /* They won't do this for us.  */
+	  loop->lpt_decision.decision = LPT_NONE;
+	  loop->lpt_decision.times = desc->niter - 2;
+	}
+      return;
+    }
+
   /* Check whether the loop rolls enough to consider.  
      Consult also loop bounds and profile; in the case the loop has more
      than one exit it may well loop less than determined maximal number
@@ -443,7 +466,7 @@ decide_unroll_constant_iterations (struc
   best_copies = 2 * nunroll + 10;
 
   i = 2 * nunroll + 2;
-  if (i - 1 >= desc->niter)
+  if (i > desc->niter - 2)
     i = desc->niter - 2;
 
   for (; i >= nunroll - 1; i--)
@@ -695,6 +718,9 @@ decide_unroll_runtime_iterations (struct
   if (targetm.loop_unroll_adjust)
     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 
+  if (loop->unroll)
+    nunroll = loop->unroll;
+
   /* Skip big loops.  */
   if (nunroll <= 1)
     {
@@ -733,8 +759,9 @@ decide_unroll_runtime_iterations (struct
       return;
     }
 
-  /* Success; now force nunroll to be power of 2, as we are unable to
-     cope with overflows in computation of number of iterations.  */
+  /* Success; now force nunroll to be power of 2, as code-gen
+     requires it, we are unable to cope with overflows in
+     computation of number of iterations.  */
   for (i = 1; 2 * i <= nunroll; i *= 2)
     continue;
 
@@ -843,9 +870,10 @@ compare_and_jump_seq (rtx op0, rtx op1,
   return seq;
 }
 
-/* Unroll LOOP for which we are able to count number of iterations in runtime
-   LOOP->LPT_DECISION.TIMES times.  The transformation does this (with some
-   extra care for case n < 0):
+/* Unroll LOOP for which we are able to count number of iterations in
+   runtime LOOP->LPT_DECISION.TIMES times.  The times value must be a
+   power of two.  The transformation does this (with some extra care
+   for case n < 0):
 
    for (i = 0; i < n; i++)
      body;
@@ -1142,6 +1170,9 @@ decide_unroll_stupid (struct loop *loop,
   if (targetm.loop_unroll_adjust)
     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 
+  if (loop->unroll)
+    nunroll = loop->unroll;
+
   /* Skip big loops.  */
   if (nunroll <= 1)
     {
Index: lto-streamer-in.c
===================================================================
--- lto-streamer-in.c	(revision 220084)
+++ lto-streamer-in.c	(working copy)
@@ -751,6 +751,7 @@ input_cfg (struct lto_input_block *ib, s
 
       /* Read OMP SIMD related info.  */
       loop->safelen = streamer_read_hwi (ib);
+      loop->unroll = streamer_read_hwi (ib);
       loop->dont_vectorize = streamer_read_hwi (ib);
       loop->force_vectorize = streamer_read_hwi (ib);
       loop->simduid = stream_read_tree (ib, data_in);
Index: lto-streamer-out.c
===================================================================
--- lto-streamer-out.c	(revision 220084)
+++ lto-streamer-out.c	(working copy)
@@ -1884,6 +1884,7 @@ output_cfg (struct output_block *ob, str
 
       /* Write OMP SIMD related info.  */
       streamer_write_hwi (ob, loop->safelen);
+      streamer_write_hwi (ob, loop->unroll);
       streamer_write_hwi (ob, loop->dont_vectorize);
       streamer_write_hwi (ob, loop->force_vectorize);
       stream_write_tree (ob, loop->simduid, true);
Index: testsuite/c-c++-common/unroll-1.c
===================================================================
--- testsuite/c-c++-common/unroll-1.c	(revision 0)
+++ testsuite/c-c++-common/unroll-1.c	(working copy)
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdisable-tree-cunroll -fdump-rtl-loop2_unroll -fdump-tree-cunrolli-details" } */
+
+void bar(int);
+
+int j;
+
+void test1()
+{
+  unsigned long m = j;
+  unsigned long i;
+
+  /* { dg-final { scan-tree-dump "loop with 9 iterations completely unrolled" "cunrolli" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "21:\(5|11\): note: loop unrolled 7 times" "loop2_unroll" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= j; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "26:\(5|11\): note: loop unrolled 3 times" "loop2_unroll" } } */
+  #pragma GCC unroll 7
+  for (unsigned long i = 1; i <= j; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "3\[31\]:3: note: loop unrolled 2 times" "loop2_unroll" } } */
+  i = 0;
+  #pragma GCC unroll 3
+  do {
+    bar(i);
+  } while (++i < 9);
+
+  #pragma GCC unroll 4+4
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+}
+
+/* { dg-final { cleanup-rtl-dump "loop2_unroll" } } */
Index: testsuite/c-c++-common/unroll-2.c
===================================================================
--- testsuite/c-c++-common/unroll-2.c	(revision 0)
+++ testsuite/c-c++-common/unroll-2.c	(working copy)
@@ -0,0 +1,68 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-loop2_unroll -fdump-tree-cunrolli-details" } */
+
+void bar(int);
+
+int j;
+
+void test1()
+{
+  unsigned long m = j;
+  unsigned long i;
+
+  /* { dg-final { scan-tree-dump "15:\[0-9\]*: note: loop turned into non-loop; it never loops"  "cunrolli" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "\(19|21\):\(5|11\): note: loop unrolled 7 times" "loop2_unroll" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= j; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "26:\[0-9\]*: note: loop unrolled 3 times" "loop2_unroll" } } */
+  #pragma GCC unroll 7
+  for (unsigned long i = 1; i <= j; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "3\[13\]:\[0-9\]*: note: loop unrolled 2 times" "loop2_unroll" } } */
+  i = 0;
+  #pragma GCC unroll 3
+  do {
+    bar(i);
+  } while (++i < 9);
+}
+
+void test2 () {
+  unsigned long m = j;
+  unsigned long i;
+
+  /* { dg-final { scan-tree-dump "\[424\]:\[0-9\]*: note: loop turned into non-loop; it never loops" "cunrolli" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= 7; ++i)
+    bar(i);
+
+  /* { dg-final { scan-tree-dump "4\[79\]:\[0-9\]*: note: loop turned into non-loop; it never loops" "cunrolli" } } */
+  #pragma GCC unroll 9
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "52:\[0-9\]*: note: loop unrolled 3 times" "loop2_unroll" } } */
+  #pragma GCC unroll 4
+  for (unsigned long i = 1; i <= 15; ++i)
+    bar(i);
+
+  /* { dg-final { scan-tree-dump "5\[79\]:\[0-9\]*: note: loop turned into non-loop; it never loops" "cunrolli" } } */
+  #pragma GCC unroll 709
+  for (unsigned long i = 1; i <= 709; ++i)
+    bar(i);
+
+  /* { dg-final { scan-tree-dump "6\[24\]:\[0-9\]*: note: not unrolling loop, user didn't want it unrolled completely" "cunrolli" } } */
+  #pragma GCC unroll 0
+  for (unsigned long i = 1; i <= 3; ++i)
+    bar(i);
+}
+
+
+/* { dg-final { cleanup-tree-dump "cunrolli" } } */
+/* { dg-final { cleanup-rtl-dump "loop2_unroll" } } */
Index: testsuite/c-c++-common/unroll-3.c
===================================================================
--- testsuite/c-c++-common/unroll-3.c	(revision 0)
+++ testsuite/c-c++-common/unroll-3.c	(working copy)
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -funroll-all-loops -fdump-rtl-loop2_unroll -fdump-tree-cunrolli-details" } */
+
+void bar(int);
+
+int j;
+
+void test1()
+{
+  unsigned long m = j;
+  unsigned long i;
+
+  /* { dg-final { scan-tree-dump "16:\[0-9\]*: note: not unrolling loop, user didn't want it unrolled completely" "cunrolli" } } */
+  /* { dg-final { scan-rtl-dump "16:\[0-9\]*: note: not unrolling loop, user didn't want it unrolled" "loop2_unroll" } } */
+  #pragma GCC unroll 0
+  for (unsigned long i = 1; i <= 3; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "21:\[0-9\]*: note: not unrolling loop, user didn't want it unrolled" "loop2_unroll" } } */
+  #pragma GCC unroll 0
+  for (unsigned long i = 1; i <= m; ++i)
+    bar(i);
+}
+
+/* { dg-final { cleanup-tree-dump "cunrolli" } } */
+/* { dg-final { cleanup-rtl-dump "loop2_unroll" } } */
Index: testsuite/c-c++-common/unroll-4.c
===================================================================
--- testsuite/c-c++-common/unroll-4.c	(revision 0)
+++ testsuite/c-c++-common/unroll-4.c	(working copy)
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdisable-tree-cunroll" } */
+
+void bar(int);
+
+int j;
+
+void test1() {
+  unsigned long m = j;
+  unsigned long i;
+
+  #pragma GCC unroll 20000000000	/* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  #pragma GCC unroll i	/* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */
+  /* { dg-error "cannot appear in a constant-expression|is not usable in a constant expression" "" { target c++ } 16 } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  #pragma GCC unroll n	/* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */
+  /* { dg-error "declared" "" { target *-*-* } 21 } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  #pragma GCC unroll 1+i	/* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */
+  /* { dg-error "cannot appear in a constant-expression|is not usable in a constant expression" "" { target c++ } 26 } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  #pragma GCC unroll  4,4		/* { dg-error "expected end of line before" } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  #pragma GCC unroll  4.2	/* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+}
Index: tree-cfg.c
===================================================================
--- tree-cfg.c	(revision 220084)
+++ tree-cfg.c	(working copy)
@@ -316,6 +316,10 @@ replace_loop_annotate_in_block (basic_bl
 	  loop->force_vectorize = true;
 	  cfun->has_force_vectorize_loops = true;
 	  break;
+	case annot_expr_unroll_kind:
+	  loop->unroll = (unsigned short)tree_to_shwi (gimple_call_arg (stmt,
+									2));
+	  break;
 	default:
 	  gcc_unreachable ();
 	}
@@ -365,6 +369,7 @@ replace_loop_annotate (void)
 	    case annot_expr_ivdep_kind:
 	    case annot_expr_no_vector_kind:
 	    case annot_expr_vector_kind:
+	    case annot_expr_unroll_kind:
 	      break;
 	    default:
 	      gcc_unreachable ();
@@ -3385,6 +3390,8 @@ verify_gimple_call (gcall *stmt)
   for (i = 0; i < gimple_call_num_args (stmt); ++i)
     {
       tree arg = gimple_call_arg (stmt, i);
+      if (! arg)
+	continue;
       if ((is_gimple_reg_type (TREE_TYPE (arg))
 	   && !is_gimple_val (arg))
 	  || (!is_gimple_reg_type (TREE_TYPE (arg))
@@ -7512,6 +7519,8 @@ print_loop (FILE *file, struct loop *loo
       fprintf (file, ", estimate = ");
       print_decu (loop->nb_iterations_estimate, file);
     }
+  if (loop->unroll)
+    fprintf (file, ", unroll = %d", loop->unroll);
   fprintf (file, ")\n");
 
   /* Print loop's body.  */
Index: tree-core.h
===================================================================
--- tree-core.h	(revision 220084)
+++ tree-core.h	(working copy)
@@ -725,6 +725,7 @@ enum annot_expr_kind {
   annot_expr_ivdep_kind,
   annot_expr_no_vector_kind,
   annot_expr_vector_kind,
+  annot_expr_unroll_kind,
   annot_expr_kind_last
 };
 
Index: tree-pretty-print.c
===================================================================
--- tree-pretty-print.c	(revision 220084)
+++ tree-pretty-print.c	(working copy)
@@ -2313,6 +2313,10 @@ dump_generic_node (pretty_printer *pp, t
 	case annot_expr_vector_kind:
 	  pp_string (pp, ", vector");
 	  break;
+	case annot_expr_unroll_kind:
+	  pp_printf (pp, ", unroll %d",
+		     (int)TREE_INT_CST_LOW (TREE_OPERAND (node, 2)));
+	  break;
 	default:
 	  gcc_unreachable ();
 	}
Index: tree-ssa-loop-ivcanon.c
===================================================================
--- tree-ssa-loop-ivcanon.c	(revision 220084)
+++ tree-ssa-loop-ivcanon.c	(working copy)
@@ -686,8 +686,7 @@ try_unroll_loop_completely (struct loop
 			    HOST_WIDE_INT maxiter,
 			    location_t locus)
 {
-  unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns;
-  struct loop_size size;
+  unsigned HOST_WIDE_INT n_unroll = 0;
   bool n_unroll_found = false;
   edge edge_to_cancel = NULL;
   int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;
@@ -731,7 +730,8 @@ try_unroll_loop_completely (struct loop
   if (!n_unroll_found)
     return false;
 
-  if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
+  if (loop->unroll == 0 &&
+      n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "Not unrolling loop %d "
@@ -753,107 +753,130 @@ try_unroll_loop_completely (struct loop
       if (ul == UL_SINGLE_ITER)
 	return false;
 
-      large = tree_estimate_loop_size
-		 (loop, exit, edge_to_cancel, &size,
-		  PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
-      ninsns = size.overall;
-      if (large)
+      if (loop->unroll)
 	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
-		     loop->num);
-	  return false;
+	  /* If they wanted to unroll more than we want, don't unroll
+	     it completely.  */
+	  if (n_unroll > (unsigned)loop->unroll)
+	    {
+	      dump_printf_loc (report_flags, locus,
+	        "not unrolling loop, "
+		"user didn't want it unrolled completely.\n");
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file,
+		  "Not unrolling loop %d: "
+		  "user didn't want it unrolled completely.\n",
+			 loop->num);
+	      return false;
+	    }
 	}
-
-      unr_insns = estimated_unrolled_size (&size, n_unroll);
-      if (dump_file && (dump_flags & TDF_DETAILS))
+      else
 	{
-	  fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
-	  fprintf (dump_file, "  Estimated size after unrolling: %d\n",
-		   (int) unr_insns);
-	}
+	  struct loop_size size;
+	  large = tree_estimate_loop_size
+	            (loop, exit, edge_to_cancel, &size,
+		     PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
+	  unsigned HOST_WIDE_INT ninsns = size.overall;
+	  if (large)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
+			 loop->num);
+	      return false;
+	    }
 
-      /* If the code is going to shrink, we don't need to be extra cautious
-	 on guessing if the unrolling is going to be profitable.  */
-      if (unr_insns
-	  /* If there is IV variable that will become constant, we save
-	     one instruction in the loop prologue we do not account
-	     otherwise.  */
-	  <= ninsns + (size.constant_iv != false))
-	;
-      /* We unroll only inner loops, because we do not consider it profitable
-	 otheriwse.  We still can cancel loopback edge of not rolling loop;
-	 this is always a good idea.  */
-      else if (ul == UL_NO_GROWTH)
-	{
+	  unsigned HOST_WIDE_INT unr_insns
+	    = estimated_unrolled_size (&size, n_unroll);
 	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
-		     loop->num);
-	  return false;
-	}
-      /* Outer loops tend to be less interesting candidates for complete
-	 unrolling unless we can do a lot of propagation into the inner loop
-	 body.  For now we disable outer loop unrolling when the code would
-	 grow.  */
-      else if (loop->inner)
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "it is not innermost and code would grow.\n",
-		     loop->num);
-	  return false;
-	}
-      /* If there is call on a hot path through the loop, then
-	 there is most probably not much to optimize.  */
-      else if (size.num_non_pure_calls_on_hot_path)
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "contains call and code would grow.\n",
-		     loop->num);
-	  return false;
-	}
-      /* If there is pure/const call in the function, then we
-	 can still optimize the unrolled loop body if it contains
-	 some other interesting code than the calls and code
-	 storing or cumulating the return value.  */
-      else if (size.num_pure_calls_on_hot_path
-	       /* One IV increment, one test, one ivtmp store
-		  and one useful stmt.  That is about minimal loop
-		  doing pure call.  */
-	       && (size.non_call_stmts_on_hot_path
-		   <= 3 + size.num_pure_calls_on_hot_path))
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "contains just pure calls and code would grow.\n",
-		     loop->num);
-	  return false;
-	}
-      /* Complette unrolling is major win when control flow is removed and
-	 one big basic block is created.  If the loop contains control flow
-	 the optimization may still be a win because of eliminating the loop
-	 overhead but it also may blow the branch predictor tables.
-	 Limit number of branches on the hot path through the peeled
-	 sequence.  */
-      else if (size.num_branches_on_hot_path * (int)n_unroll
-	       > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     " number of branches on hot path in the unrolled sequence"
-		     " reach --param max-peel-branches limit.\n",
-		     loop->num);
-	  return false;
-	}
-      else if (unr_insns
-	       > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "(--param max-completely-peeled-insns limit reached).\n",
-		     loop->num);
-	  return false;
+	    {
+	      fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
+	      fprintf (dump_file, "  Estimated size after unrolling: %d\n",
+		       (int) unr_insns);
+	    }
+
+	  /* If the code is going to shrink, we don't need to be extra
+	     cautious on guessing if the unrolling is going to be
+	     profitable.  */
+	  if (unr_insns
+	      /* If there is IV variable that will become constant, we
+		 save one instruction in the loop prologue we do not
+		 account otherwise.  */
+	      <= ninsns + (size.constant_iv != false))
+	    ;
+	  /* We unroll only inner loops, because we do not consider it
+	     profitable otherwise.  We still can cancel loopback edge
+	     of not rolling loop; this is always a good idea.  */
+	  else if (ul == UL_NO_GROWTH)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* Outer loops tend to be less interesting candidates for
+	     complete unrolling unless we can do a lot of propagation
+	     into the inner loop body.  For now we disable outer loop
+	     unrolling when the code would grow.  */
+	  else if (loop->inner)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "it is not innermost and code would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* If there is call on a hot path through the loop, then
+	     there is most probably not much to optimize.  */
+	  else if (size.num_non_pure_calls_on_hot_path)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "contains call and code would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* If there is pure/const call in the function, then we can
+	     still optimize the unrolled loop body if it contains some
+	     other interesting code than the calls and code storing or
+	     cumulating the return value.  */
+	  else if (size.num_pure_calls_on_hot_path
+		   /* One IV increment, one test, one ivtmp store and
+		      one useful stmt.  That is about minimal loop
+		      doing pure call.  */
+		   && (size.non_call_stmts_on_hot_path
+		       <= 3 + size.num_pure_calls_on_hot_path))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "contains just pure calls and code would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* Complete unrolling is major win when control flow is
+	     removed and one big basic block is created.  If the loop
+	     contains control flow the optimization may still be a win
+	     because of eliminating the loop overhead but it also may
+	     blow the branch predictor tables.  Limit number of
+	     branches on the hot path through the peeled sequence.  */
+	  else if (size.num_branches_on_hot_path * (int)n_unroll
+		   > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 " number of branches on hot path in the unrolled sequence"
+			 " reach --param max-peel-branches limit.\n",
+			 loop->num);
+	      return false;
+	    }
+	  else if (unr_insns
+		   > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "(--param max-completely-peeled-insns limit reached).\n",
+			 loop->num);
+	      return false;
+	    }
 	}
       dump_printf_loc (report_flags, locus,
                        "loop turned into non-loop; it never loops.\n");
@@ -897,8 +920,9 @@ try_unroll_loop_completely (struct loop
       else
 	gimple_cond_make_true (cond);
       update_stmt (cond);
-      /* Do not remove the path. Doing so may remove outer loop
-	 and confuse bookkeeping code in tree_unroll_loops_completelly.  */
+      /* Do not remove the path. Doing so may remove outer loop and
+	 confuse bookkeeping code in
+	 tree_unroll_loops_completelly.  */
     }
 
   /* Store the loop for later unlooping and exit removal.  */
@@ -974,23 +998,33 @@ try_peel_loop (struct loop *loop,
   if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0)
     return false;
 
+  /* We don't peel loops that will be unrolled as this can duplicate a
+     loop more times than the user requested.  */
+  if (loop->unroll)
+    {
+      if (dump_file)
+        fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
+      return false;
+    }
+
   /* Peel only innermost loops.  */
   if (loop->inner)
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: outer loop\n");
+	fprintf (dump_file, "Not peeling: outer loop\n");
       return false;
     }
 
   if (!optimize_loop_for_speed_p (loop))
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: cold loop\n");
+	fprintf (dump_file, "Not peeling: cold loop\n");
       return false;
     }
 
   /* Check if there is an estimate on the number of iterations.  */
   npeel = estimated_loop_iterations_int (loop);
+
   if (npeel < 0)
     {
       if (dump_file)
@@ -998,10 +1032,11 @@ try_peel_loop (struct loop *loop,
 	         "estimated\n");
       return false;
     }
+
   if (maxiter >= 0 && maxiter <= npeel)
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: upper bound is known so can "
+	fprintf (dump_file, "Not peeling: upper bound is known so can "
 		 "unroll completely\n");
       return false;
     }
@@ -1012,7 +1047,7 @@ try_peel_loop (struct loop *loop,
   if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: rolls too much "
+	fprintf (dump_file, "Not peeling: rolls too much "
 		 "(%i + 1 > --param max-peel-times)\n", npeel);
       return false;
     }
@@ -1025,7 +1060,7 @@ try_peel_loop (struct loop *loop,
       > PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: peeled sequence size is too large "
+	fprintf (dump_file, "Not peeling: peeled sequence size is too large "
 		 "(%i insns > --param max-peel-insns)", peeled_size);
       return false;
     }
@@ -1302,7 +1337,9 @@ tree_unroll_loops_completely_1 (bool may
   if (!loop_father)
     return false;
 
-  if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
+  if (loop->unroll > 1)
+    ul = UL_ALL;
+  else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
       /* Unroll outermost loops only if asked to do so or they do
 	 not cause code growth.  */
       && (unroll_outer || loop_outer (loop_father)))
@@ -1539,7 +1576,9 @@ public:
   {}
 
   /* opt_pass methods: */
-  virtual bool gate (function *) { return optimize >= 2; }
+  virtual bool gate (function *) {
+    return optimize >= 2 || cfun->has_unroll;
+  }
   virtual unsigned int execute (function *);
 
 }; // class pass_complete_unrolli
Index: tree.def
===================================================================
--- tree.def	(revision 220084)
+++ tree.def	(working copy)
@@ -1365,8 +1365,9 @@ DEFTREECODE (TARGET_OPTION_NODE, "target
 
 /* ANNOTATE_EXPR.
    Operand 0 is the expression to be annotated.
-   Operand 1 is the annotation kind.  */
-DEFTREECODE (ANNOTATE_EXPR, "annotate_expr", tcc_expression, 2)
+   Operand 1 is the annotation kind.
+   Operand 2 is optional data.  */
+DEFTREECODE (ANNOTATE_EXPR, "annotate_expr", tcc_expression, 3)
 
 /* Cilk spawn statement
    Operand 0 is the CALL_EXPR.  */

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: #pragma GCC unroll support
  2015-01-30  6:57       ` Mike Stump
@ 2015-01-30 17:46         ` Joseph Myers
  2015-01-30 17:52           ` Mike Stump
  0 siblings, 1 reply; 25+ messages in thread
From: Joseph Myers @ 2015-01-30 17:46 UTC (permalink / raw)
  To: Mike Stump; +Cc: Richard Biener, gcc-patches Patches, Jason Merrill

On Thu, 29 Jan 2015, Mike Stump wrote:

> @@ -5587,6 +5600,12 @@ c_parser_for_statement (c_parser *parser
>  				  "%<GCC ivdep%> pragma");
>  		  cond = error_mark_node;
>  		}
> +	      else if (unroll)
> +		{
> +		  c_parser_error (parser, "missing loop condition in loop with "
> +				  "%<GCC unroll%> pragma");

c_parser_error is for errors with " before <token>" added to them.

It's not clear that's appropriate with this message; it's generally for 
"expected <token>" type messages, not for messages where something has 
been parsed but is semantically wrong.  (Actually, with carets and column 
numbers, it's not clear how useful that style of message from the C and 
C++ parsers is at all now, and it's not i18n-friendly; see bug 18248.  It 
may be something that, like reconstructing text approximating a complex 
expression in order to include it in a diagnostic, should now be phased 
out in favour of other approaches.)

> +    {
> +      char msgid[150];
> +      char num[10];
> +      strcpy (msgid, "%<#pragma GCC unroll%> requires an assignment-expression"
> +	      " that evaluates to a non-negative integral constant less than"
> +	      " or equal to ");
> +      sprintf (num, "%u", USHRT_MAX);
> +      strcat (msgid, num);
> +      c_parser_error (parser, msgid);

In this case, it seems clearly inappropriate, and the building up a 
message with sprintf is a further i18n problem.  Use error_at, and %u 
directly in the format.

> +	    c_parser_error (parser, "for, while or do statement expected");

This is the sort of case where c_parser_error *is* appropriate (as long as 
we use that error style).

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: #pragma GCC unroll support
  2015-01-30 17:46         ` Joseph Myers
@ 2015-01-30 17:52           ` Mike Stump
  2015-01-30 18:06             ` Marek Polacek
  2015-03-05 23:13             ` #pragma GCC unroll support Mike Stump
  0 siblings, 2 replies; 25+ messages in thread
From: Mike Stump @ 2015-01-30 17:52 UTC (permalink / raw)
  To: Joseph Myers; +Cc: Richard Biener, gcc-patches Patches, Jason Merrill

[-- Attachment #1: Type: text/plain, Size: 130 bytes --]

On Jan 30, 2015, at 7:49 AM, Joseph Myers <joseph@codesourcery.com> wrote:
> Use error_at, and %u directly in the format.

Done.


[-- Attachment #2: unroll-5.diffs.txt --]
[-- Type: text/plain, Size: 62603 bytes --]

Index: ada/gcc-interface/trans.c
===================================================================
--- ada/gcc-interface/trans.c	(revision 220084)
+++ ada/gcc-interface/trans.c	(working copy)
@@ -7870,17 +7870,20 @@ gnat_gimplify_stmt (tree *stmt_p)
 	  {
 	    /* Deal with the optimization hints.  */
 	    if (LOOP_STMT_IVDEP (stmt))
-	      gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
+	      gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
 				 build_int_cst (integer_type_node,
-						annot_expr_ivdep_kind));
+						annot_expr_ivdep_kind),
+				 NULL_TREE);
 	    if (LOOP_STMT_NO_VECTOR (stmt))
-	      gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
+	      gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
 				 build_int_cst (integer_type_node,
-						annot_expr_no_vector_kind));
+						annot_expr_no_vector_kind),
+				 NULL_TREE);
 	    if (LOOP_STMT_VECTOR (stmt))
-	      gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
+	      gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
 				 build_int_cst (integer_type_node,
-						annot_expr_vector_kind));
+						annot_expr_vector_kind),
+				 NULL_TREE);
 
 	    gnu_cond
 	      = build3 (COND_EXPR, void_type_node, gnu_cond, NULL_TREE,
Index: c/c-parser.c
===================================================================
--- c/c-parser.c	(revision 220084)
+++ c/c-parser.c	(working copy)
@@ -1217,9 +1217,9 @@ static void c_parser_statement (c_parser
 static void c_parser_statement_after_labels (c_parser *);
 static void c_parser_if_statement (c_parser *);
 static void c_parser_switch_statement (c_parser *);
-static void c_parser_while_statement (c_parser *, bool);
-static void c_parser_do_statement (c_parser *, bool);
-static void c_parser_for_statement (c_parser *, bool);
+static void c_parser_while_statement (c_parser *, bool, unsigned short);
+static void c_parser_do_statement (c_parser *, bool, unsigned short);
+static void c_parser_for_statement (c_parser *, bool, unsigned short);
 static tree c_parser_asm_statement (c_parser *);
 static tree c_parser_asm_operands (c_parser *);
 static tree c_parser_asm_goto_operands (c_parser *);
@@ -4972,13 +4972,13 @@ c_parser_statement_after_labels (c_parse
 	  c_parser_switch_statement (parser);
 	  break;
 	case RID_WHILE:
-	  c_parser_while_statement (parser, false);
+	  c_parser_while_statement (parser, false, 0);
 	  break;
 	case RID_DO:
-	  c_parser_do_statement (parser, false);
+	  c_parser_do_statement (parser, false, 0);
 	  break;
 	case RID_FOR:
-	  c_parser_for_statement (parser, false);
+	  c_parser_for_statement (parser, false, 0);
 	  break;
 	case RID_CILK_FOR:
 	  if (!flag_cilkplus)
@@ -5340,7 +5340,7 @@ c_parser_switch_statement (c_parser *par
 */
 
 static void
-c_parser_while_statement (c_parser *parser, bool ivdep)
+c_parser_while_statement (c_parser *parser, bool ivdep, unsigned short unroll)
 {
   tree block, cond, body, save_break, save_cont;
   location_t loc;
@@ -5354,9 +5354,15 @@ c_parser_while_statement (c_parser *pars
 	 "%<_Cilk_spawn%> statement cannot be used as a condition for while statement"))
     cond = error_mark_node;
   if (ivdep && cond != error_mark_node)
-    cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
 		   build_int_cst (integer_type_node,
-		   annot_expr_ivdep_kind));
+				  annot_expr_ivdep_kind),
+		   NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+		   build_int_cst (integer_type_node,
+				  annot_expr_unroll_kind),
+		   build_int_cst (integer_type_node, unroll));
   save_break = c_break_label;
   c_break_label = NULL_TREE;
   save_cont = c_cont_label;
@@ -5375,7 +5381,7 @@ c_parser_while_statement (c_parser *pars
 */
 
 static void
-c_parser_do_statement (c_parser *parser, bool ivdep)
+c_parser_do_statement (c_parser *parser, bool ivdep, unsigned short unroll)
 {
   tree block, cond, body, save_break, save_cont, new_break, new_cont;
   location_t loc;
@@ -5403,9 +5409,16 @@ c_parser_do_statement (c_parser *parser,
 	 "%<_Cilk_spawn%> statement cannot be used as a condition for a do-while statement"))
     cond = error_mark_node;
   if (ivdep && cond != error_mark_node)
-    cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+		   build_int_cst (integer_type_node,
+				  annot_expr_ivdep_kind),
+		   NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
 		   build_int_cst (integer_type_node,
-		   annot_expr_ivdep_kind));
+				  annot_expr_unroll_kind),
+		   build_int_cst (integer_type_node,
+				  unroll));
   if (!c_parser_require (parser, CPP_SEMICOLON, "expected %<;%>"))
     c_parser_skip_to_end_of_block_or_statement (parser);
   c_finish_loop (loc, cond, NULL, body, new_break, new_cont, false);
@@ -5469,7 +5482,7 @@ c_parser_do_statement (c_parser *parser,
 */
 
 static void
-c_parser_for_statement (c_parser *parser, bool ivdep)
+c_parser_for_statement (c_parser *parser, bool ivdep, unsigned short unroll)
 {
   tree block, cond, incr, save_break, save_cont, body;
   /* The following are only used when parsing an ObjC foreach statement.  */
@@ -5587,6 +5600,12 @@ c_parser_for_statement (c_parser *parser
 				  "%<GCC ivdep%> pragma");
 		  cond = error_mark_node;
 		}
+	      else if (unroll)
+		{
+		  c_parser_error (parser, "missing loop condition in loop with "
+				  "%<GCC unroll%> pragma");
+		  cond = error_mark_node;
+		}
 	      else
 		{
 		  c_parser_consume_token (parser);
@@ -5604,9 +5623,15 @@ c_parser_for_statement (c_parser *parser
 					 "expected %<;%>");
 	    }
 	  if (ivdep && cond != error_mark_node)
-	    cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+	    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+			   build_int_cst (integer_type_node,
+					  annot_expr_ivdep_kind),
+			   NULL_TREE);
+	  if (unroll && cond != error_mark_node)
+	    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
 			   build_int_cst (integer_type_node,
-			   annot_expr_ivdep_kind));
+					  annot_expr_unroll_kind),
+			   build_int_cst (integer_type_node, unroll));
 	}
       /* Parse the increment expression (the third expression in a
 	 for-statement).  In the case of a foreach-statement, this is
@@ -9592,6 +9617,45 @@ c_parser_objc_at_dynamic_declaration (c_
 }
 
 \f
+static bool
+c_parse_pragma_ivdep (c_parser *parser)
+{
+  c_parser_consume_pragma (parser);
+  c_parser_skip_to_pragma_eol (parser);
+  return true;
+}
+
+static unsigned short
+c_parser_pragma_unroll (c_parser *parser)
+{
+  unsigned short unroll;
+  c_parser_consume_pragma (parser);
+  location_t location = c_parser_peek_token (parser)->location;
+  tree expr = c_parser_expr_no_commas (parser, NULL).value;
+  mark_exp_read (expr);
+  expr = c_fully_fold (expr, false, NULL);
+  HOST_WIDE_INT lunroll = 0;
+  if (!INTEGRAL_TYPE_P (TREE_TYPE (expr))
+      || TREE_CODE (expr) != INTEGER_CST
+      || (lunroll = tree_to_shwi (expr)) < 0
+      || lunroll > USHRT_MAX)
+    {
+      error_at (location, "%<#pragma GCC unroll%> requires an"
+		" assignment-expression that evaluates to a non-negative"
+		" integral constant less than or equal to %u", USHRT_MAX);
+      unroll = 0;
+    }
+  else
+    {
+      unroll = (unsigned short) lunroll;
+      if (unroll == 0)
+	unroll = 1;
+    }
+
+  c_parser_skip_to_pragma_eol (parser);
+  return unroll;
+}
+
 /* Handle pragmas.  Some OpenMP pragmas are associated with, and therefore
    should be considered, statements.  ALLOW_STMT is true if we're within
    the context of a function and such pragmas are to be allowed.  Returns
@@ -9714,21 +9778,46 @@ c_parser_pragma (c_parser *parser, enum
       c_parser_omp_declare (parser, context);
       return false;
     case PRAGMA_IVDEP:
-      c_parser_consume_pragma (parser);
-      c_parser_skip_to_pragma_eol (parser);
-      if (!c_parser_next_token_is_keyword (parser, RID_FOR)
-	  && !c_parser_next_token_is_keyword (parser, RID_WHILE)
-	  && !c_parser_next_token_is_keyword (parser, RID_DO))
-	{
-	  c_parser_error (parser, "for, while or do statement expected");
-	  return false;
-	}
-      if (c_parser_next_token_is_keyword (parser, RID_FOR))
-	c_parser_for_statement (parser, true);
-      else if (c_parser_next_token_is_keyword (parser, RID_WHILE))
-	c_parser_while_statement (parser, true);
-      else
-	c_parser_do_statement (parser, true);
+      {
+	bool ivdep = c_parse_pragma_ivdep (parser);
+	unsigned short unroll = 0;
+	if (c_parser_peek_token (parser)->pragma_kind == PRAGMA_UNROLL)
+	  unroll = c_parser_pragma_unroll (parser);
+	if (!c_parser_next_token_is_keyword (parser, RID_FOR)
+	    && !c_parser_next_token_is_keyword (parser, RID_WHILE)
+	    && !c_parser_next_token_is_keyword (parser, RID_DO))
+	  {
+	    c_parser_error (parser, "for, while or do statement expected");
+	    return false;
+	  }
+	if (c_parser_next_token_is_keyword (parser, RID_FOR))
+	  c_parser_for_statement (parser, ivdep, unroll);
+	else if (c_parser_next_token_is_keyword (parser, RID_WHILE))
+	  c_parser_while_statement (parser, ivdep, unroll);
+	else
+	  c_parser_do_statement (parser, ivdep, unroll);
+      }
+      return false;
+    case PRAGMA_UNROLL:
+      {
+	unsigned short unroll = c_parser_pragma_unroll (parser);
+	bool ivdep = false;
+	if (c_parser_peek_token (parser)->pragma_kind == PRAGMA_IVDEP)
+	  ivdep = c_parse_pragma_ivdep (parser);
+	if (!c_parser_next_token_is_keyword (parser, RID_FOR)
+	    && !c_parser_next_token_is_keyword (parser, RID_WHILE)
+	    && !c_parser_next_token_is_keyword (parser, RID_DO))
+	  {
+	    c_parser_error (parser, "for, while or do statement expected");
+	    return false;
+	  }
+	if (c_parser_next_token_is_keyword (parser, RID_FOR))
+	  c_parser_for_statement (parser, ivdep, unroll);
+	else if (c_parser_next_token_is_keyword (parser, RID_WHILE))
+	  c_parser_while_statement (parser, ivdep, unroll);
+	else
+	  c_parser_do_statement (parser, ivdep, unroll);
+      }
       return false;
 
     case PRAGMA_GCC_PCH_PREPROCESS:
Index: c-family/c-pragma.c
===================================================================
--- c-family/c-pragma.c	(revision 220084)
+++ c-family/c-pragma.c	(working copy)
@@ -1456,6 +1456,10 @@ init_pragma (void)
     cpp_register_deferred_pragma (parse_in, "GCC", "ivdep", PRAGMA_IVDEP, false,
 				  false);
 
+  if (!flag_preprocess_only)
+    cpp_register_deferred_pragma (parse_in, "GCC", "unroll", PRAGMA_UNROLL, false,
+				  false);
+
   if (flag_cilkplus && !flag_preprocess_only)
     cpp_register_deferred_pragma (parse_in, "cilk", "grainsize",
 				  PRAGMA_CILK_GRAINSIZE, true, false);
Index: c-family/c-pragma.h
===================================================================
--- c-family/c-pragma.h	(revision 220084)
+++ c-family/c-pragma.h	(working copy)
@@ -69,6 +69,7 @@ typedef enum pragma_kind {
 
   PRAGMA_GCC_PCH_PREPROCESS,
   PRAGMA_IVDEP,
+  PRAGMA_UNROLL,
 
   PRAGMA_FIRST_EXTERNAL
 } pragma_kind;
Index: cfgloop.h
===================================================================
--- cfgloop.h	(revision 220084)
+++ cfgloop.h	(working copy)
@@ -189,6 +189,11 @@ struct GTY ((chain_next ("%h.next"))) lo
      of the loop can be safely evaluated concurrently.  */
   int safelen;
 
+  /* The number of times to unroll the loop.  0, means no information
+     given, just do what we always do.  A value of 1, means don't unroll
+     the loop.  */
+  unsigned short unroll;
+
   /* True if this loop should never be vectorized.  */
   bool dont_vectorize;
 
Index: cfgloopmanip.c
===================================================================
--- cfgloopmanip.c	(revision 220084)
+++ cfgloopmanip.c	(working copy)
@@ -1038,6 +1038,7 @@ copy_loop_info (struct loop *loop, struc
   target->estimate_state = loop->estimate_state;
   target->warned_aggressive_loop_optimizations
     |= loop->warned_aggressive_loop_optimizations;
+  target->unroll = loop->unroll;
 }
 
 /* Copies copy of LOOP as subloop of TARGET loop, placing newly
Index: cp/cp-array-notation.c
===================================================================
--- cp/cp-array-notation.c	(revision 220084)
+++ cp/cp-array-notation.c	(working copy)
@@ -81,7 +81,7 @@ create_an_loop (tree init, tree cond, tr
   finish_expr_stmt (init);
   for_stmt = begin_for_stmt (NULL_TREE, NULL_TREE);
   finish_for_init_stmt (for_stmt);
-  finish_for_cond (cond, for_stmt, false);
+  finish_for_cond (cond, for_stmt, false, 0);
   finish_for_expr (incr, for_stmt);
   finish_expr_stmt (body);
   finish_for_stmt (for_stmt);
Index: cp/cp-tree.h
===================================================================
--- cp/cp-tree.h	(revision 220084)
+++ cp/cp-tree.h	(working copy)
@@ -5644,7 +5644,7 @@ extern tree implicitly_declare_fn
 extern bool maybe_clone_body			(tree);
 
 /* In parser.c */
-extern tree cp_convert_range_for (tree, tree, tree, bool);
+extern tree cp_convert_range_for (tree, tree, tree, bool, unsigned short);
 extern bool parsing_nsdmi (void);
 extern void inject_this_parameter (tree, cp_cv_quals);
 
@@ -5880,16 +5880,16 @@ extern void begin_else_clause			(tree);
 extern void finish_else_clause			(tree);
 extern void finish_if_stmt			(tree);
 extern tree begin_while_stmt			(void);
-extern void finish_while_stmt_cond		(tree, tree, bool);
+extern void finish_while_stmt_cond		(tree, tree, bool, unsigned short);
 extern void finish_while_stmt			(tree);
 extern tree begin_do_stmt			(void);
 extern void finish_do_body			(tree);
-extern void finish_do_stmt			(tree, tree, bool);
+extern void finish_do_stmt			(tree, tree, bool, unsigned short);
 extern tree finish_return_stmt			(tree);
 extern tree begin_for_scope			(tree *);
 extern tree begin_for_stmt			(tree, tree);
 extern void finish_for_init_stmt		(tree);
-extern void finish_for_cond			(tree, tree, bool);
+extern void finish_for_cond			(tree, tree, bool, unsigned short);
 extern void finish_for_expr			(tree, tree);
 extern void finish_for_stmt			(tree);
 extern tree begin_range_for_stmt		(tree, tree);
Index: cp/init.c
===================================================================
--- cp/init.c	(revision 220084)
+++ cp/init.c	(working copy)
@@ -3693,7 +3693,7 @@ build_vec_init (tree base, tree maxindex
       finish_for_init_stmt (for_stmt);
       finish_for_cond (build2 (NE_EXPR, boolean_type_node, iterator,
 			       build_int_cst (TREE_TYPE (iterator), -1)),
-		       for_stmt, false);
+		       for_stmt, false, 0);
       elt_init = cp_build_unary_op (PREDECREMENT_EXPR, iterator, 0,
 				    complain);
       if (elt_init == error_mark_node)
Index: cp/parser.c
===================================================================
--- cp/parser.c	(revision 220084)
+++ cp/parser.c	(working copy)
@@ -2044,15 +2044,15 @@ static tree cp_parser_selection_statemen
 static tree cp_parser_condition
   (cp_parser *);
 static tree cp_parser_iteration_statement
-  (cp_parser *, bool);
+  (cp_parser *, bool, unsigned short);
 static bool cp_parser_for_init_statement
   (cp_parser *, tree *decl);
 static tree cp_parser_for
-  (cp_parser *, bool);
+  (cp_parser *, bool, unsigned short);
 static tree cp_parser_c_for
-  (cp_parser *, tree, tree, bool);
+  (cp_parser *, tree, tree, bool, unsigned short);
 static tree cp_parser_range_for
-  (cp_parser *, tree, tree, tree, bool);
+  (cp_parser *, tree, tree, tree, bool, unsigned short);
 static void do_range_for_auto_deduction
   (tree, tree);
 static tree cp_parser_perform_range_for_lookup
@@ -9698,7 +9698,7 @@ cp_parser_statement (cp_parser* parser,
 	case RID_WHILE:
 	case RID_DO:
 	case RID_FOR:
-	  statement = cp_parser_iteration_statement (parser, false);
+	  statement = cp_parser_iteration_statement (parser, false, 0);
 	  break;
 
 	case RID_CILK_FOR:
@@ -10390,7 +10390,7 @@ cp_parser_condition (cp_parser* parser)
    not included. */
 
 static tree
-cp_parser_for (cp_parser *parser, bool ivdep)
+cp_parser_for (cp_parser *parser, bool ivdep, unsigned short unroll)
 {
   tree init, scope, decl;
   bool is_range_for;
@@ -10402,13 +10402,14 @@ cp_parser_for (cp_parser *parser, bool i
   is_range_for = cp_parser_for_init_statement (parser, &decl);
 
   if (is_range_for)
-    return cp_parser_range_for (parser, scope, init, decl, ivdep);
+    return cp_parser_range_for (parser, scope, init, decl, ivdep, unroll);
   else
-    return cp_parser_c_for (parser, scope, init, ivdep);
+    return cp_parser_c_for (parser, scope, init, ivdep, unroll);
 }
 
 static tree
-cp_parser_c_for (cp_parser *parser, tree scope, tree init, bool ivdep)
+cp_parser_c_for (cp_parser *parser, tree scope, tree init, bool ivdep,
+		 unsigned short unroll)
 {
   /* Normal for loop */
   tree condition = NULL_TREE;
@@ -10429,7 +10430,13 @@ cp_parser_c_for (cp_parser *parser, tree
 		       "%<GCC ivdep%> pragma");
       condition = error_mark_node;
     }
-  finish_for_cond (condition, stmt, ivdep);
+  else if (unroll)
+    {
+      cp_parser_error (parser, "missing loop condition in loop with "
+		       "%<GCC unroll%> pragma");
+      condition = error_mark_node;
+    }
+  finish_for_cond (condition, stmt, ivdep, unroll);
   /* Look for the `;'.  */
   cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON);
 
@@ -10453,7 +10460,7 @@ cp_parser_c_for (cp_parser *parser, tree
 
 static tree
 cp_parser_range_for (cp_parser *parser, tree scope, tree init, tree range_decl,
-		     bool ivdep)
+		     bool ivdep, unsigned short unroll)
 {
   tree stmt, range_expr;
 
@@ -10474,6 +10481,8 @@ cp_parser_range_for (cp_parser *parser,
       stmt = begin_range_for_stmt (scope, init);
       if (ivdep)
 	RANGE_FOR_IVDEP (stmt) = 1;
+      if (unroll)
+	/* TODO */(void)0;
       finish_range_for_decl (stmt, range_decl, range_expr);
       if (!type_dependent_expression_p (range_expr)
 	  /* do_auto_deduction doesn't mess with template init-lists.  */
@@ -10483,7 +10492,7 @@ cp_parser_range_for (cp_parser *parser,
   else
     {
       stmt = begin_for_stmt (scope, init);
-      stmt = cp_convert_range_for (stmt, range_decl, range_expr, ivdep);
+      stmt = cp_convert_range_for (stmt, range_decl, range_expr, ivdep, unroll);
     }
   return stmt;
 }
@@ -10575,7 +10584,7 @@ do_range_for_auto_deduction (tree decl,
 
 tree
 cp_convert_range_for (tree statement, tree range_decl, tree range_expr,
-		      bool ivdep)
+		      bool ivdep, unsigned short unroll)
 {
   tree begin, end;
   tree iter_type, begin_expr, end_expr;
@@ -10632,7 +10641,7 @@ cp_convert_range_for (tree statement, tr
 				 begin, ERROR_MARK,
 				 end, ERROR_MARK,
 				 NULL, tf_warning_or_error);
-  finish_for_cond (condition, statement, ivdep);
+  finish_for_cond (condition, statement, ivdep, unroll);
 
   /* The new increment expression.  */
   expression = finish_unary_op_expr (input_location,
@@ -10793,7 +10802,8 @@ cp_parser_range_for_member_function (tre
    Returns the new WHILE_STMT, DO_STMT, FOR_STMT or RANGE_FOR_STMT.  */
 
 static tree
-cp_parser_iteration_statement (cp_parser* parser, bool ivdep)
+cp_parser_iteration_statement (cp_parser* parser, bool ivdep,
+			       unsigned short unroll)
 {
   cp_token *token;
   enum rid keyword;
@@ -10823,7 +10833,7 @@ cp_parser_iteration_statement (cp_parser
 	cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN);
 	/* Parse the condition.  */
 	condition = cp_parser_condition (parser);
-	finish_while_stmt_cond (condition, statement, ivdep);
+	finish_while_stmt_cond (condition, statement, ivdep, unroll);
 	/* Look for the `)'.  */
 	cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN);
 	/* Parse the dependent statement.  */
@@ -10853,7 +10863,7 @@ cp_parser_iteration_statement (cp_parser
 	/* Parse the expression.  */
 	expression = cp_parser_expression (parser);
 	/* We're done with the do-statement.  */
-	finish_do_stmt (expression, statement, ivdep);
+	finish_do_stmt (expression, statement, ivdep, unroll);
 	/* Look for the `)'.  */
 	cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN);
 	/* Look for the `;'.  */
@@ -10866,7 +10876,7 @@ cp_parser_iteration_statement (cp_parser
 	/* Look for the `('.  */
 	cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN);
 
-	statement = cp_parser_for (parser, ivdep);
+	statement = cp_parser_for (parser, ivdep, unroll);
 
 	/* Look for the `)'.  */
 	cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN);
@@ -32901,6 +32911,41 @@ cp_parser_cilk_grainsize (cp_parser *par
   cp_parser_skip_to_pragma_eol (parser, pragma_tok);
 }
 
+static bool
+cp_parser_pragma_ivdep (cp_parser *parser, cp_token *pragma_tok)
+{
+  cp_parser_skip_to_pragma_eol (parser, pragma_tok);
+  return true;
+}
+
+static unsigned short
+cp_parser_pragma_unroll (cp_parser *parser, cp_token *pragma_tok)
+{
+  location_t location = cp_lexer_peek_token (parser->lexer)->location;
+  tree expr = cp_parser_constant_expression (parser);
+  unsigned short unroll;
+  expr = maybe_constant_value (expr);
+  cp_parser_require_pragma_eol (parser, pragma_tok);
+  HOST_WIDE_INT lunroll = 0;
+  if (!INTEGRAL_TYPE_P (TREE_TYPE (expr))
+      || TREE_CODE (expr) != INTEGER_CST
+      || (lunroll = tree_to_shwi (expr)) < 0
+      || lunroll > USHRT_MAX)
+    {
+      error_at (location, "%<#pragma GCC unroll%> requires an"
+		" assignment-expression that evaluates to a non-negative"
+		" integral constant less than or equal to %u", USHRT_MAX);
+      unroll = 0;
+    }
+  else
+    {
+      unroll = (unsigned short) lunroll;
+      if (unroll == 0)
+	unroll = 1;
+    }
+  return unroll;
+}
+
 /* Normal parsing of a pragma token.  Here we can (and must) use the
    regular lexer.  */
 
@@ -33068,9 +33113,39 @@ cp_parser_pragma (cp_parser *parser, enu
 
     case PRAGMA_IVDEP:
       {
-	cp_parser_skip_to_pragma_eol (parser, pragma_tok);
+	bool ivdep = cp_parser_pragma_ivdep (parser, pragma_tok);
+	unsigned short unroll = 0;
 	cp_token *tok;
 	tok = cp_lexer_peek_token (the_parser->lexer);
+	if (tok->type == CPP_PRAGMA &&
+	    tok->pragma_kind == PRAGMA_UNROLL)
+	  {
+	    unroll = cp_parser_pragma_unroll (parser, pragma_tok);
+	    tok = cp_lexer_peek_token (the_parser->lexer);
+	  }
+	if (tok->type != CPP_KEYWORD
+	    || (tok->keyword != RID_FOR && tok->keyword != RID_WHILE
+		&& tok->keyword != RID_DO))
+	  {
+	    cp_parser_error (parser, "for, while or do statement expected");
+	    return false;
+	  }
+	cp_parser_iteration_statement (parser, ivdep, unroll);
+	return true;
+      }
+
+    case PRAGMA_UNROLL:
+      {
+	unsigned short unroll = cp_parser_pragma_unroll (parser, pragma_tok);
+	bool ivdep = false;
+	cp_token *tok;
+	tok = cp_lexer_peek_token (the_parser->lexer);
+	if (tok->type == CPP_PRAGMA &&
+	    tok->pragma_kind == PRAGMA_IVDEP)
+	  {
+	    ivdep = cp_parser_pragma_ivdep (parser, tok);
+	    tok = cp_lexer_peek_token (the_parser->lexer);
+	  }
 	if (tok->type != CPP_KEYWORD
 	    || (tok->keyword != RID_FOR && tok->keyword != RID_WHILE
 		&& tok->keyword != RID_DO))
@@ -33078,7 +33153,7 @@ cp_parser_pragma (cp_parser *parser, enu
 	    cp_parser_error (parser, "for, while or do statement expected");
 	    return false;
 	  }
-	cp_parser_iteration_statement (parser, true);
+	cp_parser_iteration_statement (parser, ivdep, unroll);
 	return true;
       }
 
Index: cp/pt.c
===================================================================
--- cp/pt.c	(revision 220084)
+++ cp/pt.c	(working copy)
@@ -13886,7 +13886,7 @@ tsubst_expr (tree t, tree args, tsubst_f
       RECUR (FOR_INIT_STMT (t));
       finish_for_init_stmt (stmt);
       tmp = RECUR (FOR_COND (t));
-      finish_for_cond (tmp, stmt, false);
+      finish_for_cond (tmp, stmt, false, 0);
       tmp = RECUR (FOR_EXPR (t));
       finish_for_expr (tmp, stmt);
       RECUR (FOR_BODY (t));
@@ -13901,7 +13901,7 @@ tsubst_expr (tree t, tree args, tsubst_f
         decl = tsubst (decl, args, complain, in_decl);
         maybe_push_decl (decl);
         expr = RECUR (RANGE_FOR_EXPR (t));
-        stmt = cp_convert_range_for (stmt, decl, expr, RANGE_FOR_IVDEP (t));
+        stmt = cp_convert_range_for (stmt, decl, expr, RANGE_FOR_IVDEP (t), 0);
         RECUR (RANGE_FOR_BODY (t));
         finish_for_stmt (stmt);
       }
@@ -13910,7 +13910,7 @@ tsubst_expr (tree t, tree args, tsubst_f
     case WHILE_STMT:
       stmt = begin_while_stmt ();
       tmp = RECUR (WHILE_COND (t));
-      finish_while_stmt_cond (tmp, stmt, false);
+      finish_while_stmt_cond (tmp, stmt, false, 0);
       RECUR (WHILE_BODY (t));
       finish_while_stmt (stmt);
       break;
@@ -13920,7 +13920,7 @@ tsubst_expr (tree t, tree args, tsubst_f
       RECUR (DO_BODY (t));
       finish_do_body (stmt);
       tmp = RECUR (DO_COND (t));
-      finish_do_stmt (tmp, stmt, false);
+      finish_do_stmt (tmp, stmt, false, 0);
       break;
 
     case IF_STMT:
@@ -14358,8 +14358,10 @@ tsubst_expr (tree t, tree args, tsubst_f
 
     case ANNOTATE_EXPR:
       tmp = RECUR (TREE_OPERAND (t, 0));
-      RETURN (build2_loc (EXPR_LOCATION (t), ANNOTATE_EXPR,
-			  TREE_TYPE (tmp), tmp, RECUR (TREE_OPERAND (t, 1))));
+      RETURN (build3_loc (EXPR_LOCATION (t), ANNOTATE_EXPR,
+			  TREE_TYPE (tmp), tmp,
+			  RECUR (TREE_OPERAND (t, 1)),
+			  RECUR (TREE_OPERAND (t, 2))));
 
     default:
       gcc_assert (!STATEMENT_CODE_P (TREE_CODE (t)));
Index: cp/semantics.c
===================================================================
--- cp/semantics.c	(revision 220084)
+++ cp/semantics.c	(working copy)
@@ -802,7 +802,8 @@ begin_while_stmt (void)
    WHILE_STMT.  */
 
 void
-finish_while_stmt_cond (tree cond, tree while_stmt, bool ivdep)
+finish_while_stmt_cond (tree cond, tree while_stmt, bool ivdep,
+			unsigned short unroll)
 {
   if (check_no_cilk (cond,
       "Cilk array notation cannot be used as a condition for while statement",
@@ -812,11 +813,19 @@ finish_while_stmt_cond (tree cond, tree
   finish_cond (&WHILE_COND (while_stmt), cond);
   begin_maybe_infinite_loop (cond);
   if (ivdep && cond != error_mark_node)
-    WHILE_COND (while_stmt) = build2 (ANNOTATE_EXPR,
+    WHILE_COND (while_stmt) = build3 (ANNOTATE_EXPR,
 				      TREE_TYPE (WHILE_COND (while_stmt)),
 				      WHILE_COND (while_stmt),
 				      build_int_cst (integer_type_node,
-						     annot_expr_ivdep_kind));
+						     annot_expr_ivdep_kind),
+				      NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    WHILE_COND (while_stmt) = build3 (ANNOTATE_EXPR,
+				      TREE_TYPE (WHILE_COND (while_stmt)),
+				      WHILE_COND (while_stmt),
+				      build_int_cst (integer_type_node,
+						     annot_expr_unroll_kind),
+				      build_int_cst (integer_type_node, unroll));
   simplify_loop_decl_cond (&WHILE_COND (while_stmt), WHILE_BODY (while_stmt));
 }
 
@@ -861,7 +870,7 @@ finish_do_body (tree do_stmt)
    COND is as indicated.  */
 
 void
-finish_do_stmt (tree cond, tree do_stmt, bool ivdep)
+finish_do_stmt (tree cond, tree do_stmt, bool ivdep, unsigned short unroll)
 {
   if (check_no_cilk (cond,
   "Cilk array notation cannot be used as a condition for a do-while statement",
@@ -870,8 +879,13 @@ finish_do_stmt (tree cond, tree do_stmt,
   cond = maybe_convert_cond (cond);
   end_maybe_infinite_loop (cond);
   if (ivdep && cond != error_mark_node)
-    cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
-		   build_int_cst (integer_type_node, annot_expr_ivdep_kind));
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+		   build_int_cst (integer_type_node, annot_expr_ivdep_kind),
+		   NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+		   build_int_cst (integer_type_node, annot_expr_unroll_kind),
+		   build_int_cst (integer_type_node, unroll));
   DO_COND (do_stmt) = cond;
 }
 
@@ -974,7 +988,7 @@ finish_for_init_stmt (tree for_stmt)
    FOR_STMT.  */
 
 void
-finish_for_cond (tree cond, tree for_stmt, bool ivdep)
+finish_for_cond (tree cond, tree for_stmt, bool ivdep, unsigned short unroll)
 {
   if (check_no_cilk (cond,
 	 "Cilk array notation cannot be used in a condition for a for-loop",
@@ -984,11 +998,20 @@ finish_for_cond (tree cond, tree for_stm
   finish_cond (&FOR_COND (for_stmt), cond);
   begin_maybe_infinite_loop (cond);
   if (ivdep && cond != error_mark_node)
-    FOR_COND (for_stmt) = build2 (ANNOTATE_EXPR,
+    FOR_COND (for_stmt) = build3 (ANNOTATE_EXPR,
 				  TREE_TYPE (FOR_COND (for_stmt)),
 				  FOR_COND (for_stmt),
 				  build_int_cst (integer_type_node,
-						 annot_expr_ivdep_kind));
+						 annot_expr_ivdep_kind),
+				  NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    FOR_COND (for_stmt) = build3 (ANNOTATE_EXPR,
+				  TREE_TYPE (FOR_COND (for_stmt)),
+				  FOR_COND (for_stmt),
+				  build_int_cst (integer_type_node,
+						 annot_expr_unroll_kind),
+				  build_int_cst (integer_type_node,
+						 unroll));
   simplify_loop_decl_cond (&FOR_COND (for_stmt), FOR_BODY (for_stmt));
 }
 
Index: doc/extend.texi
===================================================================
--- doc/extend.texi	(revision 220084)
+++ doc/extend.texi	(working copy)
@@ -17881,6 +17881,18 @@ void ignore_vec_dep (int *a, int k, int
 @}
 @end smallexample
 
+@table @code
+@item #pragma GCC unroll @var{n}
+@cindex pragma GCC unroll @var{n}
+
+With this pragma, the programmer informs the optimizer how many times
+a loop should be unrolled.  A 0 or 1 informs the compiler to not
+perform any loop unrolling.  The pragma must be immediately before
+@samp{#pragma ivdep} or a @code{for}, @code{while} or @code{do} loop
+and applies only to the loop that follows.  @var{n} is an
+assignment-expression that evaluates to an integer constant.
+
+@end table
 
 @node Unnamed Fields
 @section Unnamed struct/union fields within structs/unions
Index: fortran/trans-stmt.c
===================================================================
--- fortran/trans-stmt.c	(revision 220084)
+++ fortran/trans-stmt.c	(working copy)
@@ -2876,9 +2876,10 @@ gfc_trans_forall_loop (forall_info *fora
       cond = fold_build2_loc (input_location, LE_EXPR, boolean_type_node,
 			      count, build_int_cst (TREE_TYPE (count), 0));
       if (forall_tmp->do_concurrent)
-	cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+	cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
 		       build_int_cst (integer_type_node,
-				      annot_expr_ivdep_kind));
+				      annot_expr_ivdep_kind),
+		       NULL_TREE);
 
       tmp = build1_v (GOTO_EXPR, exit_label);
       tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node,
Index: function.h
===================================================================
--- function.h	(revision 220084)
+++ function.h	(working copy)
@@ -670,6 +670,10 @@ struct GTY(()) function {
 
   /* Set when the tail call has been identified.  */
   unsigned int tail_call_marked : 1;
+
+  /* Set when #pragma unroll has been used in the body.  Used by rtl
+     unrolling to know when to perform unrolling in the function.  */
+  unsigned int has_unroll : 1;
 };
 
 /* Add the decl D to the local_decls list of FUN.  */
Index: gimple-low.c
===================================================================
--- gimple-low.c	(revision 220084)
+++ gimple-low.c	(working copy)
@@ -347,7 +347,7 @@ lower_stmt (gimple_stmt_iterator *gsi, s
 	for (i = 0; i < gimple_call_num_args (stmt); i++)
 	  {
 	    tree arg = gimple_call_arg (stmt, i);
-	    if (EXPR_P (arg))
+	    if (arg && EXPR_P (arg))
 	      TREE_SET_BLOCK (arg, data->block);
 	  }
 
Index: gimple-walk.c
===================================================================
--- gimple-walk.c	(revision 220084)
+++ gimple-walk.c	(working copy)
@@ -261,7 +261,7 @@ walk_gimple_op (gimple stmt, walk_tree_f
 
       for (i = 0; i < gimple_call_num_args (stmt); i++)
 	{
-	  if (wi)
+	  if (wi && gimple_call_arg (stmt, i))
 	    wi->val_only
 	      = is_gimple_reg_type (TREE_TYPE (gimple_call_arg (stmt, i)));
 	  ret = walk_tree (gimple_call_arg_ptr (stmt, i), callback_op, wi,
Index: gimplify.c
===================================================================
--- gimplify.c	(revision 220084)
+++ gimplify.c	(working copy)
@@ -2908,6 +2908,9 @@ gimple_boolify (tree expr)
     case ANNOTATE_EXPR:
       switch ((enum annot_expr_kind) TREE_INT_CST_LOW (TREE_OPERAND (expr, 1)))
 	{
+	case annot_expr_unroll_kind:
+	  cfun->has_unroll = 1;
+	  /* fall-through */
 	case annot_expr_ivdep_kind:
 	case annot_expr_no_vector_kind:
 	case annot_expr_vector_kind:
@@ -7947,6 +7950,7 @@ gimplify_expr (tree *expr_p, gimple_seq
 	  {
 	    tree cond = TREE_OPERAND (*expr_p, 0);
 	    tree kind = TREE_OPERAND (*expr_p, 1);
+	    tree data = TREE_OPERAND (*expr_p, 2);
 	    tree type = TREE_TYPE (cond);
 	    if (!INTEGRAL_TYPE_P (type))
 	      {
@@ -7957,7 +7961,7 @@ gimplify_expr (tree *expr_p, gimple_seq
 	    tree tmp = create_tmp_var (type);
 	    gimplify_arg (&cond, pre_p, EXPR_LOCATION (*expr_p));
 	    gcall *call
-	      = gimple_build_call_internal (IFN_ANNOTATE, 2, cond, kind);
+	      = gimple_build_call_internal (IFN_ANNOTATE, 3, cond, kind, data);
 	    gimple_call_set_lhs (call, tmp);
 	    gimplify_seq_add_stmt (pre_p, call);
 	    *expr_p = tmp;
Index: loop-init.c
===================================================================
--- loop-init.c	(revision 220084)
+++ loop-init.c	(working copy)
@@ -375,6 +375,7 @@ pass_loop2::gate (function *fun)
       && (flag_move_loop_invariants
 	  || flag_unswitch_loops
 	  || flag_unroll_loops
+	  || cfun->has_unroll
 #ifdef HAVE_doloop_end
 	  || (flag_branch_on_count_reg && HAVE_doloop_end)
 #endif
@@ -576,7 +577,8 @@ public:
   /* opt_pass methods: */
   virtual bool gate (function *)
     {
-      return (flag_peel_loops || flag_unroll_loops || flag_unroll_all_loops);
+      return (flag_peel_loops || flag_unroll_loops || flag_unroll_all_loops
+	      || cfun->has_unroll);
     }
 
   virtual unsigned int execute (function *);
Index: loop-unroll.c
===================================================================
--- loop-unroll.c	(revision 220084)
+++ loop-unroll.c	(working copy)
@@ -243,16 +243,26 @@ report_unroll (struct loop *loop, locati
 
 /* Decide whether unroll loops and how much.  */
 static void
-decide_unrolling (int flags)
+decide_unrolling (int base_flags)
 {
   struct loop *loop;
 
   /* Scan the loops, inner ones first.  */
   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
     {
+      int flags = base_flags;
+      if (loop->unroll > 1)
+	flags = UAP_UNROLL | UAP_UNROLL_ALL;
       loop->lpt_decision.decision = LPT_NONE;
       location_t locus = get_loop_location (loop);
 
+      if (loop->unroll == 1)
+	{
+	  dump_printf_loc (TDF_RTL, locus,
+			   "not unrolling loop, user didn't want it unrolled\n");
+	  continue;
+	}
+
       if (dump_enabled_p ())
 	dump_printf_loc (TDF_RTL, locus,
                          ";; *** Considering loop %d at BB %d for "
@@ -422,6 +432,19 @@ decide_unroll_constant_iterations (struc
       return;
     }
 
+  if (loop->unroll)
+    {
+      loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
+      loop->lpt_decision.times = loop->unroll - 1;
+      if (loop->lpt_decision.times > desc->niter - 2)
+	{
+	  /* They won't do this for us.  */
+	  loop->lpt_decision.decision = LPT_NONE;
+	  loop->lpt_decision.times = desc->niter - 2;
+	}
+      return;
+    }
+
   /* Check whether the loop rolls enough to consider.  
      Consult also loop bounds and profile; in the case the loop has more
      than one exit it may well loop less than determined maximal number
@@ -443,7 +466,7 @@ decide_unroll_constant_iterations (struc
   best_copies = 2 * nunroll + 10;
 
   i = 2 * nunroll + 2;
-  if (i - 1 >= desc->niter)
+  if (i > desc->niter - 2)
     i = desc->niter - 2;
 
   for (; i >= nunroll - 1; i--)
@@ -695,6 +718,9 @@ decide_unroll_runtime_iterations (struct
   if (targetm.loop_unroll_adjust)
     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 
+  if (loop->unroll)
+    nunroll = loop->unroll;
+
   /* Skip big loops.  */
   if (nunroll <= 1)
     {
@@ -733,8 +759,9 @@ decide_unroll_runtime_iterations (struct
       return;
     }
 
-  /* Success; now force nunroll to be power of 2, as we are unable to
-     cope with overflows in computation of number of iterations.  */
+  /* Success; now force nunroll to be power of 2, as code-gen
+     requires it, we are unable to cope with overflows in
+     computation of number of iterations.  */
   for (i = 1; 2 * i <= nunroll; i *= 2)
     continue;
 
@@ -843,9 +870,10 @@ compare_and_jump_seq (rtx op0, rtx op1,
   return seq;
 }
 
-/* Unroll LOOP for which we are able to count number of iterations in runtime
-   LOOP->LPT_DECISION.TIMES times.  The transformation does this (with some
-   extra care for case n < 0):
+/* Unroll LOOP for which we are able to count number of iterations in
+   runtime LOOP->LPT_DECISION.TIMES times.  The times value must be a
+   power of two.  The transformation does this (with some extra care
+   for case n < 0):
 
    for (i = 0; i < n; i++)
      body;
@@ -1142,6 +1170,9 @@ decide_unroll_stupid (struct loop *loop,
   if (targetm.loop_unroll_adjust)
     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 
+  if (loop->unroll)
+    nunroll = loop->unroll;
+
   /* Skip big loops.  */
   if (nunroll <= 1)
     {
Index: lto-streamer-in.c
===================================================================
--- lto-streamer-in.c	(revision 220084)
+++ lto-streamer-in.c	(working copy)
@@ -751,6 +751,7 @@ input_cfg (struct lto_input_block *ib, s
 
       /* Read OMP SIMD related info.  */
       loop->safelen = streamer_read_hwi (ib);
+      loop->unroll = streamer_read_hwi (ib);
       loop->dont_vectorize = streamer_read_hwi (ib);
       loop->force_vectorize = streamer_read_hwi (ib);
       loop->simduid = stream_read_tree (ib, data_in);
Index: lto-streamer-out.c
===================================================================
--- lto-streamer-out.c	(revision 220084)
+++ lto-streamer-out.c	(working copy)
@@ -1884,6 +1884,7 @@ output_cfg (struct output_block *ob, str
 
       /* Write OMP SIMD related info.  */
       streamer_write_hwi (ob, loop->safelen);
+      streamer_write_hwi (ob, loop->unroll);
       streamer_write_hwi (ob, loop->dont_vectorize);
       streamer_write_hwi (ob, loop->force_vectorize);
       stream_write_tree (ob, loop->simduid, true);
Index: testsuite/c-c++-common/unroll-1.c
===================================================================
--- testsuite/c-c++-common/unroll-1.c	(revision 0)
+++ testsuite/c-c++-common/unroll-1.c	(working copy)
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdisable-tree-cunroll -fdump-rtl-loop2_unroll -fdump-tree-cunrolli-details" } */
+
+void bar(int);
+
+int j;
+
+void test1()
+{
+  unsigned long m = j;
+  unsigned long i;
+
+  /* { dg-final { scan-tree-dump "loop with 9 iterations completely unrolled" "cunrolli" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "21:\(5|11\): note: loop unrolled 7 times" "loop2_unroll" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= j; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "26:\(5|11\): note: loop unrolled 3 times" "loop2_unroll" } } */
+  #pragma GCC unroll 7
+  for (unsigned long i = 1; i <= j; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "3\[31\]:3: note: loop unrolled 2 times" "loop2_unroll" } } */
+  i = 0;
+  #pragma GCC unroll 3
+  do {
+    bar(i);
+  } while (++i < 9);
+
+  #pragma GCC unroll 4+4
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+}
+
+/* { dg-final { cleanup-rtl-dump "loop2_unroll" } } */
Index: testsuite/c-c++-common/unroll-2.c
===================================================================
--- testsuite/c-c++-common/unroll-2.c	(revision 0)
+++ testsuite/c-c++-common/unroll-2.c	(working copy)
@@ -0,0 +1,68 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-loop2_unroll -fdump-tree-cunrolli-details" } */
+
+void bar(int);
+
+int j;
+
+void test1()
+{
+  unsigned long m = j;
+  unsigned long i;
+
+  /* { dg-final { scan-tree-dump "15:\[0-9\]*: note: loop turned into non-loop; it never loops"  "cunrolli" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "\(19|21\):\(5|11\): note: loop unrolled 7 times" "loop2_unroll" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= j; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "26:\[0-9\]*: note: loop unrolled 3 times" "loop2_unroll" } } */
+  #pragma GCC unroll 7
+  for (unsigned long i = 1; i <= j; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "3\[13\]:\[0-9\]*: note: loop unrolled 2 times" "loop2_unroll" } } */
+  i = 0;
+  #pragma GCC unroll 3
+  do {
+    bar(i);
+  } while (++i < 9);
+}
+
+void test2 () {
+  unsigned long m = j;
+  unsigned long i;
+
+  /* { dg-final { scan-tree-dump "\[424\]:\[0-9\]*: note: loop turned into non-loop; it never loops" "cunrolli" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= 7; ++i)
+    bar(i);
+
+  /* { dg-final { scan-tree-dump "4\[79\]:\[0-9\]*: note: loop turned into non-loop; it never loops" "cunrolli" } } */
+  #pragma GCC unroll 9
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "52:\[0-9\]*: note: loop unrolled 3 times" "loop2_unroll" } } */
+  #pragma GCC unroll 4
+  for (unsigned long i = 1; i <= 15; ++i)
+    bar(i);
+
+  /* { dg-final { scan-tree-dump "5\[79\]:\[0-9\]*: note: loop turned into non-loop; it never loops" "cunrolli" } } */
+  #pragma GCC unroll 709
+  for (unsigned long i = 1; i <= 709; ++i)
+    bar(i);
+
+  /* { dg-final { scan-tree-dump "6\[24\]:\[0-9\]*: note: not unrolling loop, user didn't want it unrolled completely" "cunrolli" } } */
+  #pragma GCC unroll 0
+  for (unsigned long i = 1; i <= 3; ++i)
+    bar(i);
+}
+
+
+/* { dg-final { cleanup-tree-dump "cunrolli" } } */
+/* { dg-final { cleanup-rtl-dump "loop2_unroll" } } */
Index: testsuite/c-c++-common/unroll-3.c
===================================================================
--- testsuite/c-c++-common/unroll-3.c	(revision 0)
+++ testsuite/c-c++-common/unroll-3.c	(working copy)
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -funroll-all-loops -fdump-rtl-loop2_unroll -fdump-tree-cunrolli-details" } */
+
+void bar(int);
+
+int j;
+
+void test1()
+{
+  unsigned long m = j;
+  unsigned long i;
+
+  /* { dg-final { scan-tree-dump "16:\[0-9\]*: note: not unrolling loop, user didn't want it unrolled completely" "cunrolli" } } */
+  /* { dg-final { scan-rtl-dump "16:\[0-9\]*: note: not unrolling loop, user didn't want it unrolled" "loop2_unroll" } } */
+  #pragma GCC unroll 0
+  for (unsigned long i = 1; i <= 3; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "21:\[0-9\]*: note: not unrolling loop, user didn't want it unrolled" "loop2_unroll" } } */
+  #pragma GCC unroll 0
+  for (unsigned long i = 1; i <= m; ++i)
+    bar(i);
+}
+
+/* { dg-final { cleanup-tree-dump "cunrolli" } } */
+/* { dg-final { cleanup-rtl-dump "loop2_unroll" } } */
Index: testsuite/c-c++-common/unroll-4.c
===================================================================
--- testsuite/c-c++-common/unroll-4.c	(revision 0)
+++ testsuite/c-c++-common/unroll-4.c	(working copy)
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdisable-tree-cunroll" } */
+
+void bar(int);
+
+int j;
+
+void test1() {
+  unsigned long m = j;
+  unsigned long i;
+
+  #pragma GCC unroll 20000000000	/* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  #pragma GCC unroll i	/* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */
+  /* { dg-error "cannot appear in a constant-expression|is not usable in a constant expression" "" { target c++ } 16 } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  #pragma GCC unroll n	/* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */
+  /* { dg-error "declared" "" { target *-*-* } 21 } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  #pragma GCC unroll 1+i	/* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */
+  /* { dg-error "cannot appear in a constant-expression|is not usable in a constant expression" "" { target c++ } 26 } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  #pragma GCC unroll  4,4		/* { dg-error "expected end of line before" } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  #pragma GCC unroll  4.2	/* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+}
Index: tree-cfg.c
===================================================================
--- tree-cfg.c	(revision 220084)
+++ tree-cfg.c	(working copy)
@@ -316,6 +316,10 @@ replace_loop_annotate_in_block (basic_bl
 	  loop->force_vectorize = true;
 	  cfun->has_force_vectorize_loops = true;
 	  break;
+	case annot_expr_unroll_kind:
+	  loop->unroll = (unsigned short)tree_to_shwi (gimple_call_arg (stmt,
+									2));
+	  break;
 	default:
 	  gcc_unreachable ();
 	}
@@ -365,6 +369,7 @@ replace_loop_annotate (void)
 	    case annot_expr_ivdep_kind:
 	    case annot_expr_no_vector_kind:
 	    case annot_expr_vector_kind:
+	    case annot_expr_unroll_kind:
 	      break;
 	    default:
 	      gcc_unreachable ();
@@ -3385,6 +3390,8 @@ verify_gimple_call (gcall *stmt)
   for (i = 0; i < gimple_call_num_args (stmt); ++i)
     {
       tree arg = gimple_call_arg (stmt, i);
+      if (! arg)
+	continue;
       if ((is_gimple_reg_type (TREE_TYPE (arg))
 	   && !is_gimple_val (arg))
 	  || (!is_gimple_reg_type (TREE_TYPE (arg))
@@ -7512,6 +7519,8 @@ print_loop (FILE *file, struct loop *loo
       fprintf (file, ", estimate = ");
       print_decu (loop->nb_iterations_estimate, file);
     }
+  if (loop->unroll)
+    fprintf (file, ", unroll = %d", loop->unroll);
   fprintf (file, ")\n");
 
   /* Print loop's body.  */
Index: tree-core.h
===================================================================
--- tree-core.h	(revision 220084)
+++ tree-core.h	(working copy)
@@ -725,6 +725,7 @@ enum annot_expr_kind {
   annot_expr_ivdep_kind,
   annot_expr_no_vector_kind,
   annot_expr_vector_kind,
+  annot_expr_unroll_kind,
   annot_expr_kind_last
 };
 
Index: tree-pretty-print.c
===================================================================
--- tree-pretty-print.c	(revision 220084)
+++ tree-pretty-print.c	(working copy)
@@ -2313,6 +2313,10 @@ dump_generic_node (pretty_printer *pp, t
 	case annot_expr_vector_kind:
 	  pp_string (pp, ", vector");
 	  break;
+	case annot_expr_unroll_kind:
+	  pp_printf (pp, ", unroll %d",
+		     (int)TREE_INT_CST_LOW (TREE_OPERAND (node, 2)));
+	  break;
 	default:
 	  gcc_unreachable ();
 	}
Index: tree-ssa-loop-ivcanon.c
===================================================================
--- tree-ssa-loop-ivcanon.c	(revision 220084)
+++ tree-ssa-loop-ivcanon.c	(working copy)
@@ -686,8 +686,7 @@ try_unroll_loop_completely (struct loop
 			    HOST_WIDE_INT maxiter,
 			    location_t locus)
 {
-  unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns;
-  struct loop_size size;
+  unsigned HOST_WIDE_INT n_unroll = 0;
   bool n_unroll_found = false;
   edge edge_to_cancel = NULL;
   int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;
@@ -731,7 +730,8 @@ try_unroll_loop_completely (struct loop
   if (!n_unroll_found)
     return false;
 
-  if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
+  if (loop->unroll == 0 &&
+      n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "Not unrolling loop %d "
@@ -753,107 +753,130 @@ try_unroll_loop_completely (struct loop
       if (ul == UL_SINGLE_ITER)
 	return false;
 
-      large = tree_estimate_loop_size
-		 (loop, exit, edge_to_cancel, &size,
-		  PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
-      ninsns = size.overall;
-      if (large)
+      if (loop->unroll)
 	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
-		     loop->num);
-	  return false;
+	  /* If they wanted to unroll more than we want, don't unroll
+	     it completely.  */
+	  if (n_unroll > (unsigned)loop->unroll)
+	    {
+	      dump_printf_loc (report_flags, locus,
+	        "not unrolling loop, "
+		"user didn't want it unrolled completely.\n");
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file,
+		  "Not unrolling loop %d: "
+		  "user didn't want it unrolled completely.\n",
+			 loop->num);
+	      return false;
+	    }
 	}
-
-      unr_insns = estimated_unrolled_size (&size, n_unroll);
-      if (dump_file && (dump_flags & TDF_DETAILS))
+      else
 	{
-	  fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
-	  fprintf (dump_file, "  Estimated size after unrolling: %d\n",
-		   (int) unr_insns);
-	}
+	  struct loop_size size;
+	  large = tree_estimate_loop_size
+	            (loop, exit, edge_to_cancel, &size,
+		     PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
+	  unsigned HOST_WIDE_INT ninsns = size.overall;
+	  if (large)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
+			 loop->num);
+	      return false;
+	    }
 
-      /* If the code is going to shrink, we don't need to be extra cautious
-	 on guessing if the unrolling is going to be profitable.  */
-      if (unr_insns
-	  /* If there is IV variable that will become constant, we save
-	     one instruction in the loop prologue we do not account
-	     otherwise.  */
-	  <= ninsns + (size.constant_iv != false))
-	;
-      /* We unroll only inner loops, because we do not consider it profitable
-	 otheriwse.  We still can cancel loopback edge of not rolling loop;
-	 this is always a good idea.  */
-      else if (ul == UL_NO_GROWTH)
-	{
+	  unsigned HOST_WIDE_INT unr_insns
+	    = estimated_unrolled_size (&size, n_unroll);
 	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
-		     loop->num);
-	  return false;
-	}
-      /* Outer loops tend to be less interesting candidates for complete
-	 unrolling unless we can do a lot of propagation into the inner loop
-	 body.  For now we disable outer loop unrolling when the code would
-	 grow.  */
-      else if (loop->inner)
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "it is not innermost and code would grow.\n",
-		     loop->num);
-	  return false;
-	}
-      /* If there is call on a hot path through the loop, then
-	 there is most probably not much to optimize.  */
-      else if (size.num_non_pure_calls_on_hot_path)
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "contains call and code would grow.\n",
-		     loop->num);
-	  return false;
-	}
-      /* If there is pure/const call in the function, then we
-	 can still optimize the unrolled loop body if it contains
-	 some other interesting code than the calls and code
-	 storing or cumulating the return value.  */
-      else if (size.num_pure_calls_on_hot_path
-	       /* One IV increment, one test, one ivtmp store
-		  and one useful stmt.  That is about minimal loop
-		  doing pure call.  */
-	       && (size.non_call_stmts_on_hot_path
-		   <= 3 + size.num_pure_calls_on_hot_path))
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "contains just pure calls and code would grow.\n",
-		     loop->num);
-	  return false;
-	}
-      /* Complette unrolling is major win when control flow is removed and
-	 one big basic block is created.  If the loop contains control flow
-	 the optimization may still be a win because of eliminating the loop
-	 overhead but it also may blow the branch predictor tables.
-	 Limit number of branches on the hot path through the peeled
-	 sequence.  */
-      else if (size.num_branches_on_hot_path * (int)n_unroll
-	       > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     " number of branches on hot path in the unrolled sequence"
-		     " reach --param max-peel-branches limit.\n",
-		     loop->num);
-	  return false;
-	}
-      else if (unr_insns
-	       > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "(--param max-completely-peeled-insns limit reached).\n",
-		     loop->num);
-	  return false;
+	    {
+	      fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
+	      fprintf (dump_file, "  Estimated size after unrolling: %d\n",
+		       (int) unr_insns);
+	    }
+
+	  /* If the code is going to shrink, we don't need to be extra
+	     cautious on guessing if the unrolling is going to be
+	     profitable.  */
+	  if (unr_insns
+	      /* If there is IV variable that will become constant, we
+		 save one instruction in the loop prologue we do not
+		 account otherwise.  */
+	      <= ninsns + (size.constant_iv != false))
+	    ;
+	  /* We unroll only inner loops, because we do not consider it
+	     profitable otherwise.  We still can cancel loopback edge
+	     of not rolling loop; this is always a good idea.  */
+	  else if (ul == UL_NO_GROWTH)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* Outer loops tend to be less interesting candidates for
+	     complete unrolling unless we can do a lot of propagation
+	     into the inner loop body.  For now we disable outer loop
+	     unrolling when the code would grow.  */
+	  else if (loop->inner)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "it is not innermost and code would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* If there is call on a hot path through the loop, then
+	     there is most probably not much to optimize.  */
+	  else if (size.num_non_pure_calls_on_hot_path)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "contains call and code would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* If there is pure/const call in the function, then we can
+	     still optimize the unrolled loop body if it contains some
+	     other interesting code than the calls and code storing or
+	     cumulating the return value.  */
+	  else if (size.num_pure_calls_on_hot_path
+		   /* One IV increment, one test, one ivtmp store and
+		      one useful stmt.  That is about minimal loop
+		      doing pure call.  */
+		   && (size.non_call_stmts_on_hot_path
+		       <= 3 + size.num_pure_calls_on_hot_path))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "contains just pure calls and code would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* Complete unrolling is major win when control flow is
+	     removed and one big basic block is created.  If the loop
+	     contains control flow the optimization may still be a win
+	     because of eliminating the loop overhead but it also may
+	     blow the branch predictor tables.  Limit number of
+	     branches on the hot path through the peeled sequence.  */
+	  else if (size.num_branches_on_hot_path * (int)n_unroll
+		   > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 " number of branches on hot path in the unrolled sequence"
+			 " reach --param max-peel-branches limit.\n",
+			 loop->num);
+	      return false;
+	    }
+	  else if (unr_insns
+		   > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "(--param max-completely-peeled-insns limit reached).\n",
+			 loop->num);
+	      return false;
+	    }
 	}
       dump_printf_loc (report_flags, locus,
                        "loop turned into non-loop; it never loops.\n");
@@ -897,8 +920,9 @@ try_unroll_loop_completely (struct loop
       else
 	gimple_cond_make_true (cond);
       update_stmt (cond);
-      /* Do not remove the path. Doing so may remove outer loop
-	 and confuse bookkeeping code in tree_unroll_loops_completelly.  */
+      /* Do not remove the path. Doing so may remove outer loop and
+	 confuse bookkeeping code in
+	 tree_unroll_loops_completelly.  */
     }
 
   /* Store the loop for later unlooping and exit removal.  */
@@ -974,23 +998,33 @@ try_peel_loop (struct loop *loop,
   if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0)
     return false;
 
+  /* We don't peel loops that will be unrolled as this can duplicate a
+     loop more times than the user requested.  */
+  if (loop->unroll)
+    {
+      if (dump_file)
+        fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
+      return false;
+    }
+
   /* Peel only innermost loops.  */
   if (loop->inner)
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: outer loop\n");
+	fprintf (dump_file, "Not peeling: outer loop\n");
       return false;
     }
 
   if (!optimize_loop_for_speed_p (loop))
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: cold loop\n");
+	fprintf (dump_file, "Not peeling: cold loop\n");
       return false;
     }
 
   /* Check if there is an estimate on the number of iterations.  */
   npeel = estimated_loop_iterations_int (loop);
+
   if (npeel < 0)
     {
       if (dump_file)
@@ -998,10 +1032,11 @@ try_peel_loop (struct loop *loop,
 	         "estimated\n");
       return false;
     }
+
   if (maxiter >= 0 && maxiter <= npeel)
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: upper bound is known so can "
+	fprintf (dump_file, "Not peeling: upper bound is known so can "
 		 "unroll completely\n");
       return false;
     }
@@ -1012,7 +1047,7 @@ try_peel_loop (struct loop *loop,
   if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: rolls too much "
+	fprintf (dump_file, "Not peeling: rolls too much "
 		 "(%i + 1 > --param max-peel-times)\n", npeel);
       return false;
     }
@@ -1025,7 +1060,7 @@ try_peel_loop (struct loop *loop,
       > PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: peeled sequence size is too large "
+	fprintf (dump_file, "Not peeling: peeled sequence size is too large "
 		 "(%i insns > --param max-peel-insns)", peeled_size);
       return false;
     }
@@ -1302,7 +1337,9 @@ tree_unroll_loops_completely_1 (bool may
   if (!loop_father)
     return false;
 
-  if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
+  if (loop->unroll > 1)
+    ul = UL_ALL;
+  else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
       /* Unroll outermost loops only if asked to do so or they do
 	 not cause code growth.  */
       && (unroll_outer || loop_outer (loop_father)))
@@ -1539,7 +1576,9 @@ public:
   {}
 
   /* opt_pass methods: */
-  virtual bool gate (function *) { return optimize >= 2; }
+  virtual bool gate (function *) {
+    return optimize >= 2 || cfun->has_unroll;
+  }
   virtual unsigned int execute (function *);
 
 }; // class pass_complete_unrolli
Index: tree.def
===================================================================
--- tree.def	(revision 220084)
+++ tree.def	(working copy)
@@ -1365,8 +1365,9 @@ DEFTREECODE (TARGET_OPTION_NODE, "target
 
 /* ANNOTATE_EXPR.
    Operand 0 is the expression to be annotated.
-   Operand 1 is the annotation kind.  */
-DEFTREECODE (ANNOTATE_EXPR, "annotate_expr", tcc_expression, 2)
+   Operand 1 is the annotation kind.
+   Operand 2 is optional data.  */
+DEFTREECODE (ANNOTATE_EXPR, "annotate_expr", tcc_expression, 3)
 
 /* Cilk spawn statement
    Operand 0 is the CALL_EXPR.  */

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: #pragma GCC unroll support
  2015-01-30 17:52           ` Mike Stump
@ 2015-01-30 18:06             ` Marek Polacek
  2015-01-30 23:48               ` Bernhard Reutner-Fischer
  2015-03-05 23:13             ` #pragma GCC unroll support Mike Stump
  1 sibling, 1 reply; 25+ messages in thread
From: Marek Polacek @ 2015-01-30 18:06 UTC (permalink / raw)
  To: Mike Stump
  Cc: Joseph Myers, Richard Biener, gcc-patches Patches, Jason Merrill

On Fri, Jan 30, 2015 at 08:27:06AM -0800, Mike Stump wrote:
>  \f
> +static bool
> +c_parse_pragma_ivdep (c_parser *parser)
> +{
> +  c_parser_consume_pragma (parser);
> +  c_parser_skip_to_pragma_eol (parser);
> +  return true;
> +}
> +
> +static unsigned short
> +c_parser_pragma_unroll (c_parser *parser)
> +{

Note that these functions are missing comments.

	Marek

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: #pragma GCC unroll support
  2015-01-30 23:48               ` Bernhard Reutner-Fischer
@ 2015-01-30 22:16                 ` Mike Stump
  2015-02-02 23:22                   ` [PATCH, v0] fortran: !GCC$ unroll for DO Bernhard Reutner-Fischer
  2015-02-02 23:22                   ` [PATCH, RFC] fortran [was Re: #pragma GCC unroll support] Bernhard Reutner-Fischer
  0 siblings, 2 replies; 25+ messages in thread
From: Mike Stump @ 2015-01-30 22:16 UTC (permalink / raw)
  To: Bernhard Reutner-Fischer
  Cc: Marek Polacek, Joseph Myers, Richard Biener, gcc-patches Patches,
	Jason Merrill

On Jan 30, 2015, at 12:52 PM, Bernhard Reutner-Fischer <rep.dot.nop@gmail.com> wrote:
> This last test is still puzzling me.

As it is to me.  The reason it seems like they were going for in the code was due to wrapping, but wrapping should only happen at something relating to the bit size of the induction variable.  A 64-bit induction variable should only suffer wrapping effects somewhere beyond say 60 bits.

> From a user perspective this is not very intuitive, IMHO :)


Agreed.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: #pragma GCC unroll support
  2015-01-30 18:06             ` Marek Polacek
@ 2015-01-30 23:48               ` Bernhard Reutner-Fischer
  2015-01-30 22:16                 ` Mike Stump
  0 siblings, 1 reply; 25+ messages in thread
From: Bernhard Reutner-Fischer @ 2015-01-30 23:48 UTC (permalink / raw)
  To: Marek Polacek, Mike Stump
  Cc: Joseph Myers, Richard Biener, gcc-patches Patches, Jason Merrill

On January 30, 2015 5:36:16 PM GMT+01:00, Marek Polacek <polacek@redhat.com> wrote:
>On Fri, Jan 30, 2015 at 08:27:06AM -0800, Mike Stump wrote:
>>  \f>
>> +static bool
>> +c_parse_pragma_ivdep (c_parser *parser)
>> +{
>> +  c_parser_consume_pragma (parser);
>> +  c_parser_skip_to_pragma_eol (parser);
>> +  return true;
>> +}
>> +
>> +static unsigned short
>> +c_parser_pragma_unroll (c_parser *parser)
>> +{
>
>Note that these functions are missing comments.


+++ testsuite/c-c++-common/unroll-1.c	(working copy)
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdisable-tree-cunroll -fdump-rtl-loop2_unroll -fdump-tree-cunrolli-details" } */
+
+void bar(int);
+
+int j;
+
+void test1()
+{
+  unsigned long m = j;
+  unsigned long i;
+
+  /* { dg-final { scan-tree-dump "loop with 9 iterations completely unrolled" "cunrolli" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "21:\(5|11\): note: loop unrolled 7 times" "loop2_unroll" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= j; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "26:\(5|11\): note: loop unrolled 3 times" "loop2_unroll" } } */
+  #pragma GCC unroll 7
+  for (unsigned long i = 1; i <= j; ++i)
+    bar(i);

This last test is still puzzling me.
I want to unroll 7 times, we make that 6, so be it.
Now we compute a power-of-2 trip count AFAIU, which gives us 4. And due to the odd dumping off-by-one thing we end up seeing 3 in the dump as per above. Correct?

From a user perspective this is not very intuitive, IMHO :)


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH, v0] fortran: !GCC$ unroll for DO
  2015-01-30 22:16                 ` Mike Stump
@ 2015-02-02 23:22                   ` Bernhard Reutner-Fischer
  2015-02-03  8:42                     ` Tobias Burnus
  2015-02-02 23:22                   ` [PATCH, RFC] fortran [was Re: #pragma GCC unroll support] Bernhard Reutner-Fischer
  1 sibling, 1 reply; 25+ messages in thread
From: Bernhard Reutner-Fischer @ 2015-02-02 23:22 UTC (permalink / raw)
  To: Mike Stump; +Cc: Bernhard Reutner-Fischer, gcc-patches, fortran

fortran/ChangeLog:

2015-02-02  Bernhard Reutner-Fischer  <aldot@gcc.gnu.org>

	* match.h (gfc_match_gcc_unroll): New prototype.
	* decl.c (directive_unroll): New global variable.
	(gfc_match_gcc_unroll): New function.
	* gfortran.h (directive_unroll): New extern declaration.
	[gfc_iterator]: New member unroll.
	* parse.c (decode_gcc_attribute): Match "unroll".
	(parse_do_block): Set iterator's unroll.
	(parse_executable): Diagnose misplaced unroll directive.
	* trans.h (gfc_cfun_has_unroll): New prototype.
	* trans-decl.c (gfc_cfun_has_unroll): New function.
	* trans-stmt.c (gfc_trans_simple_do, gfc_trans_do): Annotate
	loop condition with annot_expr_unroll_kind.

testsuite/ChangeLog:

2015-02-02  Bernhard Reutner-Fischer  <aldot@gcc.gnu.org>

	* gfortran.dg/directive_unroll_1.f90: New testcase.
	* gfortran.dg/directive_unroll_2.f90: Likewise.

Signed-off-by: Bernhard Reutner-Fischer <rep.dot.nop@gmail.com>
---
 gcc/fortran/decl.c                               | 38 ++++++++++++++++++++
 gcc/fortran/gfortran.h                           |  2 ++
 gcc/fortran/match.h                              |  1 +
 gcc/fortran/parse.c                              | 13 ++++++-
 gcc/fortran/trans-decl.c                         |  7 ++++
 gcc/fortran/trans-stmt.c                         | 14 ++++++++
 gcc/fortran/trans.h                              |  3 ++
 gcc/testsuite/gfortran.dg/directive_unroll_1.f90 | 46 ++++++++++++++++++++++++
 gcc/testsuite/gfortran.dg/directive_unroll_2.f90 | 39 ++++++++++++++++++++
 9 files changed, 162 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gfortran.dg/directive_unroll_1.f90
 create mode 100644 gcc/testsuite/gfortran.dg/directive_unroll_2.f90

diff --git a/gcc/fortran/decl.c b/gcc/fortran/decl.c
index 40d851c..713e6ee 100644
--- a/gcc/fortran/decl.c
+++ b/gcc/fortran/decl.c
@@ -103,6 +103,8 @@ gfc_symbol *gfc_new_block;
 
 bool gfc_matching_function;
 
+/* Set upon parsing a !GCC$ unroll n directive for use in the next loop.  */
+int directive_unroll = -1;
 
 /********************* DATA statement subroutines *********************/
 
@@ -8866,3 +8868,39 @@ syntax:
   gfc_error ("Syntax error in !GCC$ ATTRIBUTES statement at %C");
   return MATCH_ERROR;
 }
+
+
+/* Match a !GCC$ UNROLL statement of the form:
+      !GCC$ UNROLL n
+
+   The parameter n is the number of times we are supposed to unroll;
+   Refer to the C frontend and loop-unroll.c decide_unrolling() for details.
+
+   When we come here, we have already matched the !GCC$ UNROLL string.
+   */
+match
+gfc_match_gcc_unroll (void)
+{
+  signed int value;
+
+  if (gfc_match_small_int (&value) == MATCH_YES)
+    {
+      if (value < 0 || value > USHRT_MAX)
+	{
+	  gfc_error ("%<GCC unroll%> directive requires a"
+	      " non-negative integral constant"
+	      " less than or equal to %u at %C",
+	      USHRT_MAX
+	  );
+	  return MATCH_ERROR;
+	}
+      if (gfc_match_eos () == MATCH_YES)
+	{
+	  directive_unroll = value;
+	  return MATCH_YES;
+	}
+    }
+
+  gfc_error ("Syntax error in !GCC$ UNROLL directive at %C");
+  return MATCH_ERROR;
+}
diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index 6b9f7dd..7bd2432 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -2185,6 +2185,7 @@ gfc_case;
 typedef struct
 {
   gfc_expr *var, *start, *end, *step;
+  unsigned short unroll;
 }
 gfc_iterator;
 
@@ -2546,6 +2547,7 @@ gfc_finalizer;
 /* decl.c */
 bool gfc_in_match_data (void);
 match gfc_match_char_spec (gfc_typespec *);
+extern int directive_unroll;
 
 /* scanner.c */
 void gfc_scanner_done_1 (void);
diff --git a/gcc/fortran/match.h b/gcc/fortran/match.h
index 96d3ec1..30c0aa3 100644
--- a/gcc/fortran/match.h
+++ b/gcc/fortran/match.h
@@ -219,6 +219,7 @@ match gfc_match_contiguous (void);
 match gfc_match_dimension (void);
 match gfc_match_external (void);
 match gfc_match_gcc_attributes (void);
+match gfc_match_gcc_unroll (void);
 match gfc_match_import (void);
 match gfc_match_intent (void);
 match gfc_match_intrinsic (void);
diff --git a/gcc/fortran/parse.c b/gcc/fortran/parse.c
index 2c7c554..95c35b9 100644
--- a/gcc/fortran/parse.c
+++ b/gcc/fortran/parse.c
@@ -882,6 +882,7 @@ decode_gcc_attribute (void)
   old_locus = gfc_current_locus;
 
   match ("attributes", gfc_match_gcc_attributes, ST_ATTR_DECL);
+  match ("unroll", gfc_match_gcc_unroll, ST_NONE);
 
   /* All else has failed, so give up.  See if any of the matchers has
      stored an error message of some sort.  */
@@ -4020,7 +4021,14 @@ parse_do_block (void)
   s.ext.end_do_label = new_st.label1;
 
   if (new_st.ext.iterator != NULL)
-    stree = new_st.ext.iterator->var->symtree;
+    {
+      stree = new_st.ext.iterator->var->symtree;
+      if (directive_unroll != -1)
+	{
+	  new_st.ext.iterator->unroll = directive_unroll;
+	  directive_unroll = -1;
+	}
+    }
   else
     stree = NULL;
 
@@ -4745,6 +4753,9 @@ parse_executable (gfc_statement st)
 	  return st;
 	}
 
+      if (directive_unroll != -1)
+	gfc_error ("%<GCC unroll%> directive does not commence a loop at %C");
+
       st = next_statement ();
     }
 }
diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c
index 8a65d2b..3965541 100644
--- a/gcc/fortran/trans-decl.c
+++ b/gcc/fortran/trans-decl.c
@@ -6117,5 +6117,12 @@ gfc_process_block_locals (gfc_namespace* ns)
   saved_local_decls = NULL_TREE;
 }
 
+/* Hint to the ME that the current function has an unroll directive.  */
+
+void
+gfc_cfun_has_unroll (void)
+{
+  cfun->has_unroll = true;
+}
 
 #include "gt-fortran-trans-decl.h"
diff --git a/gcc/fortran/trans-stmt.c b/gcc/fortran/trans-stmt.c
index 01bfd97..5379c7b 100644
--- a/gcc/fortran/trans-stmt.c
+++ b/gcc/fortran/trans-stmt.c
@@ -1570,6 +1570,13 @@ gfc_trans_simple_do (gfc_code * code, stmtblock_t *pblock, tree dovar,
   cond = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, dovar,
 			  to);
   cond = gfc_evaluate_now_loc (loc, cond, &body);
+  if (code->ext.iterator->unroll && cond != error_mark_node)
+    {
+      cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+	  build_int_cst (integer_type_node, annot_expr_unroll_kind),
+	  build_int_cst (integer_type_node, code->ext.iterator->unroll));
+      gfc_cfun_has_unroll ();
+    }
 
   /* Increment the loop variable.  */
   tmp = fold_build2_loc (loc, PLUS_EXPR, type, dovar, step);
@@ -1870,6 +1877,13 @@ gfc_trans_do (gfc_code * code, tree exit_cond)
   /* End with the loop condition.  Loop until countm1t == 0.  */
   cond = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, countm1t,
 			  build_int_cst (utype, 0));
+  if (code->ext.iterator->unroll && cond != error_mark_node)
+    {
+      cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+	  build_int_cst (integer_type_node, annot_expr_unroll_kind),
+	  build_int_cst (integer_type_node, code->ext.iterator->unroll));
+      gfc_cfun_has_unroll ();
+    }
   tmp = fold_build1_loc (loc, GOTO_EXPR, void_type_node, exit_label);
   tmp = fold_build3_loc (loc, COND_EXPR, void_type_node,
 			 cond, tmp, build_empty_stmt (loc));
diff --git a/gcc/fortran/trans.h b/gcc/fortran/trans.h
index bd1520a..fbd392b 100644
--- a/gcc/fortran/trans.h
+++ b/gcc/fortran/trans.h
@@ -665,6 +665,9 @@ tree gfc_build_library_function_decl_with_spec (tree name, const char *spec,
 /* Process the local variable decls of a block construct.  */
 void gfc_process_block_locals (gfc_namespace*);
 
+/* Hint to the ME that the current function has an unroll directive.  */
+void gfc_cfun_has_unroll (void);
+
 /* Output initialization/clean-up code that was deferred.  */
 void gfc_trans_deferred_vars (gfc_symbol*, gfc_wrapped_block *);
 
diff --git a/gcc/testsuite/gfortran.dg/directive_unroll_1.f90 b/gcc/testsuite/gfortran.dg/directive_unroll_1.f90
new file mode 100644
index 0000000..ebaa2f9
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/directive_unroll_1.f90
@@ -0,0 +1,46 @@
+! { dg-do compile }
+! { dg-options "-O2 -fdump-rtl-loop2_unroll -fdump-tree-cunrolli-details" }
+! Test that
+! #pragma GCC unroll n
+! works
+
+! { dg-final { scan-tree-dump-not "note: loop turned into non-loop; it never loops" "cunrolli" } }
+
+subroutine simple1(n)
+  implicit NONE
+  integer (kind=1), intent(in) :: n
+  integer (kind=4) :: i
+!GCC$ unroll 8
+  DO i=0, n, 1
+    call dummy1(i)
+  ENDDO
+! { dg-final { scan-tree-dump "15:0: note: loop unrolled 7 times" "loop2_unroll" } }
+end subroutine simple1
+
+subroutine simple2(a, b, n)
+  implicit NONE
+  integer (kind=1), intent(in) :: n
+  integer :: a(n), b(n)
+  integer (kind=4) :: i
+!GCC$ unroll 8
+  DO i=n, 0, -1
+    call dummy2(a(i), b(i), i)
+  ENDDO
+! { dg-final { scan-tree-dump "27:0: note: loop unrolled 7 times" "loop2_unroll" } }
+end subroutine simple2
+
+subroutine not_simple1(a, b, n)
+  implicit NONE
+  integer (kind=1), intent(in) :: n
+  integer :: a(n), b(n)
+  integer (kind=4) :: i
+!GCC$ unroll 8
+  DO i=0, n, 2
+    call dummy2(a(i), b(i), i)
+  ENDDO
+! { dg-final { scan-tree-dump "38:0: note: loop unrolled 7 times" "loop2_unroll" } }
+! { dg-final { scan-tree-dump "38:0: note: not unrolling loop, user didn't want it unrolled completely" "cunrolli" } }
+end subroutine not_simple1
+
+! { dg-final { cleanup-tree-dump "cunrolli" } }
+! { dg-final { cleanup-rtl-dump "loop2_unroll" } }
diff --git a/gcc/testsuite/gfortran.dg/directive_unroll_2.f90 b/gcc/testsuite/gfortran.dg/directive_unroll_2.f90
new file mode 100644
index 0000000..59804a1
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/directive_unroll_2.f90
@@ -0,0 +1,39 @@
+! { dg-do compile }
+
+! Test that
+! #pragma GCC unroll n
+! rejects invalid n and improper use
+
+subroutine wrong1(n)
+  implicit NONE
+  integer (kind=1), intent(in) :: n
+  integer (kind=4) :: i
+!GCC$ unroll 999999999 ! { dg-error "non-negative integral constant less than" }
+  DO i=0, n, 1
+    call dummy1(i)
+  ENDDO
+end subroutine wrong1
+
+subroutine wrong2(a, b, n)
+  implicit NONE
+  integer (kind=1), intent(in) :: n
+  integer :: a(n), b(n)
+  integer (kind=4) :: i
+!GCC$ unroll -1 ! { dg-error "non-negative integral constant less than" }
+  DO i=0, n, 2
+    call dummy2(a(i), b(i), i)
+  ENDDO
+end subroutine wrong2
+
+subroutine wrong3(a, b, n)
+  implicit NONE
+  integer (kind=1), intent(in) :: n
+  integer :: a(n), b(n)
+  integer (kind=4) :: i
+!GCC$ unroll 8
+  write (*,*) "wrong"! { dg-error "directive does not commence a loop" }
+  DO i=n, 0, -1
+    call dummy2(a(i), b(i), i)
+  ENDDO
+end subroutine wrong3
+
-- 
2.1.4

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH, RFC] fortran [was Re: #pragma GCC unroll support]
  2015-01-30 22:16                 ` Mike Stump
  2015-02-02 23:22                   ` [PATCH, v0] fortran: !GCC$ unroll for DO Bernhard Reutner-Fischer
@ 2015-02-02 23:22                   ` Bernhard Reutner-Fischer
  2015-02-03  0:08                     ` Mike Stump
  1 sibling, 1 reply; 25+ messages in thread
From: Bernhard Reutner-Fischer @ 2015-02-02 23:22 UTC (permalink / raw)
  To: Mike Stump; +Cc: Bernhard Reutner-Fischer, gcc-patches, fortran

Hi,

Some compilers IIRC use !DIR$ unroll, if memory serves me right then
the DEC compiler had !DEC$ unroll.
We could support one or the other three-letter keyword or maybe not.

I think a combination of unroll and ivdep directives is allowed (at
least in some compilers); TODO.

Not sure what other statements should be annotated with that directive?

I do not like the global variable directive_unroll but is was the easy
way out for cheap warnings.

Untested draft patch, regstrap running over night, depends on Mike's
unroll-5.diffs.txt patch in this thread (
https://gcc.gnu.org/ml/gcc-patches/2015-01/msg02733.html ).

Just stage-1 tinkering here.

Cheers,

Bernhard Reutner-Fischer (1):
  fortran: !GCC$ unroll for DO

 gcc/fortran/decl.c                               | 38 ++++++++++++++++++++
 gcc/fortran/gfortran.h                           |  2 ++
 gcc/fortran/match.h                              |  1 +
 gcc/fortran/parse.c                              | 13 ++++++-
 gcc/fortran/trans-decl.c                         |  7 ++++
 gcc/fortran/trans-stmt.c                         | 14 ++++++++
 gcc/fortran/trans.h                              |  3 ++
 gcc/testsuite/gfortran.dg/directive_unroll_1.f90 | 46 ++++++++++++++++++++++++
 gcc/testsuite/gfortran.dg/directive_unroll_2.f90 | 39 ++++++++++++++++++++
 9 files changed, 162 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gfortran.dg/directive_unroll_1.f90
 create mode 100644 gcc/testsuite/gfortran.dg/directive_unroll_2.f90

-- 
2.1.4

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH, RFC] fortran [was Re: #pragma GCC unroll support]
  2015-02-02 23:22                   ` [PATCH, RFC] fortran [was Re: #pragma GCC unroll support] Bernhard Reutner-Fischer
@ 2015-02-03  0:08                     ` Mike Stump
  2015-05-28  9:56                       ` Bernhard Reutner-Fischer
  0 siblings, 1 reply; 25+ messages in thread
From: Mike Stump @ 2015-02-03  0:08 UTC (permalink / raw)
  To: Bernhard Reutner-Fischer; +Cc: gcc-patches, fortran

On Feb 2, 2015, at 3:22 PM, Bernhard Reutner-Fischer <rep.dot.nop@gmail.com> wrote:
> Untested draft patch

I looked it over, seems to slot in nicely.


+	gfc_error ("%<GCC unroll%> directive does not commence a loop at %C”);

So, don’t like commence here.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH, v0] fortran: !GCC$ unroll for DO
  2015-02-02 23:22                   ` [PATCH, v0] fortran: !GCC$ unroll for DO Bernhard Reutner-Fischer
@ 2015-02-03  8:42                     ` Tobias Burnus
  0 siblings, 0 replies; 25+ messages in thread
From: Tobias Burnus @ 2015-02-03  8:42 UTC (permalink / raw)
  To: Bernhard Reutner-Fischer, Mike Stump; +Cc: gcc-patches, fortran

Bernhard Reutner-Fischer wrote:
> Some compilers IIRC use !DIR$ unroll, if memory serves me right then
> the DEC compiler had !DEC$ unroll.
> We could support one or the other three-letter keyword or maybe not.

Intel's compiler supports quite a lot of loop directives. (Its Fortran
front end is based on DEC's and part of the developer team also moved
from Compaq to Intel. Not that Intel didn't add a bunch of additional
directives later on.)

Supported are: simd, ivdep, loop count, (no)vector, inline, force, (no)inline,
(no)unroll, unroll_and_jam, nofusion and distribution point.

GCC support "ivdep" (C/C++), simd (I think only via -fcilkplus; but
OpenMP's 'omp simd' is a replacement [-fopenmp/-fopenmp-simd] in C/C++ and
Fortran) - and "unroll" as proposed for C/C++ and with this patch for
Fortran.

By the way: gfortran automatically annotates 'do concurrent' with 'ivdep'.

For Intel's loop directives, see:
https://software.intel.com/en-us/articles/getting-started-with-intel-composer-xe-2013-compiler-pragmas-and-directives
C++: https://software.intel.com/en-us/node/524494
Fortran: https://software.intel.com/en-us/node/525781

 * * *

Regarding the patch: In general, I prefer to stick to standard methods
(which are portable) and think that those user knobs often make things
slower than faster (as they tend to stay for years, even after the hard-
ware as moved on - or they are even inserted blindly).
till, I think it would be fine to add it.

Tobias

PS: For a non-RFC patch, you also need to update gfortran.texi.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: #pragma GCC unroll support
  2015-01-30 17:52           ` Mike Stump
  2015-01-30 18:06             ` Marek Polacek
@ 2015-03-05 23:13             ` Mike Stump
  2015-03-06  1:32               ` Sandra Loosemore
  2015-03-07  1:16               ` Joseph Myers
  1 sibling, 2 replies; 25+ messages in thread
From: Mike Stump @ 2015-03-05 23:13 UTC (permalink / raw)
  To: Joseph Myers, Jason Merrill, Richard Biener; +Cc: gcc-patches Patches

[-- Attachment #1: Type: text/plain, Size: 217 bytes --]

On Jan 30, 2015, at 8:27 AM, Mike Stump <mikestump@comcast.net> wrote:
> On Jan 30, 2015, at 7:49 AM, Joseph Myers <joseph@codesourcery.com> wrote:
>> Use error_at, and %u directly in the format.
> 
> Done.

Ping?




[-- Attachment #2: unroll-5.diffs.txt --]
[-- Type: text/plain, Size: 62603 bytes --]

Index: ada/gcc-interface/trans.c
===================================================================
--- ada/gcc-interface/trans.c	(revision 220084)
+++ ada/gcc-interface/trans.c	(working copy)
@@ -7870,17 +7870,20 @@ gnat_gimplify_stmt (tree *stmt_p)
 	  {
 	    /* Deal with the optimization hints.  */
 	    if (LOOP_STMT_IVDEP (stmt))
-	      gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
+	      gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
 				 build_int_cst (integer_type_node,
-						annot_expr_ivdep_kind));
+						annot_expr_ivdep_kind),
+				 NULL_TREE);
 	    if (LOOP_STMT_NO_VECTOR (stmt))
-	      gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
+	      gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
 				 build_int_cst (integer_type_node,
-						annot_expr_no_vector_kind));
+						annot_expr_no_vector_kind),
+				 NULL_TREE);
 	    if (LOOP_STMT_VECTOR (stmt))
-	      gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
+	      gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
 				 build_int_cst (integer_type_node,
-						annot_expr_vector_kind));
+						annot_expr_vector_kind),
+				 NULL_TREE);
 
 	    gnu_cond
 	      = build3 (COND_EXPR, void_type_node, gnu_cond, NULL_TREE,
Index: c/c-parser.c
===================================================================
--- c/c-parser.c	(revision 220084)
+++ c/c-parser.c	(working copy)
@@ -1217,9 +1217,9 @@ static void c_parser_statement (c_parser
 static void c_parser_statement_after_labels (c_parser *);
 static void c_parser_if_statement (c_parser *);
 static void c_parser_switch_statement (c_parser *);
-static void c_parser_while_statement (c_parser *, bool);
-static void c_parser_do_statement (c_parser *, bool);
-static void c_parser_for_statement (c_parser *, bool);
+static void c_parser_while_statement (c_parser *, bool, unsigned short);
+static void c_parser_do_statement (c_parser *, bool, unsigned short);
+static void c_parser_for_statement (c_parser *, bool, unsigned short);
 static tree c_parser_asm_statement (c_parser *);
 static tree c_parser_asm_operands (c_parser *);
 static tree c_parser_asm_goto_operands (c_parser *);
@@ -4972,13 +4972,13 @@ c_parser_statement_after_labels (c_parse
 	  c_parser_switch_statement (parser);
 	  break;
 	case RID_WHILE:
-	  c_parser_while_statement (parser, false);
+	  c_parser_while_statement (parser, false, 0);
 	  break;
 	case RID_DO:
-	  c_parser_do_statement (parser, false);
+	  c_parser_do_statement (parser, false, 0);
 	  break;
 	case RID_FOR:
-	  c_parser_for_statement (parser, false);
+	  c_parser_for_statement (parser, false, 0);
 	  break;
 	case RID_CILK_FOR:
 	  if (!flag_cilkplus)
@@ -5340,7 +5340,7 @@ c_parser_switch_statement (c_parser *par
 */
 
 static void
-c_parser_while_statement (c_parser *parser, bool ivdep)
+c_parser_while_statement (c_parser *parser, bool ivdep, unsigned short unroll)
 {
   tree block, cond, body, save_break, save_cont;
   location_t loc;
@@ -5354,9 +5354,15 @@ c_parser_while_statement (c_parser *pars
 	 "%<_Cilk_spawn%> statement cannot be used as a condition for while statement"))
     cond = error_mark_node;
   if (ivdep && cond != error_mark_node)
-    cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
 		   build_int_cst (integer_type_node,
-		   annot_expr_ivdep_kind));
+				  annot_expr_ivdep_kind),
+		   NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+		   build_int_cst (integer_type_node,
+				  annot_expr_unroll_kind),
+		   build_int_cst (integer_type_node, unroll));
   save_break = c_break_label;
   c_break_label = NULL_TREE;
   save_cont = c_cont_label;
@@ -5375,7 +5381,7 @@ c_parser_while_statement (c_parser *pars
 */
 
 static void
-c_parser_do_statement (c_parser *parser, bool ivdep)
+c_parser_do_statement (c_parser *parser, bool ivdep, unsigned short unroll)
 {
   tree block, cond, body, save_break, save_cont, new_break, new_cont;
   location_t loc;
@@ -5403,9 +5409,16 @@ c_parser_do_statement (c_parser *parser,
 	 "%<_Cilk_spawn%> statement cannot be used as a condition for a do-while statement"))
     cond = error_mark_node;
   if (ivdep && cond != error_mark_node)
-    cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+		   build_int_cst (integer_type_node,
+				  annot_expr_ivdep_kind),
+		   NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
 		   build_int_cst (integer_type_node,
-		   annot_expr_ivdep_kind));
+				  annot_expr_unroll_kind),
+		   build_int_cst (integer_type_node,
+				  unroll));
   if (!c_parser_require (parser, CPP_SEMICOLON, "expected %<;%>"))
     c_parser_skip_to_end_of_block_or_statement (parser);
   c_finish_loop (loc, cond, NULL, body, new_break, new_cont, false);
@@ -5469,7 +5482,7 @@ c_parser_do_statement (c_parser *parser,
 */
 
 static void
-c_parser_for_statement (c_parser *parser, bool ivdep)
+c_parser_for_statement (c_parser *parser, bool ivdep, unsigned short unroll)
 {
   tree block, cond, incr, save_break, save_cont, body;
   /* The following are only used when parsing an ObjC foreach statement.  */
@@ -5587,6 +5600,12 @@ c_parser_for_statement (c_parser *parser
 				  "%<GCC ivdep%> pragma");
 		  cond = error_mark_node;
 		}
+	      else if (unroll)
+		{
+		  c_parser_error (parser, "missing loop condition in loop with "
+				  "%<GCC unroll%> pragma");
+		  cond = error_mark_node;
+		}
 	      else
 		{
 		  c_parser_consume_token (parser);
@@ -5604,9 +5623,15 @@ c_parser_for_statement (c_parser *parser
 					 "expected %<;%>");
 	    }
 	  if (ivdep && cond != error_mark_node)
-	    cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+	    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+			   build_int_cst (integer_type_node,
+					  annot_expr_ivdep_kind),
+			   NULL_TREE);
+	  if (unroll && cond != error_mark_node)
+	    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
 			   build_int_cst (integer_type_node,
-			   annot_expr_ivdep_kind));
+					  annot_expr_unroll_kind),
+			   build_int_cst (integer_type_node, unroll));
 	}
       /* Parse the increment expression (the third expression in a
 	 for-statement).  In the case of a foreach-statement, this is
@@ -9592,6 +9617,45 @@ c_parser_objc_at_dynamic_declaration (c_
 }
 
 \f
+static bool
+c_parse_pragma_ivdep (c_parser *parser)
+{
+  c_parser_consume_pragma (parser);
+  c_parser_skip_to_pragma_eol (parser);
+  return true;
+}
+
+static unsigned short
+c_parser_pragma_unroll (c_parser *parser)
+{
+  unsigned short unroll;
+  c_parser_consume_pragma (parser);
+  location_t location = c_parser_peek_token (parser)->location;
+  tree expr = c_parser_expr_no_commas (parser, NULL).value;
+  mark_exp_read (expr);
+  expr = c_fully_fold (expr, false, NULL);
+  HOST_WIDE_INT lunroll = 0;
+  if (!INTEGRAL_TYPE_P (TREE_TYPE (expr))
+      || TREE_CODE (expr) != INTEGER_CST
+      || (lunroll = tree_to_shwi (expr)) < 0
+      || lunroll > USHRT_MAX)
+    {
+      error_at (location, "%<#pragma GCC unroll%> requires an"
+		" assignment-expression that evaluates to a non-negative"
+		" integral constant less than or equal to %u", USHRT_MAX);
+      unroll = 0;
+    }
+  else
+    {
+      unroll = (unsigned short) lunroll;
+      if (unroll == 0)
+	unroll = 1;
+    }
+
+  c_parser_skip_to_pragma_eol (parser);
+  return unroll;
+}
+
 /* Handle pragmas.  Some OpenMP pragmas are associated with, and therefore
    should be considered, statements.  ALLOW_STMT is true if we're within
    the context of a function and such pragmas are to be allowed.  Returns
@@ -9714,21 +9778,46 @@ c_parser_pragma (c_parser *parser, enum
       c_parser_omp_declare (parser, context);
       return false;
     case PRAGMA_IVDEP:
-      c_parser_consume_pragma (parser);
-      c_parser_skip_to_pragma_eol (parser);
-      if (!c_parser_next_token_is_keyword (parser, RID_FOR)
-	  && !c_parser_next_token_is_keyword (parser, RID_WHILE)
-	  && !c_parser_next_token_is_keyword (parser, RID_DO))
-	{
-	  c_parser_error (parser, "for, while or do statement expected");
-	  return false;
-	}
-      if (c_parser_next_token_is_keyword (parser, RID_FOR))
-	c_parser_for_statement (parser, true);
-      else if (c_parser_next_token_is_keyword (parser, RID_WHILE))
-	c_parser_while_statement (parser, true);
-      else
-	c_parser_do_statement (parser, true);
+      {
+	bool ivdep = c_parse_pragma_ivdep (parser);
+	unsigned short unroll = 0;
+	if (c_parser_peek_token (parser)->pragma_kind == PRAGMA_UNROLL)
+	  unroll = c_parser_pragma_unroll (parser);
+	if (!c_parser_next_token_is_keyword (parser, RID_FOR)
+	    && !c_parser_next_token_is_keyword (parser, RID_WHILE)
+	    && !c_parser_next_token_is_keyword (parser, RID_DO))
+	  {
+	    c_parser_error (parser, "for, while or do statement expected");
+	    return false;
+	  }
+	if (c_parser_next_token_is_keyword (parser, RID_FOR))
+	  c_parser_for_statement (parser, ivdep, unroll);
+	else if (c_parser_next_token_is_keyword (parser, RID_WHILE))
+	  c_parser_while_statement (parser, ivdep, unroll);
+	else
+	  c_parser_do_statement (parser, ivdep, unroll);
+      }
+      return false;
+    case PRAGMA_UNROLL:
+      {
+	unsigned short unroll = c_parser_pragma_unroll (parser);
+	bool ivdep = false;
+	if (c_parser_peek_token (parser)->pragma_kind == PRAGMA_IVDEP)
+	  ivdep = c_parse_pragma_ivdep (parser);
+	if (!c_parser_next_token_is_keyword (parser, RID_FOR)
+	    && !c_parser_next_token_is_keyword (parser, RID_WHILE)
+	    && !c_parser_next_token_is_keyword (parser, RID_DO))
+	  {
+	    c_parser_error (parser, "for, while or do statement expected");
+	    return false;
+	  }
+	if (c_parser_next_token_is_keyword (parser, RID_FOR))
+	  c_parser_for_statement (parser, ivdep, unroll);
+	else if (c_parser_next_token_is_keyword (parser, RID_WHILE))
+	  c_parser_while_statement (parser, ivdep, unroll);
+	else
+	  c_parser_do_statement (parser, ivdep, unroll);
+      }
       return false;
 
     case PRAGMA_GCC_PCH_PREPROCESS:
Index: c-family/c-pragma.c
===================================================================
--- c-family/c-pragma.c	(revision 220084)
+++ c-family/c-pragma.c	(working copy)
@@ -1456,6 +1456,10 @@ init_pragma (void)
     cpp_register_deferred_pragma (parse_in, "GCC", "ivdep", PRAGMA_IVDEP, false,
 				  false);
 
+  if (!flag_preprocess_only)
+    cpp_register_deferred_pragma (parse_in, "GCC", "unroll", PRAGMA_UNROLL, false,
+				  false);
+
   if (flag_cilkplus && !flag_preprocess_only)
     cpp_register_deferred_pragma (parse_in, "cilk", "grainsize",
 				  PRAGMA_CILK_GRAINSIZE, true, false);
Index: c-family/c-pragma.h
===================================================================
--- c-family/c-pragma.h	(revision 220084)
+++ c-family/c-pragma.h	(working copy)
@@ -69,6 +69,7 @@ typedef enum pragma_kind {
 
   PRAGMA_GCC_PCH_PREPROCESS,
   PRAGMA_IVDEP,
+  PRAGMA_UNROLL,
 
   PRAGMA_FIRST_EXTERNAL
 } pragma_kind;
Index: cfgloop.h
===================================================================
--- cfgloop.h	(revision 220084)
+++ cfgloop.h	(working copy)
@@ -189,6 +189,11 @@ struct GTY ((chain_next ("%h.next"))) lo
      of the loop can be safely evaluated concurrently.  */
   int safelen;
 
+  /* The number of times to unroll the loop.  0, means no information
+     given, just do what we always do.  A value of 1, means don't unroll
+     the loop.  */
+  unsigned short unroll;
+
   /* True if this loop should never be vectorized.  */
   bool dont_vectorize;
 
Index: cfgloopmanip.c
===================================================================
--- cfgloopmanip.c	(revision 220084)
+++ cfgloopmanip.c	(working copy)
@@ -1038,6 +1038,7 @@ copy_loop_info (struct loop *loop, struc
   target->estimate_state = loop->estimate_state;
   target->warned_aggressive_loop_optimizations
     |= loop->warned_aggressive_loop_optimizations;
+  target->unroll = loop->unroll;
 }
 
 /* Copies copy of LOOP as subloop of TARGET loop, placing newly
Index: cp/cp-array-notation.c
===================================================================
--- cp/cp-array-notation.c	(revision 220084)
+++ cp/cp-array-notation.c	(working copy)
@@ -81,7 +81,7 @@ create_an_loop (tree init, tree cond, tr
   finish_expr_stmt (init);
   for_stmt = begin_for_stmt (NULL_TREE, NULL_TREE);
   finish_for_init_stmt (for_stmt);
-  finish_for_cond (cond, for_stmt, false);
+  finish_for_cond (cond, for_stmt, false, 0);
   finish_for_expr (incr, for_stmt);
   finish_expr_stmt (body);
   finish_for_stmt (for_stmt);
Index: cp/cp-tree.h
===================================================================
--- cp/cp-tree.h	(revision 220084)
+++ cp/cp-tree.h	(working copy)
@@ -5644,7 +5644,7 @@ extern tree implicitly_declare_fn
 extern bool maybe_clone_body			(tree);
 
 /* In parser.c */
-extern tree cp_convert_range_for (tree, tree, tree, bool);
+extern tree cp_convert_range_for (tree, tree, tree, bool, unsigned short);
 extern bool parsing_nsdmi (void);
 extern void inject_this_parameter (tree, cp_cv_quals);
 
@@ -5880,16 +5880,16 @@ extern void begin_else_clause			(tree);
 extern void finish_else_clause			(tree);
 extern void finish_if_stmt			(tree);
 extern tree begin_while_stmt			(void);
-extern void finish_while_stmt_cond		(tree, tree, bool);
+extern void finish_while_stmt_cond		(tree, tree, bool, unsigned short);
 extern void finish_while_stmt			(tree);
 extern tree begin_do_stmt			(void);
 extern void finish_do_body			(tree);
-extern void finish_do_stmt			(tree, tree, bool);
+extern void finish_do_stmt			(tree, tree, bool, unsigned short);
 extern tree finish_return_stmt			(tree);
 extern tree begin_for_scope			(tree *);
 extern tree begin_for_stmt			(tree, tree);
 extern void finish_for_init_stmt		(tree);
-extern void finish_for_cond			(tree, tree, bool);
+extern void finish_for_cond			(tree, tree, bool, unsigned short);
 extern void finish_for_expr			(tree, tree);
 extern void finish_for_stmt			(tree);
 extern tree begin_range_for_stmt		(tree, tree);
Index: cp/init.c
===================================================================
--- cp/init.c	(revision 220084)
+++ cp/init.c	(working copy)
@@ -3693,7 +3693,7 @@ build_vec_init (tree base, tree maxindex
       finish_for_init_stmt (for_stmt);
       finish_for_cond (build2 (NE_EXPR, boolean_type_node, iterator,
 			       build_int_cst (TREE_TYPE (iterator), -1)),
-		       for_stmt, false);
+		       for_stmt, false, 0);
       elt_init = cp_build_unary_op (PREDECREMENT_EXPR, iterator, 0,
 				    complain);
       if (elt_init == error_mark_node)
Index: cp/parser.c
===================================================================
--- cp/parser.c	(revision 220084)
+++ cp/parser.c	(working copy)
@@ -2044,15 +2044,15 @@ static tree cp_parser_selection_statemen
 static tree cp_parser_condition
   (cp_parser *);
 static tree cp_parser_iteration_statement
-  (cp_parser *, bool);
+  (cp_parser *, bool, unsigned short);
 static bool cp_parser_for_init_statement
   (cp_parser *, tree *decl);
 static tree cp_parser_for
-  (cp_parser *, bool);
+  (cp_parser *, bool, unsigned short);
 static tree cp_parser_c_for
-  (cp_parser *, tree, tree, bool);
+  (cp_parser *, tree, tree, bool, unsigned short);
 static tree cp_parser_range_for
-  (cp_parser *, tree, tree, tree, bool);
+  (cp_parser *, tree, tree, tree, bool, unsigned short);
 static void do_range_for_auto_deduction
   (tree, tree);
 static tree cp_parser_perform_range_for_lookup
@@ -9698,7 +9698,7 @@ cp_parser_statement (cp_parser* parser,
 	case RID_WHILE:
 	case RID_DO:
 	case RID_FOR:
-	  statement = cp_parser_iteration_statement (parser, false);
+	  statement = cp_parser_iteration_statement (parser, false, 0);
 	  break;
 
 	case RID_CILK_FOR:
@@ -10390,7 +10390,7 @@ cp_parser_condition (cp_parser* parser)
    not included. */
 
 static tree
-cp_parser_for (cp_parser *parser, bool ivdep)
+cp_parser_for (cp_parser *parser, bool ivdep, unsigned short unroll)
 {
   tree init, scope, decl;
   bool is_range_for;
@@ -10402,13 +10402,14 @@ cp_parser_for (cp_parser *parser, bool i
   is_range_for = cp_parser_for_init_statement (parser, &decl);
 
   if (is_range_for)
-    return cp_parser_range_for (parser, scope, init, decl, ivdep);
+    return cp_parser_range_for (parser, scope, init, decl, ivdep, unroll);
   else
-    return cp_parser_c_for (parser, scope, init, ivdep);
+    return cp_parser_c_for (parser, scope, init, ivdep, unroll);
 }
 
 static tree
-cp_parser_c_for (cp_parser *parser, tree scope, tree init, bool ivdep)
+cp_parser_c_for (cp_parser *parser, tree scope, tree init, bool ivdep,
+		 unsigned short unroll)
 {
   /* Normal for loop */
   tree condition = NULL_TREE;
@@ -10429,7 +10430,13 @@ cp_parser_c_for (cp_parser *parser, tree
 		       "%<GCC ivdep%> pragma");
       condition = error_mark_node;
     }
-  finish_for_cond (condition, stmt, ivdep);
+  else if (unroll)
+    {
+      cp_parser_error (parser, "missing loop condition in loop with "
+		       "%<GCC unroll%> pragma");
+      condition = error_mark_node;
+    }
+  finish_for_cond (condition, stmt, ivdep, unroll);
   /* Look for the `;'.  */
   cp_parser_require (parser, CPP_SEMICOLON, RT_SEMICOLON);
 
@@ -10453,7 +10460,7 @@ cp_parser_c_for (cp_parser *parser, tree
 
 static tree
 cp_parser_range_for (cp_parser *parser, tree scope, tree init, tree range_decl,
-		     bool ivdep)
+		     bool ivdep, unsigned short unroll)
 {
   tree stmt, range_expr;
 
@@ -10474,6 +10481,8 @@ cp_parser_range_for (cp_parser *parser,
       stmt = begin_range_for_stmt (scope, init);
       if (ivdep)
 	RANGE_FOR_IVDEP (stmt) = 1;
+      if (unroll)
+	/* TODO */(void)0;
       finish_range_for_decl (stmt, range_decl, range_expr);
       if (!type_dependent_expression_p (range_expr)
 	  /* do_auto_deduction doesn't mess with template init-lists.  */
@@ -10483,7 +10492,7 @@ cp_parser_range_for (cp_parser *parser,
   else
     {
       stmt = begin_for_stmt (scope, init);
-      stmt = cp_convert_range_for (stmt, range_decl, range_expr, ivdep);
+      stmt = cp_convert_range_for (stmt, range_decl, range_expr, ivdep, unroll);
     }
   return stmt;
 }
@@ -10575,7 +10584,7 @@ do_range_for_auto_deduction (tree decl,
 
 tree
 cp_convert_range_for (tree statement, tree range_decl, tree range_expr,
-		      bool ivdep)
+		      bool ivdep, unsigned short unroll)
 {
   tree begin, end;
   tree iter_type, begin_expr, end_expr;
@@ -10632,7 +10641,7 @@ cp_convert_range_for (tree statement, tr
 				 begin, ERROR_MARK,
 				 end, ERROR_MARK,
 				 NULL, tf_warning_or_error);
-  finish_for_cond (condition, statement, ivdep);
+  finish_for_cond (condition, statement, ivdep, unroll);
 
   /* The new increment expression.  */
   expression = finish_unary_op_expr (input_location,
@@ -10793,7 +10802,8 @@ cp_parser_range_for_member_function (tre
    Returns the new WHILE_STMT, DO_STMT, FOR_STMT or RANGE_FOR_STMT.  */
 
 static tree
-cp_parser_iteration_statement (cp_parser* parser, bool ivdep)
+cp_parser_iteration_statement (cp_parser* parser, bool ivdep,
+			       unsigned short unroll)
 {
   cp_token *token;
   enum rid keyword;
@@ -10823,7 +10833,7 @@ cp_parser_iteration_statement (cp_parser
 	cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN);
 	/* Parse the condition.  */
 	condition = cp_parser_condition (parser);
-	finish_while_stmt_cond (condition, statement, ivdep);
+	finish_while_stmt_cond (condition, statement, ivdep, unroll);
 	/* Look for the `)'.  */
 	cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN);
 	/* Parse the dependent statement.  */
@@ -10853,7 +10863,7 @@ cp_parser_iteration_statement (cp_parser
 	/* Parse the expression.  */
 	expression = cp_parser_expression (parser);
 	/* We're done with the do-statement.  */
-	finish_do_stmt (expression, statement, ivdep);
+	finish_do_stmt (expression, statement, ivdep, unroll);
 	/* Look for the `)'.  */
 	cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN);
 	/* Look for the `;'.  */
@@ -10866,7 +10876,7 @@ cp_parser_iteration_statement (cp_parser
 	/* Look for the `('.  */
 	cp_parser_require (parser, CPP_OPEN_PAREN, RT_OPEN_PAREN);
 
-	statement = cp_parser_for (parser, ivdep);
+	statement = cp_parser_for (parser, ivdep, unroll);
 
 	/* Look for the `)'.  */
 	cp_parser_require (parser, CPP_CLOSE_PAREN, RT_CLOSE_PAREN);
@@ -32901,6 +32911,41 @@ cp_parser_cilk_grainsize (cp_parser *par
   cp_parser_skip_to_pragma_eol (parser, pragma_tok);
 }
 
+static bool
+cp_parser_pragma_ivdep (cp_parser *parser, cp_token *pragma_tok)
+{
+  cp_parser_skip_to_pragma_eol (parser, pragma_tok);
+  return true;
+}
+
+static unsigned short
+cp_parser_pragma_unroll (cp_parser *parser, cp_token *pragma_tok)
+{
+  location_t location = cp_lexer_peek_token (parser->lexer)->location;
+  tree expr = cp_parser_constant_expression (parser);
+  unsigned short unroll;
+  expr = maybe_constant_value (expr);
+  cp_parser_require_pragma_eol (parser, pragma_tok);
+  HOST_WIDE_INT lunroll = 0;
+  if (!INTEGRAL_TYPE_P (TREE_TYPE (expr))
+      || TREE_CODE (expr) != INTEGER_CST
+      || (lunroll = tree_to_shwi (expr)) < 0
+      || lunroll > USHRT_MAX)
+    {
+      error_at (location, "%<#pragma GCC unroll%> requires an"
+		" assignment-expression that evaluates to a non-negative"
+		" integral constant less than or equal to %u", USHRT_MAX);
+      unroll = 0;
+    }
+  else
+    {
+      unroll = (unsigned short) lunroll;
+      if (unroll == 0)
+	unroll = 1;
+    }
+  return unroll;
+}
+
 /* Normal parsing of a pragma token.  Here we can (and must) use the
    regular lexer.  */
 
@@ -33068,9 +33113,39 @@ cp_parser_pragma (cp_parser *parser, enu
 
     case PRAGMA_IVDEP:
       {
-	cp_parser_skip_to_pragma_eol (parser, pragma_tok);
+	bool ivdep = cp_parser_pragma_ivdep (parser, pragma_tok);
+	unsigned short unroll = 0;
 	cp_token *tok;
 	tok = cp_lexer_peek_token (the_parser->lexer);
+	if (tok->type == CPP_PRAGMA &&
+	    tok->pragma_kind == PRAGMA_UNROLL)
+	  {
+	    unroll = cp_parser_pragma_unroll (parser, pragma_tok);
+	    tok = cp_lexer_peek_token (the_parser->lexer);
+	  }
+	if (tok->type != CPP_KEYWORD
+	    || (tok->keyword != RID_FOR && tok->keyword != RID_WHILE
+		&& tok->keyword != RID_DO))
+	  {
+	    cp_parser_error (parser, "for, while or do statement expected");
+	    return false;
+	  }
+	cp_parser_iteration_statement (parser, ivdep, unroll);
+	return true;
+      }
+
+    case PRAGMA_UNROLL:
+      {
+	unsigned short unroll = cp_parser_pragma_unroll (parser, pragma_tok);
+	bool ivdep = false;
+	cp_token *tok;
+	tok = cp_lexer_peek_token (the_parser->lexer);
+	if (tok->type == CPP_PRAGMA &&
+	    tok->pragma_kind == PRAGMA_IVDEP)
+	  {
+	    ivdep = cp_parser_pragma_ivdep (parser, tok);
+	    tok = cp_lexer_peek_token (the_parser->lexer);
+	  }
 	if (tok->type != CPP_KEYWORD
 	    || (tok->keyword != RID_FOR && tok->keyword != RID_WHILE
 		&& tok->keyword != RID_DO))
@@ -33078,7 +33153,7 @@ cp_parser_pragma (cp_parser *parser, enu
 	    cp_parser_error (parser, "for, while or do statement expected");
 	    return false;
 	  }
-	cp_parser_iteration_statement (parser, true);
+	cp_parser_iteration_statement (parser, ivdep, unroll);
 	return true;
       }
 
Index: cp/pt.c
===================================================================
--- cp/pt.c	(revision 220084)
+++ cp/pt.c	(working copy)
@@ -13886,7 +13886,7 @@ tsubst_expr (tree t, tree args, tsubst_f
       RECUR (FOR_INIT_STMT (t));
       finish_for_init_stmt (stmt);
       tmp = RECUR (FOR_COND (t));
-      finish_for_cond (tmp, stmt, false);
+      finish_for_cond (tmp, stmt, false, 0);
       tmp = RECUR (FOR_EXPR (t));
       finish_for_expr (tmp, stmt);
       RECUR (FOR_BODY (t));
@@ -13901,7 +13901,7 @@ tsubst_expr (tree t, tree args, tsubst_f
         decl = tsubst (decl, args, complain, in_decl);
         maybe_push_decl (decl);
         expr = RECUR (RANGE_FOR_EXPR (t));
-        stmt = cp_convert_range_for (stmt, decl, expr, RANGE_FOR_IVDEP (t));
+        stmt = cp_convert_range_for (stmt, decl, expr, RANGE_FOR_IVDEP (t), 0);
         RECUR (RANGE_FOR_BODY (t));
         finish_for_stmt (stmt);
       }
@@ -13910,7 +13910,7 @@ tsubst_expr (tree t, tree args, tsubst_f
     case WHILE_STMT:
       stmt = begin_while_stmt ();
       tmp = RECUR (WHILE_COND (t));
-      finish_while_stmt_cond (tmp, stmt, false);
+      finish_while_stmt_cond (tmp, stmt, false, 0);
       RECUR (WHILE_BODY (t));
       finish_while_stmt (stmt);
       break;
@@ -13920,7 +13920,7 @@ tsubst_expr (tree t, tree args, tsubst_f
       RECUR (DO_BODY (t));
       finish_do_body (stmt);
       tmp = RECUR (DO_COND (t));
-      finish_do_stmt (tmp, stmt, false);
+      finish_do_stmt (tmp, stmt, false, 0);
       break;
 
     case IF_STMT:
@@ -14358,8 +14358,10 @@ tsubst_expr (tree t, tree args, tsubst_f
 
     case ANNOTATE_EXPR:
       tmp = RECUR (TREE_OPERAND (t, 0));
-      RETURN (build2_loc (EXPR_LOCATION (t), ANNOTATE_EXPR,
-			  TREE_TYPE (tmp), tmp, RECUR (TREE_OPERAND (t, 1))));
+      RETURN (build3_loc (EXPR_LOCATION (t), ANNOTATE_EXPR,
+			  TREE_TYPE (tmp), tmp,
+			  RECUR (TREE_OPERAND (t, 1)),
+			  RECUR (TREE_OPERAND (t, 2))));
 
     default:
       gcc_assert (!STATEMENT_CODE_P (TREE_CODE (t)));
Index: cp/semantics.c
===================================================================
--- cp/semantics.c	(revision 220084)
+++ cp/semantics.c	(working copy)
@@ -802,7 +802,8 @@ begin_while_stmt (void)
    WHILE_STMT.  */
 
 void
-finish_while_stmt_cond (tree cond, tree while_stmt, bool ivdep)
+finish_while_stmt_cond (tree cond, tree while_stmt, bool ivdep,
+			unsigned short unroll)
 {
   if (check_no_cilk (cond,
       "Cilk array notation cannot be used as a condition for while statement",
@@ -812,11 +813,19 @@ finish_while_stmt_cond (tree cond, tree
   finish_cond (&WHILE_COND (while_stmt), cond);
   begin_maybe_infinite_loop (cond);
   if (ivdep && cond != error_mark_node)
-    WHILE_COND (while_stmt) = build2 (ANNOTATE_EXPR,
+    WHILE_COND (while_stmt) = build3 (ANNOTATE_EXPR,
 				      TREE_TYPE (WHILE_COND (while_stmt)),
 				      WHILE_COND (while_stmt),
 				      build_int_cst (integer_type_node,
-						     annot_expr_ivdep_kind));
+						     annot_expr_ivdep_kind),
+				      NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    WHILE_COND (while_stmt) = build3 (ANNOTATE_EXPR,
+				      TREE_TYPE (WHILE_COND (while_stmt)),
+				      WHILE_COND (while_stmt),
+				      build_int_cst (integer_type_node,
+						     annot_expr_unroll_kind),
+				      build_int_cst (integer_type_node, unroll));
   simplify_loop_decl_cond (&WHILE_COND (while_stmt), WHILE_BODY (while_stmt));
 }
 
@@ -861,7 +870,7 @@ finish_do_body (tree do_stmt)
    COND is as indicated.  */
 
 void
-finish_do_stmt (tree cond, tree do_stmt, bool ivdep)
+finish_do_stmt (tree cond, tree do_stmt, bool ivdep, unsigned short unroll)
 {
   if (check_no_cilk (cond,
   "Cilk array notation cannot be used as a condition for a do-while statement",
@@ -870,8 +879,13 @@ finish_do_stmt (tree cond, tree do_stmt,
   cond = maybe_convert_cond (cond);
   end_maybe_infinite_loop (cond);
   if (ivdep && cond != error_mark_node)
-    cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
-		   build_int_cst (integer_type_node, annot_expr_ivdep_kind));
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+		   build_int_cst (integer_type_node, annot_expr_ivdep_kind),
+		   NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+		   build_int_cst (integer_type_node, annot_expr_unroll_kind),
+		   build_int_cst (integer_type_node, unroll));
   DO_COND (do_stmt) = cond;
 }
 
@@ -974,7 +988,7 @@ finish_for_init_stmt (tree for_stmt)
    FOR_STMT.  */
 
 void
-finish_for_cond (tree cond, tree for_stmt, bool ivdep)
+finish_for_cond (tree cond, tree for_stmt, bool ivdep, unsigned short unroll)
 {
   if (check_no_cilk (cond,
 	 "Cilk array notation cannot be used in a condition for a for-loop",
@@ -984,11 +998,20 @@ finish_for_cond (tree cond, tree for_stm
   finish_cond (&FOR_COND (for_stmt), cond);
   begin_maybe_infinite_loop (cond);
   if (ivdep && cond != error_mark_node)
-    FOR_COND (for_stmt) = build2 (ANNOTATE_EXPR,
+    FOR_COND (for_stmt) = build3 (ANNOTATE_EXPR,
 				  TREE_TYPE (FOR_COND (for_stmt)),
 				  FOR_COND (for_stmt),
 				  build_int_cst (integer_type_node,
-						 annot_expr_ivdep_kind));
+						 annot_expr_ivdep_kind),
+				  NULL_TREE);
+  if (unroll && cond != error_mark_node)
+    FOR_COND (for_stmt) = build3 (ANNOTATE_EXPR,
+				  TREE_TYPE (FOR_COND (for_stmt)),
+				  FOR_COND (for_stmt),
+				  build_int_cst (integer_type_node,
+						 annot_expr_unroll_kind),
+				  build_int_cst (integer_type_node,
+						 unroll));
   simplify_loop_decl_cond (&FOR_COND (for_stmt), FOR_BODY (for_stmt));
 }
 
Index: doc/extend.texi
===================================================================
--- doc/extend.texi	(revision 220084)
+++ doc/extend.texi	(working copy)
@@ -17881,6 +17881,18 @@ void ignore_vec_dep (int *a, int k, int
 @}
 @end smallexample
 
+@table @code
+@item #pragma GCC unroll @var{n}
+@cindex pragma GCC unroll @var{n}
+
+With this pragma, the programmer informs the optimizer how many times
+a loop should be unrolled.  A 0 or 1 informs the compiler to not
+perform any loop unrolling.  The pragma must be immediately before
+@samp{#pragma ivdep} or a @code{for}, @code{while} or @code{do} loop
+and applies only to the loop that follows.  @var{n} is an
+assignment-expression that evaluates to an integer constant.
+
+@end table
 
 @node Unnamed Fields
 @section Unnamed struct/union fields within structs/unions
Index: fortran/trans-stmt.c
===================================================================
--- fortran/trans-stmt.c	(revision 220084)
+++ fortran/trans-stmt.c	(working copy)
@@ -2876,9 +2876,10 @@ gfc_trans_forall_loop (forall_info *fora
       cond = fold_build2_loc (input_location, LE_EXPR, boolean_type_node,
 			      count, build_int_cst (TREE_TYPE (count), 0));
       if (forall_tmp->do_concurrent)
-	cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+	cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
 		       build_int_cst (integer_type_node,
-				      annot_expr_ivdep_kind));
+				      annot_expr_ivdep_kind),
+		       NULL_TREE);
 
       tmp = build1_v (GOTO_EXPR, exit_label);
       tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node,
Index: function.h
===================================================================
--- function.h	(revision 220084)
+++ function.h	(working copy)
@@ -670,6 +670,10 @@ struct GTY(()) function {
 
   /* Set when the tail call has been identified.  */
   unsigned int tail_call_marked : 1;
+
+  /* Set when #pragma unroll has been used in the body.  Used by rtl
+     unrolling to know when to perform unrolling in the function.  */
+  unsigned int has_unroll : 1;
 };
 
 /* Add the decl D to the local_decls list of FUN.  */
Index: gimple-low.c
===================================================================
--- gimple-low.c	(revision 220084)
+++ gimple-low.c	(working copy)
@@ -347,7 +347,7 @@ lower_stmt (gimple_stmt_iterator *gsi, s
 	for (i = 0; i < gimple_call_num_args (stmt); i++)
 	  {
 	    tree arg = gimple_call_arg (stmt, i);
-	    if (EXPR_P (arg))
+	    if (arg && EXPR_P (arg))
 	      TREE_SET_BLOCK (arg, data->block);
 	  }
 
Index: gimple-walk.c
===================================================================
--- gimple-walk.c	(revision 220084)
+++ gimple-walk.c	(working copy)
@@ -261,7 +261,7 @@ walk_gimple_op (gimple stmt, walk_tree_f
 
       for (i = 0; i < gimple_call_num_args (stmt); i++)
 	{
-	  if (wi)
+	  if (wi && gimple_call_arg (stmt, i))
 	    wi->val_only
 	      = is_gimple_reg_type (TREE_TYPE (gimple_call_arg (stmt, i)));
 	  ret = walk_tree (gimple_call_arg_ptr (stmt, i), callback_op, wi,
Index: gimplify.c
===================================================================
--- gimplify.c	(revision 220084)
+++ gimplify.c	(working copy)
@@ -2908,6 +2908,9 @@ gimple_boolify (tree expr)
     case ANNOTATE_EXPR:
       switch ((enum annot_expr_kind) TREE_INT_CST_LOW (TREE_OPERAND (expr, 1)))
 	{
+	case annot_expr_unroll_kind:
+	  cfun->has_unroll = 1;
+	  /* fall-through */
 	case annot_expr_ivdep_kind:
 	case annot_expr_no_vector_kind:
 	case annot_expr_vector_kind:
@@ -7947,6 +7950,7 @@ gimplify_expr (tree *expr_p, gimple_seq
 	  {
 	    tree cond = TREE_OPERAND (*expr_p, 0);
 	    tree kind = TREE_OPERAND (*expr_p, 1);
+	    tree data = TREE_OPERAND (*expr_p, 2);
 	    tree type = TREE_TYPE (cond);
 	    if (!INTEGRAL_TYPE_P (type))
 	      {
@@ -7957,7 +7961,7 @@ gimplify_expr (tree *expr_p, gimple_seq
 	    tree tmp = create_tmp_var (type);
 	    gimplify_arg (&cond, pre_p, EXPR_LOCATION (*expr_p));
 	    gcall *call
-	      = gimple_build_call_internal (IFN_ANNOTATE, 2, cond, kind);
+	      = gimple_build_call_internal (IFN_ANNOTATE, 3, cond, kind, data);
 	    gimple_call_set_lhs (call, tmp);
 	    gimplify_seq_add_stmt (pre_p, call);
 	    *expr_p = tmp;
Index: loop-init.c
===================================================================
--- loop-init.c	(revision 220084)
+++ loop-init.c	(working copy)
@@ -375,6 +375,7 @@ pass_loop2::gate (function *fun)
       && (flag_move_loop_invariants
 	  || flag_unswitch_loops
 	  || flag_unroll_loops
+	  || cfun->has_unroll
 #ifdef HAVE_doloop_end
 	  || (flag_branch_on_count_reg && HAVE_doloop_end)
 #endif
@@ -576,7 +577,8 @@ public:
   /* opt_pass methods: */
   virtual bool gate (function *)
     {
-      return (flag_peel_loops || flag_unroll_loops || flag_unroll_all_loops);
+      return (flag_peel_loops || flag_unroll_loops || flag_unroll_all_loops
+	      || cfun->has_unroll);
     }
 
   virtual unsigned int execute (function *);
Index: loop-unroll.c
===================================================================
--- loop-unroll.c	(revision 220084)
+++ loop-unroll.c	(working copy)
@@ -243,16 +243,26 @@ report_unroll (struct loop *loop, locati
 
 /* Decide whether unroll loops and how much.  */
 static void
-decide_unrolling (int flags)
+decide_unrolling (int base_flags)
 {
   struct loop *loop;
 
   /* Scan the loops, inner ones first.  */
   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
     {
+      int flags = base_flags;
+      if (loop->unroll > 1)
+	flags = UAP_UNROLL | UAP_UNROLL_ALL;
       loop->lpt_decision.decision = LPT_NONE;
       location_t locus = get_loop_location (loop);
 
+      if (loop->unroll == 1)
+	{
+	  dump_printf_loc (TDF_RTL, locus,
+			   "not unrolling loop, user didn't want it unrolled\n");
+	  continue;
+	}
+
       if (dump_enabled_p ())
 	dump_printf_loc (TDF_RTL, locus,
                          ";; *** Considering loop %d at BB %d for "
@@ -422,6 +432,19 @@ decide_unroll_constant_iterations (struc
       return;
     }
 
+  if (loop->unroll)
+    {
+      loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
+      loop->lpt_decision.times = loop->unroll - 1;
+      if (loop->lpt_decision.times > desc->niter - 2)
+	{
+	  /* They won't do this for us.  */
+	  loop->lpt_decision.decision = LPT_NONE;
+	  loop->lpt_decision.times = desc->niter - 2;
+	}
+      return;
+    }
+
   /* Check whether the loop rolls enough to consider.  
      Consult also loop bounds and profile; in the case the loop has more
      than one exit it may well loop less than determined maximal number
@@ -443,7 +466,7 @@ decide_unroll_constant_iterations (struc
   best_copies = 2 * nunroll + 10;
 
   i = 2 * nunroll + 2;
-  if (i - 1 >= desc->niter)
+  if (i > desc->niter - 2)
     i = desc->niter - 2;
 
   for (; i >= nunroll - 1; i--)
@@ -695,6 +718,9 @@ decide_unroll_runtime_iterations (struct
   if (targetm.loop_unroll_adjust)
     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 
+  if (loop->unroll)
+    nunroll = loop->unroll;
+
   /* Skip big loops.  */
   if (nunroll <= 1)
     {
@@ -733,8 +759,9 @@ decide_unroll_runtime_iterations (struct
       return;
     }
 
-  /* Success; now force nunroll to be power of 2, as we are unable to
-     cope with overflows in computation of number of iterations.  */
+  /* Success; now force nunroll to be power of 2, as code-gen
+     requires it, we are unable to cope with overflows in
+     computation of number of iterations.  */
   for (i = 1; 2 * i <= nunroll; i *= 2)
     continue;
 
@@ -843,9 +870,10 @@ compare_and_jump_seq (rtx op0, rtx op1,
   return seq;
 }
 
-/* Unroll LOOP for which we are able to count number of iterations in runtime
-   LOOP->LPT_DECISION.TIMES times.  The transformation does this (with some
-   extra care for case n < 0):
+/* Unroll LOOP for which we are able to count number of iterations in
+   runtime LOOP->LPT_DECISION.TIMES times.  The times value must be a
+   power of two.  The transformation does this (with some extra care
+   for case n < 0):
 
    for (i = 0; i < n; i++)
      body;
@@ -1142,6 +1170,9 @@ decide_unroll_stupid (struct loop *loop,
   if (targetm.loop_unroll_adjust)
     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 
+  if (loop->unroll)
+    nunroll = loop->unroll;
+
   /* Skip big loops.  */
   if (nunroll <= 1)
     {
Index: lto-streamer-in.c
===================================================================
--- lto-streamer-in.c	(revision 220084)
+++ lto-streamer-in.c	(working copy)
@@ -751,6 +751,7 @@ input_cfg (struct lto_input_block *ib, s
 
       /* Read OMP SIMD related info.  */
       loop->safelen = streamer_read_hwi (ib);
+      loop->unroll = streamer_read_hwi (ib);
       loop->dont_vectorize = streamer_read_hwi (ib);
       loop->force_vectorize = streamer_read_hwi (ib);
       loop->simduid = stream_read_tree (ib, data_in);
Index: lto-streamer-out.c
===================================================================
--- lto-streamer-out.c	(revision 220084)
+++ lto-streamer-out.c	(working copy)
@@ -1884,6 +1884,7 @@ output_cfg (struct output_block *ob, str
 
       /* Write OMP SIMD related info.  */
       streamer_write_hwi (ob, loop->safelen);
+      streamer_write_hwi (ob, loop->unroll);
       streamer_write_hwi (ob, loop->dont_vectorize);
       streamer_write_hwi (ob, loop->force_vectorize);
       stream_write_tree (ob, loop->simduid, true);
Index: testsuite/c-c++-common/unroll-1.c
===================================================================
--- testsuite/c-c++-common/unroll-1.c	(revision 0)
+++ testsuite/c-c++-common/unroll-1.c	(working copy)
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdisable-tree-cunroll -fdump-rtl-loop2_unroll -fdump-tree-cunrolli-details" } */
+
+void bar(int);
+
+int j;
+
+void test1()
+{
+  unsigned long m = j;
+  unsigned long i;
+
+  /* { dg-final { scan-tree-dump "loop with 9 iterations completely unrolled" "cunrolli" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "21:\(5|11\): note: loop unrolled 7 times" "loop2_unroll" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= j; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "26:\(5|11\): note: loop unrolled 3 times" "loop2_unroll" } } */
+  #pragma GCC unroll 7
+  for (unsigned long i = 1; i <= j; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "3\[31\]:3: note: loop unrolled 2 times" "loop2_unroll" } } */
+  i = 0;
+  #pragma GCC unroll 3
+  do {
+    bar(i);
+  } while (++i < 9);
+
+  #pragma GCC unroll 4+4
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+}
+
+/* { dg-final { cleanup-rtl-dump "loop2_unroll" } } */
Index: testsuite/c-c++-common/unroll-2.c
===================================================================
--- testsuite/c-c++-common/unroll-2.c	(revision 0)
+++ testsuite/c-c++-common/unroll-2.c	(working copy)
@@ -0,0 +1,68 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-loop2_unroll -fdump-tree-cunrolli-details" } */
+
+void bar(int);
+
+int j;
+
+void test1()
+{
+  unsigned long m = j;
+  unsigned long i;
+
+  /* { dg-final { scan-tree-dump "15:\[0-9\]*: note: loop turned into non-loop; it never loops"  "cunrolli" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "\(19|21\):\(5|11\): note: loop unrolled 7 times" "loop2_unroll" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= j; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "26:\[0-9\]*: note: loop unrolled 3 times" "loop2_unroll" } } */
+  #pragma GCC unroll 7
+  for (unsigned long i = 1; i <= j; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "3\[13\]:\[0-9\]*: note: loop unrolled 2 times" "loop2_unroll" } } */
+  i = 0;
+  #pragma GCC unroll 3
+  do {
+    bar(i);
+  } while (++i < 9);
+}
+
+void test2 () {
+  unsigned long m = j;
+  unsigned long i;
+
+  /* { dg-final { scan-tree-dump "\[424\]:\[0-9\]*: note: loop turned into non-loop; it never loops" "cunrolli" } } */
+  #pragma GCC unroll 8
+  for (unsigned long i = 1; i <= 7; ++i)
+    bar(i);
+
+  /* { dg-final { scan-tree-dump "4\[79\]:\[0-9\]*: note: loop turned into non-loop; it never loops" "cunrolli" } } */
+  #pragma GCC unroll 9
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "52:\[0-9\]*: note: loop unrolled 3 times" "loop2_unroll" } } */
+  #pragma GCC unroll 4
+  for (unsigned long i = 1; i <= 15; ++i)
+    bar(i);
+
+  /* { dg-final { scan-tree-dump "5\[79\]:\[0-9\]*: note: loop turned into non-loop; it never loops" "cunrolli" } } */
+  #pragma GCC unroll 709
+  for (unsigned long i = 1; i <= 709; ++i)
+    bar(i);
+
+  /* { dg-final { scan-tree-dump "6\[24\]:\[0-9\]*: note: not unrolling loop, user didn't want it unrolled completely" "cunrolli" } } */
+  #pragma GCC unroll 0
+  for (unsigned long i = 1; i <= 3; ++i)
+    bar(i);
+}
+
+
+/* { dg-final { cleanup-tree-dump "cunrolli" } } */
+/* { dg-final { cleanup-rtl-dump "loop2_unroll" } } */
Index: testsuite/c-c++-common/unroll-3.c
===================================================================
--- testsuite/c-c++-common/unroll-3.c	(revision 0)
+++ testsuite/c-c++-common/unroll-3.c	(working copy)
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -funroll-all-loops -fdump-rtl-loop2_unroll -fdump-tree-cunrolli-details" } */
+
+void bar(int);
+
+int j;
+
+void test1()
+{
+  unsigned long m = j;
+  unsigned long i;
+
+  /* { dg-final { scan-tree-dump "16:\[0-9\]*: note: not unrolling loop, user didn't want it unrolled completely" "cunrolli" } } */
+  /* { dg-final { scan-rtl-dump "16:\[0-9\]*: note: not unrolling loop, user didn't want it unrolled" "loop2_unroll" } } */
+  #pragma GCC unroll 0
+  for (unsigned long i = 1; i <= 3; ++i)
+    bar(i);
+
+  /* { dg-final { scan-rtl-dump "21:\[0-9\]*: note: not unrolling loop, user didn't want it unrolled" "loop2_unroll" } } */
+  #pragma GCC unroll 0
+  for (unsigned long i = 1; i <= m; ++i)
+    bar(i);
+}
+
+/* { dg-final { cleanup-tree-dump "cunrolli" } } */
+/* { dg-final { cleanup-rtl-dump "loop2_unroll" } } */
Index: testsuite/c-c++-common/unroll-4.c
===================================================================
--- testsuite/c-c++-common/unroll-4.c	(revision 0)
+++ testsuite/c-c++-common/unroll-4.c	(working copy)
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdisable-tree-cunroll" } */
+
+void bar(int);
+
+int j;
+
+void test1() {
+  unsigned long m = j;
+  unsigned long i;
+
+  #pragma GCC unroll 20000000000	/* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  #pragma GCC unroll i	/* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */
+  /* { dg-error "cannot appear in a constant-expression|is not usable in a constant expression" "" { target c++ } 16 } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  #pragma GCC unroll n	/* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */
+  /* { dg-error "declared" "" { target *-*-* } 21 } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  #pragma GCC unroll 1+i	/* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */
+  /* { dg-error "cannot appear in a constant-expression|is not usable in a constant expression" "" { target c++ } 26 } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  #pragma GCC unroll  4,4		/* { dg-error "expected end of line before" } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+
+  #pragma GCC unroll  4.2	/* { dg-error "requires an assignment-expression that evaluates to a non-negative integral constant less than or equal to" } */
+  for (unsigned long i = 1; i <= 8; ++i)
+    bar(i);
+}
Index: tree-cfg.c
===================================================================
--- tree-cfg.c	(revision 220084)
+++ tree-cfg.c	(working copy)
@@ -316,6 +316,10 @@ replace_loop_annotate_in_block (basic_bl
 	  loop->force_vectorize = true;
 	  cfun->has_force_vectorize_loops = true;
 	  break;
+	case annot_expr_unroll_kind:
+	  loop->unroll = (unsigned short)tree_to_shwi (gimple_call_arg (stmt,
+									2));
+	  break;
 	default:
 	  gcc_unreachable ();
 	}
@@ -365,6 +369,7 @@ replace_loop_annotate (void)
 	    case annot_expr_ivdep_kind:
 	    case annot_expr_no_vector_kind:
 	    case annot_expr_vector_kind:
+	    case annot_expr_unroll_kind:
 	      break;
 	    default:
 	      gcc_unreachable ();
@@ -3385,6 +3390,8 @@ verify_gimple_call (gcall *stmt)
   for (i = 0; i < gimple_call_num_args (stmt); ++i)
     {
       tree arg = gimple_call_arg (stmt, i);
+      if (! arg)
+	continue;
       if ((is_gimple_reg_type (TREE_TYPE (arg))
 	   && !is_gimple_val (arg))
 	  || (!is_gimple_reg_type (TREE_TYPE (arg))
@@ -7512,6 +7519,8 @@ print_loop (FILE *file, struct loop *loo
       fprintf (file, ", estimate = ");
       print_decu (loop->nb_iterations_estimate, file);
     }
+  if (loop->unroll)
+    fprintf (file, ", unroll = %d", loop->unroll);
   fprintf (file, ")\n");
 
   /* Print loop's body.  */
Index: tree-core.h
===================================================================
--- tree-core.h	(revision 220084)
+++ tree-core.h	(working copy)
@@ -725,6 +725,7 @@ enum annot_expr_kind {
   annot_expr_ivdep_kind,
   annot_expr_no_vector_kind,
   annot_expr_vector_kind,
+  annot_expr_unroll_kind,
   annot_expr_kind_last
 };
 
Index: tree-pretty-print.c
===================================================================
--- tree-pretty-print.c	(revision 220084)
+++ tree-pretty-print.c	(working copy)
@@ -2313,6 +2313,10 @@ dump_generic_node (pretty_printer *pp, t
 	case annot_expr_vector_kind:
 	  pp_string (pp, ", vector");
 	  break;
+	case annot_expr_unroll_kind:
+	  pp_printf (pp, ", unroll %d",
+		     (int)TREE_INT_CST_LOW (TREE_OPERAND (node, 2)));
+	  break;
 	default:
 	  gcc_unreachable ();
 	}
Index: tree-ssa-loop-ivcanon.c
===================================================================
--- tree-ssa-loop-ivcanon.c	(revision 220084)
+++ tree-ssa-loop-ivcanon.c	(working copy)
@@ -686,8 +686,7 @@ try_unroll_loop_completely (struct loop
 			    HOST_WIDE_INT maxiter,
 			    location_t locus)
 {
-  unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns;
-  struct loop_size size;
+  unsigned HOST_WIDE_INT n_unroll = 0;
   bool n_unroll_found = false;
   edge edge_to_cancel = NULL;
   int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;
@@ -731,7 +730,8 @@ try_unroll_loop_completely (struct loop
   if (!n_unroll_found)
     return false;
 
-  if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
+  if (loop->unroll == 0 &&
+      n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "Not unrolling loop %d "
@@ -753,107 +753,130 @@ try_unroll_loop_completely (struct loop
       if (ul == UL_SINGLE_ITER)
 	return false;
 
-      large = tree_estimate_loop_size
-		 (loop, exit, edge_to_cancel, &size,
-		  PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
-      ninsns = size.overall;
-      if (large)
+      if (loop->unroll)
 	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
-		     loop->num);
-	  return false;
+	  /* If they wanted to unroll more than we want, don't unroll
+	     it completely.  */
+	  if (n_unroll > (unsigned)loop->unroll)
+	    {
+	      dump_printf_loc (report_flags, locus,
+	        "not unrolling loop, "
+		"user didn't want it unrolled completely.\n");
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file,
+		  "Not unrolling loop %d: "
+		  "user didn't want it unrolled completely.\n",
+			 loop->num);
+	      return false;
+	    }
 	}
-
-      unr_insns = estimated_unrolled_size (&size, n_unroll);
-      if (dump_file && (dump_flags & TDF_DETAILS))
+      else
 	{
-	  fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
-	  fprintf (dump_file, "  Estimated size after unrolling: %d\n",
-		   (int) unr_insns);
-	}
+	  struct loop_size size;
+	  large = tree_estimate_loop_size
+	            (loop, exit, edge_to_cancel, &size,
+		     PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
+	  unsigned HOST_WIDE_INT ninsns = size.overall;
+	  if (large)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
+			 loop->num);
+	      return false;
+	    }
 
-      /* If the code is going to shrink, we don't need to be extra cautious
-	 on guessing if the unrolling is going to be profitable.  */
-      if (unr_insns
-	  /* If there is IV variable that will become constant, we save
-	     one instruction in the loop prologue we do not account
-	     otherwise.  */
-	  <= ninsns + (size.constant_iv != false))
-	;
-      /* We unroll only inner loops, because we do not consider it profitable
-	 otheriwse.  We still can cancel loopback edge of not rolling loop;
-	 this is always a good idea.  */
-      else if (ul == UL_NO_GROWTH)
-	{
+	  unsigned HOST_WIDE_INT unr_insns
+	    = estimated_unrolled_size (&size, n_unroll);
 	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
-		     loop->num);
-	  return false;
-	}
-      /* Outer loops tend to be less interesting candidates for complete
-	 unrolling unless we can do a lot of propagation into the inner loop
-	 body.  For now we disable outer loop unrolling when the code would
-	 grow.  */
-      else if (loop->inner)
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "it is not innermost and code would grow.\n",
-		     loop->num);
-	  return false;
-	}
-      /* If there is call on a hot path through the loop, then
-	 there is most probably not much to optimize.  */
-      else if (size.num_non_pure_calls_on_hot_path)
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "contains call and code would grow.\n",
-		     loop->num);
-	  return false;
-	}
-      /* If there is pure/const call in the function, then we
-	 can still optimize the unrolled loop body if it contains
-	 some other interesting code than the calls and code
-	 storing or cumulating the return value.  */
-      else if (size.num_pure_calls_on_hot_path
-	       /* One IV increment, one test, one ivtmp store
-		  and one useful stmt.  That is about minimal loop
-		  doing pure call.  */
-	       && (size.non_call_stmts_on_hot_path
-		   <= 3 + size.num_pure_calls_on_hot_path))
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "contains just pure calls and code would grow.\n",
-		     loop->num);
-	  return false;
-	}
-      /* Complette unrolling is major win when control flow is removed and
-	 one big basic block is created.  If the loop contains control flow
-	 the optimization may still be a win because of eliminating the loop
-	 overhead but it also may blow the branch predictor tables.
-	 Limit number of branches on the hot path through the peeled
-	 sequence.  */
-      else if (size.num_branches_on_hot_path * (int)n_unroll
-	       > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     " number of branches on hot path in the unrolled sequence"
-		     " reach --param max-peel-branches limit.\n",
-		     loop->num);
-	  return false;
-	}
-      else if (unr_insns
-	       > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
-	{
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d: "
-		     "(--param max-completely-peeled-insns limit reached).\n",
-		     loop->num);
-	  return false;
+	    {
+	      fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
+	      fprintf (dump_file, "  Estimated size after unrolling: %d\n",
+		       (int) unr_insns);
+	    }
+
+	  /* If the code is going to shrink, we don't need to be extra
+	     cautious on guessing if the unrolling is going to be
+	     profitable.  */
+	  if (unr_insns
+	      /* If there is IV variable that will become constant, we
+		 save one instruction in the loop prologue we do not
+		 account otherwise.  */
+	      <= ninsns + (size.constant_iv != false))
+	    ;
+	  /* We unroll only inner loops, because we do not consider it
+	     profitable otherwise.  We still can cancel loopback edge
+	     of not rolling loop; this is always a good idea.  */
+	  else if (ul == UL_NO_GROWTH)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* Outer loops tend to be less interesting candidates for
+	     complete unrolling unless we can do a lot of propagation
+	     into the inner loop body.  For now we disable outer loop
+	     unrolling when the code would grow.  */
+	  else if (loop->inner)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "it is not innermost and code would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* If there is call on a hot path through the loop, then
+	     there is most probably not much to optimize.  */
+	  else if (size.num_non_pure_calls_on_hot_path)
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "contains call and code would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* If there is pure/const call in the function, then we can
+	     still optimize the unrolled loop body if it contains some
+	     other interesting code than the calls and code storing or
+	     cumulating the return value.  */
+	  else if (size.num_pure_calls_on_hot_path
+		   /* One IV increment, one test, one ivtmp store and
+		      one useful stmt.  That is about minimal loop
+		      doing pure call.  */
+		   && (size.non_call_stmts_on_hot_path
+		       <= 3 + size.num_pure_calls_on_hot_path))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "contains just pure calls and code would grow.\n",
+			 loop->num);
+	      return false;
+	    }
+	  /* Complete unrolling is major win when control flow is
+	     removed and one big basic block is created.  If the loop
+	     contains control flow the optimization may still be a win
+	     because of eliminating the loop overhead but it also may
+	     blow the branch predictor tables.  Limit number of
+	     branches on the hot path through the peeled sequence.  */
+	  else if (size.num_branches_on_hot_path * (int)n_unroll
+		   > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 " number of branches on hot path in the unrolled sequence"
+			 " reach --param max-peel-branches limit.\n",
+			 loop->num);
+	      return false;
+	    }
+	  else if (unr_insns
+		   > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
+	    {
+	      if (dump_file && (dump_flags & TDF_DETAILS))
+		fprintf (dump_file, "Not unrolling loop %d: "
+			 "(--param max-completely-peeled-insns limit reached).\n",
+			 loop->num);
+	      return false;
+	    }
 	}
       dump_printf_loc (report_flags, locus,
                        "loop turned into non-loop; it never loops.\n");
@@ -897,8 +920,9 @@ try_unroll_loop_completely (struct loop
       else
 	gimple_cond_make_true (cond);
       update_stmt (cond);
-      /* Do not remove the path. Doing so may remove outer loop
-	 and confuse bookkeeping code in tree_unroll_loops_completelly.  */
+      /* Do not remove the path. Doing so may remove outer loop and
+	 confuse bookkeeping code in
+	 tree_unroll_loops_completelly.  */
     }
 
   /* Store the loop for later unlooping and exit removal.  */
@@ -974,23 +998,33 @@ try_peel_loop (struct loop *loop,
   if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0)
     return false;
 
+  /* We don't peel loops that will be unrolled as this can duplicate a
+     loop more times than the user requested.  */
+  if (loop->unroll)
+    {
+      if (dump_file)
+        fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
+      return false;
+    }
+
   /* Peel only innermost loops.  */
   if (loop->inner)
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: outer loop\n");
+	fprintf (dump_file, "Not peeling: outer loop\n");
       return false;
     }
 
   if (!optimize_loop_for_speed_p (loop))
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: cold loop\n");
+	fprintf (dump_file, "Not peeling: cold loop\n");
       return false;
     }
 
   /* Check if there is an estimate on the number of iterations.  */
   npeel = estimated_loop_iterations_int (loop);
+
   if (npeel < 0)
     {
       if (dump_file)
@@ -998,10 +1032,11 @@ try_peel_loop (struct loop *loop,
 	         "estimated\n");
       return false;
     }
+
   if (maxiter >= 0 && maxiter <= npeel)
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: upper bound is known so can "
+	fprintf (dump_file, "Not peeling: upper bound is known so can "
 		 "unroll completely\n");
       return false;
     }
@@ -1012,7 +1047,7 @@ try_peel_loop (struct loop *loop,
   if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: rolls too much "
+	fprintf (dump_file, "Not peeling: rolls too much "
 		 "(%i + 1 > --param max-peel-times)\n", npeel);
       return false;
     }
@@ -1025,7 +1060,7 @@ try_peel_loop (struct loop *loop,
       > PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
     {
       if (dump_file)
-        fprintf (dump_file, "Not peeling: peeled sequence size is too large "
+	fprintf (dump_file, "Not peeling: peeled sequence size is too large "
 		 "(%i insns > --param max-peel-insns)", peeled_size);
       return false;
     }
@@ -1302,7 +1337,9 @@ tree_unroll_loops_completely_1 (bool may
   if (!loop_father)
     return false;
 
-  if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
+  if (loop->unroll > 1)
+    ul = UL_ALL;
+  else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
       /* Unroll outermost loops only if asked to do so or they do
 	 not cause code growth.  */
       && (unroll_outer || loop_outer (loop_father)))
@@ -1539,7 +1576,9 @@ public:
   {}
 
   /* opt_pass methods: */
-  virtual bool gate (function *) { return optimize >= 2; }
+  virtual bool gate (function *) {
+    return optimize >= 2 || cfun->has_unroll;
+  }
   virtual unsigned int execute (function *);
 
 }; // class pass_complete_unrolli
Index: tree.def
===================================================================
--- tree.def	(revision 220084)
+++ tree.def	(working copy)
@@ -1365,8 +1365,9 @@ DEFTREECODE (TARGET_OPTION_NODE, "target
 
 /* ANNOTATE_EXPR.
    Operand 0 is the expression to be annotated.
-   Operand 1 is the annotation kind.  */
-DEFTREECODE (ANNOTATE_EXPR, "annotate_expr", tcc_expression, 2)
+   Operand 1 is the annotation kind.
+   Operand 2 is optional data.  */
+DEFTREECODE (ANNOTATE_EXPR, "annotate_expr", tcc_expression, 3)
 
 /* Cilk spawn statement
    Operand 0 is the CALL_EXPR.  */

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: #pragma GCC unroll support
  2015-03-05 23:13             ` #pragma GCC unroll support Mike Stump
@ 2015-03-06  1:32               ` Sandra Loosemore
  2015-03-06 12:37                 ` Bernhard Reutner-Fischer
  2015-03-07  1:16               ` Joseph Myers
  1 sibling, 1 reply; 25+ messages in thread
From: Sandra Loosemore @ 2015-03-06  1:32 UTC (permalink / raw)
  To: Mike Stump
  Cc: Joseph Myers, Jason Merrill, Richard Biener, gcc-patches Patches

On 03/05/2015 04:12 PM, Mike Stump wrote:
>
> Ping?
>

Just commenting on the documentation part:

> Index: doc/extend.texi
> ===================================================================
> --- doc/extend.texi	(revision 220084)
> +++ doc/extend.texi	(working copy)
> @@ -17881,6 +17881,18 @@ void ignore_vec_dep (int *a, int k, int
>  @}
>  @end smallexample
>
> +@table @code
> +@item #pragma GCC unroll @var{n}
> +@cindex pragma GCC unroll @var{n}
> +
> +With this pragma, the programmer informs the optimizer how many times
> +a loop should be unrolled.  A 0 or 1 informs the compiler to not
> +perform any loop unrolling.  The pragma must be immediately before
> +@samp{#pragma ivdep} or a @code{for}, @code{while} or @code{do} loop
> +and applies only to the loop that follows.  @var{n} is an
> +assignment-expression that evaluates to an integer constant.
> +
> +@end table
>
>  @node Unnamed Fields
>  @section Unnamed struct/union fields within structs/unions

User documentation shouldn't refer to the reader as "the programmer"; 
either use the second person "you" or the imperative.  I'd also 
rearrange the paragraph slightly to put the two sentences about the 
parameter together, something like:

Use this pragma to inform the compiler how many times a loop should be 
unrolled.  The pragma must be immediately before
@samp{#pragma ivdep} or a @code{for}, @code{while} or @code{do} loop
and applies only to the loop that follows.  @var{n} is an
assignment-expression that evaluates to an integer constant.
A 0 or 1 informs the compiler to not perform any loop unrolling.

-Sandra

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: #pragma GCC unroll support
  2015-03-06  1:32               ` Sandra Loosemore
@ 2015-03-06 12:37                 ` Bernhard Reutner-Fischer
  0 siblings, 0 replies; 25+ messages in thread
From: Bernhard Reutner-Fischer @ 2015-03-06 12:37 UTC (permalink / raw)
  To: Sandra Loosemore
  Cc: Mike Stump, Joseph Myers, Jason Merrill, Richard Biener,
	gcc-patches Patches

[-- Attachment #1: Type: text/plain, Size: 1555 bytes --]

On 6 March 2015 at 02:31, Sandra Loosemore <sandra@codesourcery.com> wrote:
> On 03/05/2015 04:12 PM, Mike Stump wrote:
>>
>>
>> Ping?
>>
>
> Just commenting on the documentation part:
[]

and a few coding style nits:

+++ b/gcc/c-family/c-pragma.c
@@ -1459,6 +1459,10 @@ init_pragma (void)
     cpp_register_deferred_pragma (parse_in, "GCC", "ivdep",
PRAGMA_IVDEP, false,
                                  false);

+  if (!flag_preprocess_only)
+    cpp_register_deferred_pragma (parse_in, "GCC", "unroll",
PRAGMA_UNROLL, false,
+                                 false);
+

overlong line (also for the IVDEP above)

+++ b/gcc/c/c-parser.c
+static void c_parser_while_statement (c_parser *, bool, unsigned short);
+static void c_parser_do_statement (c_parser *, bool, unsigned short);
+static void c_parser_for_statement (c_parser *, bool, unsigned short);

since we're now a C++ app I would have added a default for the
unsigned short unroll of = 0
Same for
finish_while_stmt_cond, finish_do_stmt, finish_for_cond et al.

In cp_parser_range_for() i take it you remember there is a //TODO
I am attaching an unroll-5.C which might show that this does not seem
to be implemented yet, IIUC

gcc/loop-unroll.c::decide_unrolling()
I'd put the "if (loop->unroll == 1) {continue}" earlier in the
FOR_EACH_LOOP body (we're C++ nowadays) but maybe our optimizers are
good enough to do that anyway (but i fear we're not up to that?).

I did not see c/c++ tests for both !DIR$ UNROLL and !DIR$ IVDEP, fwiw.
You seem to handle both placements proper, though.
cheers,

[-- Attachment #2: unroll-5.C --]
[-- Type: text/x-csrc, Size: 1012 bytes --]

/* { dg-do compile } */
/* { dg-options "-O2 -fdump-rtl-loop2_unroll -fdump-tree-cunrolli-details" } */

#include <string>
#include <vector>

void bar(int);
int j;

void test1() {
  unsigned long m = j;
  const std::string s("abcdefgh");
  const std::vector<int> v = {7, 6, 5, 4, 3, 2, 1, 0};
  const std::size_t v_sz = 8;

  // { dg-final { scan-tree-dump "18:\[0-9\]*: note: loop turned into non-loop" "cunrolli" } }
  #pragma GCC unroll sizeof("abcdefgh") * 4
  for (auto ch : s)
    bar(ch);

  // { dg-final { scan-tree-dump "23:\[0-9\]*: note: loop turned into non-loop" "cunrolli" } }
  #pragma GCC unroll v_sz
  for (auto i : v)
    bar(i);

  // { dg-final { scan-tree-dump "28:\[0-9\]*: note: loop turned into non-loop" "cunrolli" } }
  #pragma GCC unroll 99
  for (auto i : v)
    bar(i);
}
// { dg-final { scan-rtl-dump-not "Unable to prove that the loop iterates constant times" "loop2_unroll" } }
// { dg-final { cleanup-tree-dump "cunrolli" } }
// { dg-final { cleanup-rtl-dump "loop2_unroll" } }

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: #pragma GCC unroll support
  2015-03-05 23:13             ` #pragma GCC unroll support Mike Stump
  2015-03-06  1:32               ` Sandra Loosemore
@ 2015-03-07  1:16               ` Joseph Myers
  1 sibling, 0 replies; 25+ messages in thread
From: Joseph Myers @ 2015-03-07  1:16 UTC (permalink / raw)
  To: Mike Stump; +Cc: Jason Merrill, Richard Biener, gcc-patches Patches

On Thu, 5 Mar 2015, Mike Stump wrote:

> On Jan 30, 2015, at 8:27 AM, Mike Stump <mikestump@comcast.net> wrote:
> > On Jan 30, 2015, at 7:49 AM, Joseph Myers <joseph@codesourcery.com> wrote:
> >> Use error_at, and %u directly in the format.
> > 
> > Done.
> 
> Ping?

I don't see any sign of 
<https://gcc.gnu.org/ml/gcc-patches/2015-01/msg02735.html> having been 
addressed.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH, RFC] fortran [was Re: #pragma GCC unroll support]
  2015-02-03  0:08                     ` Mike Stump
@ 2015-05-28  9:56                       ` Bernhard Reutner-Fischer
  2015-05-28 12:29                         ` Mike Stump
  0 siblings, 1 reply; 25+ messages in thread
From: Bernhard Reutner-Fischer @ 2015-05-28  9:56 UTC (permalink / raw)
  To: Mike Stump; +Cc: GCC Patches, gfortran

On 3 February 2015 at 01:07, Mike Stump <mikestump@comcast.net> wrote:
> On Feb 2, 2015, at 3:22 PM, Bernhard Reutner-Fischer <rep.dot.nop@gmail.com> wrote:
>> Untested draft patch
>
> I looked it over, seems to slot in nicely.
>
>
> +       gfc_error ("%<GCC unroll%> directive does not commence a loop at %C”);
>
> So, don’t like commence here.

Does anybody have a better suggestion?

directive not at the start of a loop at %C
directive not followed by a loop at %C

dunno..

Mike, did you tweak the one or two things you got from the reviews
yet? ISTM your
main patch was not OKed yet nor installed.

thanks,

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH, RFC] fortran [was Re: #pragma GCC unroll support]
  2015-05-28  9:56                       ` Bernhard Reutner-Fischer
@ 2015-05-28 12:29                         ` Mike Stump
  2015-11-02 12:18                           ` Bernhard Reutner-Fischer
  0 siblings, 1 reply; 25+ messages in thread
From: Mike Stump @ 2015-05-28 12:29 UTC (permalink / raw)
  To: Bernhard Reutner-Fischer; +Cc: GCC Patches, gfortran

On May 28, 2015, at 2:02 AM, Bernhard Reutner-Fischer <rep.dot.nop@gmail.com> wrote:
> 
> Does anybody have a better suggestion?
> 
> directive not at the start of a loop at %C
> directive not followed by a loop at %C

I prefer either of these.  I have a slight preference for the first.

> Mike, did you tweak the one or two things you got from the reviews
> yet?

Nope.

> ISTM your main patch was not OKed yet nor installed.

Been busy with work.  I’ll come back and address the nits that people pointed out and see if I can ping it some more and try and get the C++ bits reviewed.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH, RFC] fortran [was Re: #pragma GCC unroll support]
  2015-05-28 12:29                         ` Mike Stump
@ 2015-11-02 12:18                           ` Bernhard Reutner-Fischer
  0 siblings, 0 replies; 25+ messages in thread
From: Bernhard Reutner-Fischer @ 2015-11-02 12:18 UTC (permalink / raw)
  To: Mike Stump; +Cc: GCC Patches, gfortran

On May 28, 2015 2:03:08 PM GMT+02:00, Mike Stump <mikestump@comcast.net> wrote:
>On May 28, 2015, at 2:02 AM, Bernhard Reutner-Fischer
><rep.dot.nop@gmail.com> wrote:
>> 
>> Does anybody have a better suggestion?
>> 
>> directive not at the start of a loop at %C
>> directive not followed by a loop at %C
>
>I prefer either of these.  I have a slight preference for the first.

I've changed the Fortran error to the first locally.

>
>> Mike, did you tweak the one or two things you got from the reviews
>> yet?
>
>Nope.
>
>> ISTM your main patch was not OKed yet nor installed.
>
>Been busy with work.  Iâ€™ll come back and address the nits that people
>pointed out and see if I can ping it some more and try and get the C++
>bits reviewed.

I take it you're well aware that stage 1 will end in a couple of weeks, and maybe you want to have this in GCC-6, so now would be the perfect time.. :)

Cheers,


^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2015-11-02 12:18 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-12-22 21:51 #pragma GCC unroll support Mike Stump
2014-12-23 20:22 ` Andi Kleen
2015-01-08 12:45 ` Richard Biener
2015-01-26 21:13   ` Mike Stump
2015-01-26 21:18     ` Mike Stump
2015-01-30  2:51       ` Joseph Myers
2015-01-30  6:12         ` Mike Stump
2015-01-30  2:54     ` Joseph Myers
2015-01-30  6:57       ` Mike Stump
2015-01-30 17:46         ` Joseph Myers
2015-01-30 17:52           ` Mike Stump
2015-01-30 18:06             ` Marek Polacek
2015-01-30 23:48               ` Bernhard Reutner-Fischer
2015-01-30 22:16                 ` Mike Stump
2015-02-02 23:22                   ` [PATCH, v0] fortran: !GCC$ unroll for DO Bernhard Reutner-Fischer
2015-02-03  8:42                     ` Tobias Burnus
2015-02-02 23:22                   ` [PATCH, RFC] fortran [was Re: #pragma GCC unroll support] Bernhard Reutner-Fischer
2015-02-03  0:08                     ` Mike Stump
2015-05-28  9:56                       ` Bernhard Reutner-Fischer
2015-05-28 12:29                         ` Mike Stump
2015-11-02 12:18                           ` Bernhard Reutner-Fischer
2015-03-05 23:13             ` #pragma GCC unroll support Mike Stump
2015-03-06  1:32               ` Sandra Loosemore
2015-03-06 12:37                 ` Bernhard Reutner-Fischer
2015-03-07  1:16               ` Joseph Myers

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).