[PATCH,c] better recovery from missing semicolon after declaration

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH,c] better recovery from missing semicolon after declaration
@ 2010-11-17 15:46 Nathan Froyd
  2010-11-17 16:17 ` Nathan Froyd
  2010-11-19 18:32 ` Joseph S. Myers
  0 siblings, 2 replies; 6+ messages in thread
From: Nathan Froyd @ 2010-11-17 15:46 UTC (permalink / raw)
  To: gcc-patches

I was editing cp-tree.h and forgot a semicolon after a function
declaration, like so:

  extern void f (tree)
  extern void g (tree);

Compiling the above gave me several tens of pages of errors, none of
which had anything to do with the actual error.

This patch attempts to fix that by detecting when we've just parsed a
declaration and we're faced with what looks like the beginning of the
next declaration.  We cannot use c_parser_next_token_starts_declspecs,
as we need to cope with things like (from
c-torture/compile/20000403-1.c):

  int uname (name) struct utsname *name;

We do, however, attempt to detect when we're not parsing an old-style
declaration and rightly complain for:

  extern void g1 (int)
  struct s { int a; };

It would be better if we could give the location of the previous token,
but the C parser is not really set up to do that.

Tested on x86_64-unknown-linux-gnu.  OK to commit?

-Nathan

gcc/
	* c-parser.c (c_parser_next_token_cannot_follow_declaration):
	New function.
	(c_parser_declaration_or_fndef): Intelligently handle invalid
	tokens after a declaration.

gcc/testsuite/
	* gcc.dg/semicolon-errors.c: New test.

diff --git a/gcc/c-parser.c b/gcc/c-parser.c
index 577528d..92d0efc 100644
--- a/gcc/c-parser.c
+++ b/gcc/c-parser.c
@@ -592,6 +592,64 @@ c_token_starts_declaration (c_token *token)
 
 static c_token *c_parser_peek_2nd_token (c_parser *parser);
 
+/* Return true if the next token from PARSER cannot follow a
+   declaration, false otherwise.  */
+static inline bool
+c_parser_next_token_cannot_follow_declaration (c_parser *parser,
+					       struct c_declarator *declarator)
+{
+  c_token *token = c_parser_peek_token (parser);
+
+  switch (token->type)
+    {
+    case CPP_KEYWORD:
+      switch (token->keyword)
+	{
+	case RID_EXTERN:
+	case RID_STATIC:
+	case RID_TYPEDEF:
+	case RID_INLINE:
+	case RID_AUTO:
+	case RID_THREAD:
+	  /* Can never follow.  */
+	  return true;
+	case RID_STRUCT:
+	case RID_UNION:
+	case RID_UNSIGNED:
+	case RID_LONG:
+	case RID_INT128:
+	case RID_SHORT:
+	case RID_SIGNED:
+	case RID_COMPLEX:
+	case RID_INT:
+	case RID_CHAR:
+	case RID_FLOAT:
+	case RID_DOUBLE:
+	case RID_VOID:
+	case RID_DFLOAT32:
+	case RID_DFLOAT64:
+	case RID_DFLOAT128:
+	case RID_BOOL:
+	case RID_ENUM:
+	  /* We need to be careful, since K&R argument lists can begin
+	     with these tokens.  Consult the types of the argument info;
+	     if we actually have a type, then these tokens are invalid.
+	     Don't bother grovelling through the whole list.  */
+	  if (declarator->kind == cdk_pointer)
+	    declarator = declarator->declarator;
+	  if (declarator->kind == cdk_function)
+	    return TYPE_P (TREE_VALUE (declarator->u.arg_info->types));
+	  else
+	    return true;
+	default:
+	  return false;
+	}
+      break;
+    default:
+      return false;
+    }
+}
+
 /* Return true if the next token from PARSER can start declaration
    specifiers, false otherwise.  */
 static inline bool
@@ -1601,6 +1659,13 @@ c_parser_declaration_or_fndef (c_parser *parser, bool fndef_ok,
 	      return;
 	    }
 	}
+      else if (c_parser_next_token_cannot_follow_declaration (parser,
+							      declarator))
+	{
+	  start_decl (declarator, specs, false, all_prefix_attrs);
+	  c_parser_error (parser, "expected %<;%>");
+	  return;
+	}
       else if (!fndef_ok)
 	{
 	  c_parser_error (parser, "expected %<=%>, %<,%>, %<;%>, "
diff --git a/gcc/testsuite/gcc.dg/semicolon-errors.c b/gcc/testsuite/gcc.dg/semicolon-errors.c
new file mode 100644
index 0000000..e579313
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/semicolon-errors.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-std=gnu89" } // suppress default -pedantic-errors */
+
+extern void f1 (int)
+extern void f2 (int);		/* { dg-error "expected" } */
+
+extern int x
+extern int z;			/* { dg-error "expected" } */
+
+extern void g1 (int)
+struct s { int a; };		/* { dg-error "expected" } */
+
+typedef unsigned int uint32
+extern int i;			/* { dg-error "expected" } */

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH,c] better recovery from missing semicolon after declaration
  2010-11-17 15:46 [PATCH,c] better recovery from missing semicolon after declaration Nathan Froyd
@ 2010-11-17 16:17 ` Nathan Froyd
  2010-11-17 16:39   ` Paolo Bonzini
  2010-11-19 18:32 ` Joseph S. Myers
  1 sibling, 1 reply; 6+ messages in thread
From: Nathan Froyd @ 2010-11-17 16:17 UTC (permalink / raw)
  To: gcc-patches

On Wed, Nov 17, 2010 at 10:29:29AM -0500, Nathan Froyd wrote:
> This patch attempts to fix that by detecting when we've just parsed a
> declaration and we're faced with what looks like the beginning of the
> next declaration.

I forgot that one testcase needed some adjustments.  The patch below
deletes some dg-error messages since for the opening line of the
testcase:

typedef BYTE unsigned char;	/* { dg-error "expected" } */

we now treat BYTE as a type.  I think this is an improvement, albeit an
unexpected one.

-Nathan

diff --git a/gcc/testsuite/gcc.dg/noncompile/920923-1.c b/gcc/testsuite/gcc.dg/noncompile/920923-1.c
index f586a7c..a524931 100644
--- a/gcc/testsuite/gcc.dg/noncompile/920923-1.c
+++ b/gcc/testsuite/gcc.dg/noncompile/920923-1.c
@@ -6,9 +6,9 @@ struct PENT { caddr_t v_addr; };/* { dg-error "expected" } */
 typedef struct PENT prec;
 typedef struct PENT *prec_t;
 prec_t mem_hash;
-BYTE *mem_base;			/* { dg-error "unknown type name" } */
+BYTE *mem_base;
 struct PTE {
-     BYTE *p_page;		/* { dg-error "expected" } */
+     BYTE *p_page;
      perm_set p_perms;
 };
 typedef struct PTE pte;
@@ -27,7 +27,7 @@ void
 mmu_walk_find(va)
 caddr_t va;			/* { dg-error "unknown type name" } */
 {
-     BYTE *page_addr; /* { dg-error "unknown type name" } */
+     BYTE *page_addr;
      if (mmu_base[Level1(va)]->valid==0x0) { /* { dg-error "undeclared" } */
 	  l1_base = mmu_base[Level1(va)]->(u.p_tablep) = p_alloc(); /* { dg-error "expected|undeclared" } */
 	  mmu_base[Level1(va)]->valid = 0x3;
@@ -102,8 +102,8 @@ extern void *calloc(__SIZE_TYPE__, __SIZE_TYPE__);
 void
 init_mem()
 {
-     mem_base = (BYTE *) calloc(1024, (1<<13)); /* { dg-error "undeclared|expected" } */
-     ((void)((mem_base != (BYTE *)0)	/* { dg-error "expected" } */
+     mem_base = (BYTE *) calloc(1024, (1<<13));
+     ((void)((mem_base != (BYTE *)0)
 	     ? 0
 	     : (__eprintf("Failed assertion`%s'at line%d of`%s'.\n",
 			  "mem_base != (BYTE *)0", 366, "b.c"),

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH,c] better recovery from missing semicolon after declaration
  2010-11-17 16:17 ` Nathan Froyd
@ 2010-11-17 16:39   ` Paolo Bonzini
  2010-11-18 19:33     ` Nathan Froyd
  0 siblings, 1 reply; 6+ messages in thread
From: Paolo Bonzini @ 2010-11-17 16:39 UTC (permalink / raw)
  To: Nathan Froyd; +Cc: gcc-patches

On 11/17/2010 04:33 PM, Nathan Froyd wrote:
> On Wed, Nov 17, 2010 at 10:29:29AM -0500, Nathan Froyd wrote:
>> This patch attempts to fix that by detecting when we've just parsed a
>> declaration and we're faced with what looks like the beginning of the
>> next declaration.
>
> I forgot that one testcase needed some adjustments.  The patch below
> deletes some dg-error messages since for the opening line of the
> testcase:
>
> typedef BYTE unsigned char;	/* { dg-error "expected" } */
>
> we now treat BYTE as a type.  I think this is an improvement, albeit an
> unexpected one.

Ah, so we parse it as

    typedef /* implied int */ BYTE;
    unsigned char;

though without the extra warnings the above has.  This may be an 
improvement (better parsing recovery is good, but better semantic 
recovery is too!), but I'm not sure about the relatively convoluted way 
in which the parser gets there.

In particular, I'm worried that for example in something like this:

   typedef uintt16_t pid_t;

we would not be able to see the declaration of pid_t as a type.  In 
other words, I'm worried that it becomes harder to improve this tescase:

   typedef unsigned short uint16_t;

   /* should warn about unknown type name "uintt16_t", currently gives
      a parse error ("expected ... before pid_t") because unknown type
      names are not guessed in c_parser_declspecs.  */
   typedef uintt16_t pid_t;

   /* no error expected about unknown type name; currently fails */
   pid_t xyz;

Given my recent experience with adding unknown type name errors, it 
should not be hard to fix the above (just lack of time on my part), for 
example by moving this into c_parser_next_token_starts_typename:

   /* Try a bit harder to detect an unknown typename.  */
   if (token->type == CPP_NAME
       && token->id_kind == C_ID_ID
       && (c_parser_peek_2nd_token (parser)->type == CPP_NAME
           || c_parser_peek_2nd_token (parser)->type == CPP_MULT)
       && !lookup_name (token->value)

       /* Do not try too hard when we could have "object in array".  */
       && !parser->objc_could_be_foreach_context)
     return true;

Maybe you can give this a shot before applying these patches?

Paolo

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH,c] better recovery from missing semicolon after declaration
  2010-11-17 16:39   ` Paolo Bonzini
@ 2010-11-18 19:33     ` Nathan Froyd
  2010-11-18 19:47       ` Paolo Bonzini
  0 siblings, 1 reply; 6+ messages in thread
From: Nathan Froyd @ 2010-11-18 19:33 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: gcc-patches

On Wed, Nov 17, 2010 at 05:17:20PM +0100, Paolo Bonzini wrote:
> On 11/17/2010 04:33 PM, Nathan Froyd wrote:
> >I forgot that one testcase needed some adjustments.  The patch below
> >deletes some dg-error messages since for the opening line of the
> >testcase:
> >
> >typedef BYTE unsigned char;	/* { dg-error "expected" } */
> >
> >we now treat BYTE as a type.  I think this is an improvement, albeit an
> >unexpected one.
> 
> In other words, I'm worried that it becomes harder to improve this
> tescase:
> 
>   typedef unsigned short uint16_t;
> 
>   /* should warn about unknown type name "uintt16_t", currently gives
>      a parse error ("expected ... before pid_t") because unknown type
>      names are not guessed in c_parser_declspecs.  */
>   typedef uintt16_t pid_t;
> 
>   /* no error expected about unknown type name; currently fails */
>   pid_t xyz;
> 
> Given my recent experience with adding unknown type name errors, it
> should not be hard to fix the above (just lack of time on my part),
> for example by moving this into c_parser_next_token_starts_typename:
> 
>   /* Try a bit harder to detect an unknown typename.  */
>   if (token->type == CPP_NAME
>       && token->id_kind == C_ID_ID
>       && (c_parser_peek_2nd_token (parser)->type == CPP_NAME
>           || c_parser_peek_2nd_token (parser)->type == CPP_MULT)
>       && !lookup_name (token->value)
> 
>       /* Do not try too hard when we could have "object in array".  */
>       && !parser->objc_could_be_foreach_context)
>     return true;
> 
> Maybe you can give this a shot before applying these patches?

I tried the example above with and without my patch and with and without
modifying c_parser_next_token_starts_typename like so:

static inline bool
c_parser_next_token_starts_typename (c_parser *parser)
{
  c_token *token = c_parser_peek_token (parser);

  /* Try a bit harder to detect an unknown typename.  */
  if (token->type == CPP_NAME
      && token->id_kind == C_ID_ID
      && (c_parser_peek_2nd_token (parser)->type == CPP_NAME
          || c_parser_peek_2nd_token (parser)->type == CPP_MULT)
      && !lookup_name (token->value)
      /* Do not try too hard when we could have "object in array".  */
      && !parser->objc_could_be_foreach_context)
    return true;

  return c_token_starts_typename (token);
}

and with:

static inline bool
c_parser_next_token_starts_typename (c_parser *parser)
{
  c_token *token = c_parser_peek_token (parser);

  if (c_token_starts_typename (token))
    return true;

  /* Try a bit harder to detect an unknown typename.  */
  if (token->type == CPP_NAME
      && token->id_kind == C_ID_ID
      && (c_parser_peek_2nd_token (parser)->type == CPP_NAME
          || c_parser_peek_2nd_token (parser)->type == CPP_MULT)
      && !lookup_name (token->value)
      /* Do not try too hard when we could have "object in array".  */
      && !parser->objc_could_be_foreach_context)
    return true;

  return false;
}

since I wasn't sure which version you meant.  (I think the latter, for
agreement with c_parser_next_tokens_start_declaration.)  The error
message in all cases is:

/home/froydnj/src/paolo.c:7:21: error: expected '=', ',', ';', 'asm' or '__attribute__' before 'pid_t'
/home/froydnj/src/paolo.c:11:3: error: unknown type name 'pid_t'

so my patch doesn't seem to have much effect on this testcase.  I think
the saving grace on your testcase is that in:

  typedef uintt16_t pid_t;

`pid_t' is not a keyword, so we don't stop parsing early, but I haven't
traced through the parser to verify that.

-Nathan

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH,c] better recovery from missing semicolon after declaration
  2010-11-18 19:33     ` Nathan Froyd
@ 2010-11-18 19:47       ` Paolo Bonzini
  0 siblings, 0 replies; 6+ messages in thread
From: Paolo Bonzini @ 2010-11-18 19:47 UTC (permalink / raw)
  To: Nathan Froyd; +Cc: gcc-patches

On 11/18/2010 08:04 PM, Nathan Froyd wrote:
> I think
> the saving grace on your testcase is that in:
>
>    typedef uintt16_t pid_t;
>
> `pid_t' is not a keyword, so we don't stop parsing early, but I haven't
> traced through the parser to verify that.

Yes, that's possible.  In that case, I have no objection to the patch; 
the testcase is quite weird indeed in having a keyword after the id.

For what is worth, my WIP patch has no effect on your testcase:

$ ./cc1
typedef BYTE unsigned char;
BYTE x;
<stdin>:1:14: error: expected Â‘=Â’, Â‘,Â’, Â‘;Â’, Â‘asmÂ’ or Â‘__attribute__Â’ 
before Â‘unsignedÂ’
<stdin>:2:1: error: unknown type name Â‘BYTEÂ’

but fixes the other:

$ ./cc1
typedef uintt16_t pid_t;
pid_t x;
<stdin>:1:1: error: unknown type name Â‘uintt16_tÂ’

Thanks,

Paolo

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH,c] better recovery from missing semicolon after declaration
  2010-11-17 15:46 [PATCH,c] better recovery from missing semicolon after declaration Nathan Froyd
  2010-11-17 16:17 ` Nathan Froyd
@ 2010-11-19 18:32 ` Joseph S. Myers
  1 sibling, 0 replies; 6+ messages in thread
From: Joseph S. Myers @ 2010-11-19 18:32 UTC (permalink / raw)
  To: Nathan Froyd; +Cc: gcc-patches

On Wed, 17 Nov 2010, Nathan Froyd wrote:

> +/* Return true if the next token from PARSER cannot follow a
> +   declaration, false otherwise.  */
> +static inline bool
> +c_parser_next_token_cannot_follow_declaration (c_parser *parser,
> +					       struct c_declarator *declarator)

The point of this function seems not to be that the token cannot follow a 
declaration - there are a great many other tokens that cannot follow a 
declaration - but that it cannot follow *and* looks like the start of the 
next declaration.

> +	case RID_STRUCT:
> +	case RID_UNION:
> +	case RID_UNSIGNED:
> +	case RID_LONG:
> +	case RID_INT128:
> +	case RID_SHORT:
> +	case RID_SIGNED:
> +	case RID_COMPLEX:
> +	case RID_INT:
> +	case RID_CHAR:
> +	case RID_FLOAT:
> +	case RID_DOUBLE:
> +	case RID_VOID:
> +	case RID_DFLOAT32:
> +	case RID_DFLOAT64:
> +	case RID_DFLOAT128:
> +	case RID_BOOL:
> +	case RID_ENUM:

What about RID_CONST, RID_VOLATILE, RID_RESTRICT, RID_FRACT, RID_ACCUM, 
RID_SAT, RID_TYPEOF, all of which appear to be in a similar situation?  
Where did this list come from?

> +	  /* We need to be careful, since K&R argument lists can begin
> +	     with these tokens.  Consult the types of the argument info;
> +	     if we actually have a type, then these tokens are invalid.
> +	     Don't bother grovelling through the whole list.  */
> +	  if (declarator->kind == cdk_pointer)
> +	    declarator = declarator->declarator;
> +	  if (declarator->kind == cdk_function)
> +	    return TYPE_P (TREE_VALUE (declarator->u.arg_info->types));
> +	  else
> +	    return true;

This is not correct.  You need to recurse all the way to see if the 
innermost declarator that isn't cdk_id or cdk_attrs is cdk_function, and 
look at that declarator if so, to find the type of the function being 
declared.  This patch breaks testcases such as:

int **f(a) int a; { return 0; }

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2010-11-19 17:54 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-11-17 15:46 [PATCH,c] better recovery from missing semicolon after declaration Nathan Froyd
2010-11-17 16:17 ` Nathan Froyd
2010-11-17 16:39   ` Paolo Bonzini
2010-11-18 19:33     ` Nathan Froyd
2010-11-18 19:47       ` Paolo Bonzini
2010-11-19 18:32 ` Joseph S. Myers

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).