From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 27079 invoked by alias); 2 Apr 2010 01:50:57 -0000 Mailing-List: contact archer-help@sourceware.org; run by ezmlm Sender: Precedence: bulk List-Post: List-Help: List-Subscribe: List-Id: Received: (qmail 27047 invoked by uid 22791); 2 Apr 2010 01:50:44 -0000 X-SWARE-Spam-Status: No, hits=-5.0 required=5.0 tests=BAYES_20,RCVD_IN_DNSWL_HI,SPF_HELO_PASS,T_RP_MATCHES_RCVD X-Spam-Check-By: sourceware.org Message-ID: <4BB54D69.1000009@redhat.com> Date: Fri, 02 Apr 2010 01:50:00 -0000 From: Chris Moller User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.1b3pre) Gecko/20090513 Fedora/3.0-2.3.beta2.fc11 Thunderbird/3.0b2 MIME-Version: 1.0 To: Sergio Durigan Junior CC: Project Archer Subject: Re: Parser rewritting References: <201003301546.34866.sergiodj@redhat.com> In-Reply-To: <201003301546.34866.sergiodj@redhat.com> Content-Type: multipart/mixed; boundary="------------000606090404090209090407" X-SW-Source: 2010-q2/txt/msg00000.txt.bz2 This is a multi-part message in MIME format. --------------000606090404090209090407 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Content-length: 1476 On 03/30/10 14:46, Sergio Durigan Junior wrote: > Hello! > > As you may have noticed, in the last Archer meeting I brought a topic into > discussion: the rewritting of the GDB's parser. The current parser is written > using Bison, and unfortunately it is insufficient to satisfy our current > needs, especially for C++ productions. > > With that in mind, Tom asked me to start this discussion in the mailing-list > to see what you think about it. We decided to send an e-mail to the archer > list at first; this topic will eventually be discussed at the gdb list as > well. > A lot of years ago I wrote a fairly elaborate parser using antlr--definitely a cool tool and I recommend you consider it. 
It's a predicated LL(*) parser generator--the "predicated" bit making it possible, among other things, to handle the context-dependent bits of C/C++ grammar. Just as an example, I've attached a rudimentary antlr grammar that parses a subset of C/C++ decls--if you look, you'll see that the rules look a lot like the specifications in the C++, and in fact started out as a cut'n'paste of those specs. Also, if you look in the grammar for "is_cpp," you can see how rule predicates can be used to have the parser do different things depending on circumstances.

Anyway, it's probably worth considering.

(In addition to the .g file attached, I wrote a couple other .c and .h files that make it all work. I'll make them available if anyone wants them.)

Chris

--------------000606090404090209090407
Content-Type: text/plain; name="CPPparser.g"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="CPPparser.g"
Content-length: 8503

/*
 * Rudimentary ANTLR 3 grammar (C target) for a subset of C/C++
 * declaration specifiers.
 *
 * The embedded actions record what was parsed in type_desc and data_type
 * and consult is_cpp; none of those are declared in this file (presumably
 * they come from pd.h -- confirm against that header).
 */
grammar CPPparser;

options {
    language  = C;
    backtrack = true;
}

@header {
#include <string.h>
#include "pd.h"
}

/*
 * Entry rule: one sequence of declaration specifiers.  data_type is
 * cleared and tagged before matching, and passed to print_data() once the
 * rule has matched.
 */
decl_specifier
@init {
    memset(&data_type, 0, sizeof(data_type));
    data_type.data_type = DATA_TYPE_TYPE_DESC;
}
@after {
    print_data (&data_type);
}
    : storage_class_specifier* WS? type_specifier? function_specifier?
      FRIEND? TYPEDEF? CONSTEXPR?
    /* | alignment_specifier */
    ;

/*
 * Storage class keywords.  Only the first of REGISTER / STATIC / EXTERN is
 * recorded; a later one is reported on stderr.  THREAD_LOCAL and MUTABLE
 * are accepted but not recorded.
 */
storage_class_specifier
    : REGISTER {
        if (type_desc.storage_class == STORAGE_CLASS_NONE) {
            type_desc.storage_class = STORAGE_CLASS_REGISTER;
        } else {
            fprintf (stderr, "Storage class already set.\n");
        }
      }
    | STATIC {
        if (type_desc.storage_class == STORAGE_CLASS_NONE) {
            type_desc.storage_class = STORAGE_CLASS_STATIC;
        } else {
            fprintf (stderr, "Storage class already set.\n");
        }
      }
    | THREAD_LOCAL
    | EXTERN {
        if (type_desc.storage_class == STORAGE_CLASS_NONE) {
            type_desc.storage_class = STORAGE_CLASS_EXTERN;
        } else {
            fprintf (stderr, "Storage class already set.\n");
        }
      }
    | MUTABLE
    ;

/*
 * A type: one or more simple type keywords, a class specifier, or a run of
 * cv-qualifiers.  The is_cpp predicate selects which keyword set is used.
 * The type fields of type_desc are reset before matching.
 */
type_specifier
@init {
    type_desc.type       = TYPE_CODE_UNSET;
    type_desc.size       = -1;
    type_desc.nr_longs   = 0;
    type_desc.nosign_bit = 1;
    type_desc.signed_bit = 0;
}
    : ( (   {!is_cpp}? simple_type_specifier
          | {is_cpp}?  simple_type_specifier_cpp
        ) WS? )+
    | class_specifier
    /* | enum_specifier */
    /* | elaborated_type_specifier */
    /* | typename_specifier */
    | cv_qualifier*
    ;

/*
 * Simple type keywords (lower-case spelling).  Each action updates
 * type_desc so that the keywords may appear in any order.
 */
simple_type_specifier
    : /* nested_name_specifier? type_name */
      /* | nested_name_specifier TEMPLATE type_name */
      CHAR {
        type_desc.type = TYPE_CODE_INT;
        type_desc.size = sizeof(char);
      }
    | WCHAR_T {
        type_desc.type = TYPE_CODE_INT;
        type_desc.size = sizeof(wchar_t);
      }
    /* | BOOL { type_desc.type = TYPE_CODE_INT; type_desc.size = sizeof(bool); } */
    | SHORT {
        type_desc.type = TYPE_CODE_INT;
        type_desc.size = sizeof(short);
      }
    | INT {
        /* Do not overwrite a size already chosen by an earlier short or
           char keyword: only size the type here when long was seen or no
           integer type has been recorded yet. */
        if (type_desc.nr_longs > 0 || type_desc.type != TYPE_CODE_INT) {
            switch (type_desc.nr_longs) {
            case 0:  type_desc.size = sizeof(int);           break;
            case 1:  type_desc.size = sizeof(long int);      break;
            case 2:  type_desc.size = sizeof(long long int); break;
            default: break;
            }
        }
        type_desc.type = TYPE_CODE_INT;
      }
    | LONG {
        if (type_desc.type == TYPE_CODE_UNSET) {
            type_desc.type = TYPE_CODE_INT;
        }
        if (type_desc.nr_longs < 2) {
            type_desc.nr_longs++;
        }
        if (type_desc.type == TYPE_CODE_FLT) {
            /* long written after double names the same type as
               long double. */
            type_desc.size = sizeof(long double);
        } else {
            switch (type_desc.nr_longs) {
            case 0:  type_desc.size = sizeof(int);           break;
            case 1:  type_desc.size = sizeof(long int);      break;
            case 2:  type_desc.size = sizeof(long long int); break;
            default: break;
            }
        }
      }
    | SIGNED {
        type_desc.nosign_bit = 0;
        type_desc.signed_bit = 1;
      }
    | UNSIGNED {
        type_desc.nosign_bit = 0;
        type_desc.signed_bit = 0;
      }
    | FLOAT {
        type_desc.type = TYPE_CODE_FLT;
        type_desc.size = sizeof(float);
      }
    | DOUBLE {
        type_desc.type = TYPE_CODE_FLT;
        type_desc.size = (type_desc.nr_longs > 0) ? sizeof(long double)
                                                  : sizeof(double);
      }
    | VOID {
        type_desc.type = TYPE_CODE_VOID;
      }
    | AUTO { }
    /* | decltype ( expression) */
    ;

/*
 * Same as simple_type_specifier, but for the capitalised keyword set that
 * is selected when is_cpp is true.
 */
simple_type_specifier_cpp
    : /* nested_name_specifier? type_name */
      /* | nested_name_specifier TEMPLATE type_name */
      Char {
        type_desc.type = TYPE_CODE_INT;
        type_desc.size = sizeof(char);
      }
    | Wchar_t {
        type_desc.type = TYPE_CODE_INT;
        type_desc.size = sizeof(wchar_t);
      }
    /* | Bool { type_desc.type = TYPE_CODE_INT; type_desc.size = sizeof(bool); } */
    | Short {
        type_desc.type = TYPE_CODE_INT;
        type_desc.size = sizeof(short);
      }
    | Int {
        /* Do not overwrite a size already chosen by an earlier Short or
           Char keyword: only size the type here when Long was seen or no
           integer type has been recorded yet. */
        if (type_desc.nr_longs > 0 || type_desc.type != TYPE_CODE_INT) {
            switch (type_desc.nr_longs) {
            case 0:  type_desc.size = sizeof(int);           break;
            case 1:  type_desc.size = sizeof(long int);      break;
            case 2:  type_desc.size = sizeof(long long int); break;
            default: break;
            }
        }
        type_desc.type = TYPE_CODE_INT;
      }
    | Long {
        if (type_desc.type == TYPE_CODE_UNSET) {
            type_desc.type = TYPE_CODE_INT;
        }
        if (type_desc.nr_longs < 2) {
            type_desc.nr_longs++;
        }
        if (type_desc.type == TYPE_CODE_FLT) {
            /* Long written after Double names the same type as
               long double. */
            type_desc.size = sizeof(long double);
        } else {
            switch (type_desc.nr_longs) {
            case 0:  type_desc.size = sizeof(int);           break;
            case 1:  type_desc.size = sizeof(long int);      break;
            case 2:  type_desc.size = sizeof(long long int); break;
            default: break;
            }
        }
      }
    | Signed {
        type_desc.nosign_bit = 0;
        type_desc.signed_bit = 1;
      }
    | Unsigned {
        type_desc.nosign_bit = 0;
        type_desc.signed_bit = 0;
      }
    | Float {
        type_desc.type = TYPE_CODE_FLT;
        type_desc.size = sizeof(float);
      }
    | Double {
        type_desc.type = TYPE_CODE_FLT;
        type_desc.size = (type_desc.nr_longs > 0) ? sizeof(long double)
                                                  : sizeof(double);
      }
    | Void {
        type_desc.type = TYPE_CODE_VOID;
      }
    | Auto { }
    /* | decltype ( expression) */
    ;

/* class / struct / union head followed by a braced member list. */
class_specifier
    : class_head '{' member_specification* '}'
    ;

class_head
    : class_key identifier?
    /* | nested_name_specifier identifier base_clause? */
    /* | nested_name_specifier? simple_template_id base_clause? */
    ;

/* Either a data member declaration or an access label. */
member_specification
    : type_specifier identifier initialiser? ';'
    | scope_specifier ':'
    ;

class_key
    : CLASS
    | STRUCT
    | UNION
    ;

scope_specifier
    : PRIVATE
    | PUBLIC
    | PROTECTED
    ;

initialiser
    : '=' numeric
    /* | string */
    /* | array */
    ;

numeric
    : FIXED
    | FLOATING
    | EXPO
    ;

/* A name is a single IDENTIFIER token produced by the lexer. */
identifier
    : IDENTIFIER
    ;

/* type_name: class_name enum_name typedef_name ; */

cv_qualifier
    : CONST
    | VOLATILE
    ;

function_specifier
    : INLINE
    | VIRTUAL
    | EXPLICIT
    ;

/* Literals for decl_specifier. */
FRIEND       : 'friend' ;
TYPEDEF      : 'typedef' ;
CONSTEXPR    : 'constexpr' ;

/* Literals for storage_specifier. */
REGISTER     : 'register' ;
STATIC       : 'static' ;
THREAD_LOCAL : 'thread_local' ;
EXTERN       : 'extern' ;
MUTABLE      : 'mutable' ;

/* Literals for function_specifier. */
INLINE       : 'inline' ;
VIRTUAL      : 'virtual' ;
EXPLICIT     : 'explicit' ;

/* Literals for simple_type_specifier. */
CHAR         : 'char' ;
WCHAR_T      : 'wchar_t' ;
BOOL         : 'bool' ;
SHORT        : 'short' ;
INT          : 'int' ;
LONG         : 'long' ;
SIGNED       : 'signed' ;
UNSIGNED     : 'unsigned' ;
FLOAT        : 'float' ;
DOUBLE       : 'double' ;
VOID         : 'void' ;
AUTO         : 'auto' ;

/* Literals for simple_type_specifier_cpp. */
Char         : 'Char' ;
Wchar_t      : 'Wchar_t' ;
Bool         : 'Bool' ;
Short        : 'Short' ;
Int          : 'Int' ;
Long         : 'Long' ;
Signed       : 'Signed' ;
Unsigned     : 'Unsigned' ;
Float        : 'Float' ;
Double       : 'Double' ;
Void         : 'Void' ;
Auto         : 'Auto' ;

/* Literals for cv_qualifier. */
CONST        : 'const' ;
VOLATILE     : 'volatile' ;

/* Literals for class_key. */
CLASS        : 'class' ;
STRUCT       : 'struct' ;
UNION        : 'union' ;

/* Literals for scope_specifier. */
PRIVATE      : 'private' ;
PUBLIC       : 'public' ;
PROTECTED    : 'protected' ;

SIGN         : ('+' | '-') ;

/* Numeric literals.  INTEGER is a single digit and only a building block,
   so it is a fragment and never competes with FIXED for a token. */
fragment
INTEGER      : ('0'..'9') ;

FIXED        : INTEGER+ ;
FLOATING     : INTEGER+ '.' INTEGER* ;
EXPO         : INTEGER+ ('.' INTEGER*)? ('e' | 'E') SIGN? INTEGER+ ;

/* Identifiers.  ALPHAI (first character) and ALPHAC (possibly empty tail)
   are fragments so that a zero-length ALPHAC can never be emitted as a
   token.  IDENTIFIER is listed after every keyword so that keywords keep
   priority on an exact match. */
fragment
ALPHAI       : ('a'..'z' | 'A'..'Z' | '_') ;

fragment
ALPHAC       : (ALPHAI | INTEGER)* ;

IDENTIFIER   : ALPHAI ALPHAC ;

NEWLINE      : '\r' ? '\n' ;

/* Whitespace stays on the default channel because parser rules reference
   WS explicitly.  It must match at least one character: a token rule that
   can match nothing lets the lexer produce empty tokens without
   consuming input. */
WS           : (' ' | '\t' | '\n' | '\r')+ /*{ SKIP(); }*/ ;

--------------000606090404090209090407--