From: Siddhesh Poyarekar <siddhesh@gotplt.org>
To: Florian Weimer <fweimer@redhat.com>, libc-alpha@sourceware.org
Subject: Re: [PATCH v2 2/3] scripts: Enhance glibcpp to do basic macro processing
Date: Wed, 21 Sep 2022 13:08:38 -0400 [thread overview]
Message-ID: <49eb2336-9fdc-cbd2-bb1a-f6e6edd78744@gotplt.org> (raw)
In-Reply-To: <52eced32a56ad0a727b74058985aca32523c68e3.1663167514.git.fweimer@redhat.com>
On 2022-09-14 11:00, Florian Weimer via Libc-alpha wrote:
> ---
> scripts/glibcpp.py | 317 +++++++++++++++++++++++++++++++++++++++++
> support/Makefile | 10 +-
> support/tst-glibcpp.py | 217 ++++++++++++++++++++++++++++
> 3 files changed, 542 insertions(+), 2 deletions(-)
> create mode 100644 support/tst-glibcpp.py
LGTM.
Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
>
> diff --git a/scripts/glibcpp.py b/scripts/glibcpp.py
> index b44c6a4392..455459a609 100644
> --- a/scripts/glibcpp.py
> +++ b/scripts/glibcpp.py
> @@ -33,7 +33,9 @@ Accepts non-ASCII characters only within comments and strings.
> """
>
> import collections
> +import operator
> import re
> +import sys
>
> # Caution: The order of the outermost alternation matters.
> # STRING must be before BAD_STRING, CHARCONST before BAD_CHARCONST,
> @@ -210,3 +212,318 @@ def tokenize_c(file_contents, reporter):
>
> yield tok
> pos = mo.end()
> +
> +class MacroDefinition(collections.namedtuple('MacroDefinition',
> + 'name_token args body error')):
> + """A preprocessor macro definition.
> +
> + name_token is the Token_ for the name.
> +
> + args is None for a macro that is not function-like. Otherwise, it
> + is a tuple that contains the macro argument name tokens.
> +
> + body is a tuple that contains the tokens that constitute the body
> + of the macro definition (excluding whitespace).
> +
> + error is None if no error was detected, or otherwise a problem
> + description associated with this macro definition.
> +
> + """
> +
> + @property
> + def function(self):
> + """Return true if the macro is function-like."""
> + return self.args is not None
> +
> + @property
> + def name(self):
> + """Return the name of the macro being defined."""
> + return self.name_token.text
> +
> + @property
> + def line(self):
> + """Return the line number of the macro definition."""
> + return self.name_token.line
> +
> + @property
> + def args_lowered(self):
> + """Return the macro argument list as a list of strings"""
> + if self.function:
> + return [token.text for token in self.args]
> + else:
> + return None
> +
> + @property
> + def body_lowered(self):
> + """Return the macro body as a list of strings."""
> + return [token.text for token in self.body]
> +
> +def macro_definitions(tokens):
> + """A generator for C macro definitions among tokens.
> +
> + The generator yields MacroDefinition objects.
> +
> + tokens must be iterable, yielding Token_ objects.
> +
> + """
> +
> + macro_name = None
> + macro_start = False # Set to false after macro name and one token.
> + macro_args = None # Set to a list during the macro argument sequence.
> + in_macro_args = False # True while processing macro identifier-list.
> + error = None
> + body = []
> +
> + for token in tokens:
> + if token.context == 'define' and macro_name is None \
> + and token.kind == 'IDENT':
> + # Starting up macro processing.
> + if macro_start:
> + # First identifier is the macro name.
> + macro_name = token
> + else:
> + # Next token is the name.
> + macro_start = True
> + continue
> +
> + if macro_name is None:
> + # Drop tokens not in macro definitions.
> + continue
> +
> + if token.context != 'define':
> + # End of the macro definition.
> + if in_macro_args and error is None:
> + error = 'macro definition ends in macro argument list'
> + yield MacroDefinition(macro_name, macro_args, tuple(body), error)
> + # No longer in a macro definition.
> + macro_name = None
> + macro_start = False
> + macro_args = None
> + in_macro_args = False
> + error = None
> + body.clear()
> + continue
> +
> + if macro_start:
> + # First token after the macro name.
> + macro_start = False
> + if token.kind == 'PUNCTUATOR' and token.text == '(':
> + macro_args = []
> + in_macro_args = True
> + continue
> +
> + if in_macro_args:
> + if token.kind == 'IDENT' \
> + or (token.kind == 'PUNCTUATOR' and token.text == '...'):
> + # Macro argument or ... placeholder.
> + macro_args.append(token)
> + if token.kind == 'PUNCTUATOR':
> + if token.text == ')':
> + macro_args = tuple(macro_args)
> + in_macro_args = False
> + elif token.text == ',':
> + pass # Skip. Not a full syntax check.
> + elif error is None:
> + error = 'invalid punctuator in macro argument list: ' \
> + + repr(token.text)
> + elif error is None:
> + error = 'invalid {} token in macro argument list'.format(
> + token.kind)
> + continue
> +
> + if token.kind not in ('WHITESPACE', 'BLOCK_COMMENT'):
> + body.append(token)
> +
> + # Emit the macro in case the last line does not end with a newline.
> + if macro_name is not None:
> + if in_macro_args and error is None:
> + error = 'macro definition ends in macro argument list'
> + yield MacroDefinition(macro_name, macro_args, tuple(body), error)
> +
> +# Used to split UL etc. suffixes from numbers such as 123UL.
> +RE_SPLIT_INTEGER_SUFFIX = re.compile(r'([^ullULL]+)([ullULL]*)')
> +
> +BINARY_OPERATORS = {
> + '+': operator.add,
> + '<<': operator.lshift,
> +}
> +
> +# Use the general-purpose dict type if it is order-preserving.
> +if (sys.version_info[0], sys.version_info[1]) <= (3, 6):
> + OrderedDict = collections.OrderedDict
> +else:
> + OrderedDict = dict
> +
> +def macro_eval(macro_defs, reporter):
> + """Compute macro values
> +
> + macro_defs is the output from macro_definitions. reporter is an
> + object that accepts reporter.error(line_number, message) and
> + reporter.note(line_number, message) calls to report errors
> + and error context invocations.
> +
> + The returned dict contains the values of macros which are not
> + function-like, pairing their names with their computed values.
> +
> + The current implementation is incomplete. It is deliberately not
> + entirely faithful to C, even in the implemented parts. It checks
> + that macro replacements follow certain syntactic rules even if
> + they are never evaluated.
> +
> + """
> +
> + # Unevaluated macro definitions by name.
> + definitions = OrderedDict()
> + for md in macro_defs:
> + if md.name in definitions:
> + reporter.error(md.line, 'macro {} redefined'.format(md.name))
> + reporter.note(definitions[md.name].line,
> + 'location of previous definition')
> + else:
> + definitions[md.name] = md
> +
> + # String to value mappings for fully evaluated macros.
> + evaluated = OrderedDict()
> +
> + # String to macro definitions during evaluation. Nice error
> + # reporting relies on deterministic iteration order.
> + stack = OrderedDict()
> +
> + def eval_token(current, token):
> + """Evaluate one macro token.
> +
> + Integers and strings are returned as such (the latter still
> + quoted). Identifiers are expanded.
> +
> + None indicates an empty expansion or an error.
> +
> + """
> +
> + if token.kind == 'PP_NUMBER':
> + value = None
> + m = RE_SPLIT_INTEGER_SUFFIX.match(token.text)
> + if m:
> + try:
> + value = int(m.group(1), 0)
> + except ValueError:
> + pass
> + if value is None:
> + reporter.error(token.line,
> + 'invalid number {!r} in definition of {}'.format(
> + token.text, current.name))
> + return value
> +
> + if token.kind == 'STRING':
> + return token.text
> +
> + if token.kind == 'CHARCONST' and len(token.text) == 3:
> + return ord(token.text[1])
> +
> + if token.kind == 'IDENT':
> + name = token.text
> + result = eval1(current, name)
> + if name not in evaluated:
> + evaluated[name] = result
> + return result
> +
> + reporter.error(token.line,
> + 'unrecognized {!r} in definition of {}'.format(
> + token.text, current.name))
> + return None
> +
> +
> + def eval1(current, name):
> + """Evaluate one name.
> +
> + The name is looked up and the macro definition evaluated
> + recursively if necessary. The current argument is the macro
> + definition being evaluated.
> +
> + None as a return value indicates an error.
> +
> + """
> +
> + # Fast path if the value has already been evaluated.
> + if name in evaluated:
> + return evaluated[name]
> +
> + try:
> + md = definitions[name]
> + except KeyError:
> + reporter.error(current.line,
> + 'reference to undefined identifier {} in definition of {}'
> + .format(name, current.name))
> + return None
> +
> + if md.name in stack:
> + # Recursive macro definition.
> + md = stack[name]
> + reporter.error(md.line,
> + 'macro definition {} refers to itself'.format(md.name))
> + for md1 in reversed(list(stack.values())):
> + if md1 is md:
> + break
> + reporter.note(md1.line,
> + 'evaluated from {}'.format(md1.name))
> + return None
> +
> + stack[md.name] = md
> + if md.function:
> + reporter.error(current.line,
> + 'attempt to evaluate function-like macro {}'.format(name))
> + reporter.note(md.line, 'definition of {}'.format(md.name))
> + return None
> +
> + try:
> + body = md.body
> + if len(body) == 0:
> + # Empty expansion.
> + return None
> +
> + # Remove surrounding ().
> + if body[0].text == '(' and body[-1].text == ')':
> + body = body[1:-1]
> + had_parens = True
> + else:
> + had_parens = False
> +
> + if len(body) == 1:
> + return eval_token(md, body[0])
> +
> + # Minimal expression evaluator for binary operators.
> + op = body[1].text
> + if len(body) == 3 and op in BINARY_OPERATORS:
> + if not had_parens:
> + reporter.error(body[1].line,
> + 'missing parentheses around {} expression'.format(op))
> + reporter.note(md.line,
> + 'in definition of macro {}'.format(md.name))
> +
> + left = eval_token(md, body[0])
> + right = eval_token(md, body[2])
> +
> + if type(left) != type(1):
> + reporter.error(left.line,
> + 'left operand of {} is not an integer'.format(op))
> + reporter.note(md.line,
> + 'in definition of macro {}'.format(md.name))
> + if type(right) != type(1):
> + reporter.error(left.line,
> + 'right operand of {} is not an integer'.format(op))
> + reporter.note(md.line,
> + 'in definition of macro {}'.format(md.name))
> + return BINARY_OPERATORS[op](left, right)
> +
> + reporter.error(md.line,
> + 'uninterpretable macro token sequence: {}'.format(
> + ' '.join(md.body_lowered)))
> + return None
> + finally:
> + del stack[md.name]
> +
> + # Start of main body of macro_eval.
> + for md in definitions.values():
> + name = md.name
> + if name not in evaluated and not md.function:
> + evaluated[name] = eval1(md, name)
> + return evaluated
> diff --git a/support/Makefile b/support/Makefile
> index 9b50eac117..551d02941f 100644
> --- a/support/Makefile
> +++ b/support/Makefile
> @@ -274,12 +274,12 @@ $(objpfx)test-run-command : $(libsupport) $(common-objpfx)elf/static-stubs.o
> tests = \
> README-testing \
> tst-support-namespace \
> + tst-support-open-dev-null-range \
> + tst-support-process_state \
> tst-support_blob_repeat \
> tst-support_capture_subprocess \
> tst-support_descriptors \
> tst-support_format_dns_packet \
> - tst-support-open-dev-null-range \
> - tst-support-process_state \
> tst-support_quote_blob \
> tst-support_quote_blob_wide \
> tst-support_quote_string \
> @@ -304,6 +304,12 @@ $(objpfx)tst-support_record_failure-2.out: tst-support_record_failure-2.sh \
> $(evaluate-test)
> endif
>
> +tests-special += $(objpfx)tst-glibcpp.out
> +
> +$(objpfx)tst-glibcpp.out: tst-glibcpp.py $(..)scripts/glibcpp.py
> + PYTHONPATH=$(..)scripts $(PYTHON) tst-glibcpp.py > $@ 2>&1; \
> + $(evaluate-test)
> +
> $(objpfx)tst-support_format_dns_packet: $(common-objpfx)resolv/libresolv.so
>
> tst-support_capture_subprocess-ARGS = -- $(host-test-program-cmd)
> diff --git a/support/tst-glibcpp.py b/support/tst-glibcpp.py
> new file mode 100644
> index 0000000000..a2db1916cc
> --- /dev/null
> +++ b/support/tst-glibcpp.py
> @@ -0,0 +1,217 @@
> +#! /usr/bin/python3
> +# Tests for scripts/glibcpp.py
> +# Copyright (C) 2022 Free Software Foundation, Inc.
> +# This file is part of the GNU C Library.
> +#
> +# The GNU C Library is free software; you can redistribute it and/or
> +# modify it under the terms of the GNU Lesser General Public
> +# License as published by the Free Software Foundation; either
> +# version 2.1 of the License, or (at your option) any later version.
> +#
> +# The GNU C Library is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> +# Lesser General Public License for more details.
> +#
> +# You should have received a copy of the GNU Lesser General Public
> +# License along with the GNU C Library; if not, see
> +# <https://www.gnu.org/licenses/>.
> +
> +import inspect
> +import sys
> +
> +import glibcpp
> +
> +# Error counter.
> +errors = 0
> +
> +class TokenizerErrors:
> + """Used as the error reporter during tokenization."""
> +
> + def __init__(self):
> + self.errors = []
> +
> + def error(self, token, message):
> + self.errors.append((token, message))
> +
> +def check_macro_definitions(source, expected):
> + reporter = TokenizerErrors()
> + tokens = glibcpp.tokenize_c(source, reporter)
> +
> + actual = []
> + for md in glibcpp.macro_definitions(tokens):
> + if md.function:
> + md_name = '{}({})'.format(md.name, ','.join(md.args_lowered))
> + else:
> + md_name = md.name
> + actual.append((md_name, md.body_lowered))
> +
> + if actual != expected or reporter.errors:
> + global errors
> + errors += 1
> + # Obtain python source line information.
> + frame = inspect.stack(2)[1]
> + print('{}:{}: error: macro definition mismatch, actual definitions:'
> + .format(frame[1], frame[2]))
> + for md in actual:
> + print('note: {} {!r}'.format(md[0], md[1]))
> +
> + if reporter.errors:
> + for err in reporter.errors:
> + print('note: tokenizer error: {}: {}'.format(
> + err[0].line, err[1]))
> +
> +def check_macro_eval(source, expected, expected_errors=''):
> + reporter = TokenizerErrors()
> + tokens = list(glibcpp.tokenize_c(source, reporter))
> +
> + if reporter.errors:
> + # Obtain python source line information.
> + frame = inspect.stack(2)[1]
> + for err in reporter.errors:
> + print('{}:{}: tokenizer error: {}: {}'.format(
> + frame[1], frame[2], err[0].line, err[1]))
> + return
> +
> + class EvalReporter:
> + """Used as the error reporter during evaluation."""
> +
> + def __init__(self):
> + self.lines = []
> +
> + def error(self, line, message):
> + self.lines.append('{}: error: {}\n'.format(line, message))
> +
> + def note(self, line, message):
> + self.lines.append('{}: note: {}\n'.format(line, message))
> +
> + reporter = EvalReporter()
> + actual = glibcpp.macro_eval(glibcpp.macro_definitions(tokens), reporter)
> + actual_errors = ''.join(reporter.lines)
> + if actual != expected or actual_errors != expected_errors:
> + global errors
> + errors += 1
> + # Obtain python source line information.
> + frame = inspect.stack(2)[1]
> + print('{}:{}: error: macro evaluation mismatch, actual results:'
> + .format(frame[1], frame[2]))
> + for k, v in actual.items():
> + print(' {}: {!r}'.format(k, v))
> + for msg in reporter.lines:
> + sys.stdout.write(' | ' + msg)
> +
> +# Individual test cases follow.
> +
> +check_macro_definitions('', [])
> +check_macro_definitions('int main()\n{\n{\n', [])
> +check_macro_definitions("""
> +#define A 1
> +#define B 2 /* ignored */
> +#define C 3 // also ignored
> +#define D \
> + 4
> +#define STRING "string"
> +#define FUNCLIKE(a, b) (a + b)
> +#define FUNCLIKE2(a, b) (a + \
> + b)
> +""", [('A', ['1']),
> + ('B', ['2']),
> + ('C', ['3']),
> + ('D', ['4']),
> + ('STRING', ['"string"']),
> + ('FUNCLIKE(a,b)', list('(a+b)')),
> + ('FUNCLIKE2(a,b)', list('(a+b)')),
> + ])
> +check_macro_definitions('#define MACRO', [('MACRO', [])])
> +check_macro_definitions('#define MACRO\n', [('MACRO', [])])
> +check_macro_definitions('#define MACRO()', [('MACRO()', [])])
> +check_macro_definitions('#define MACRO()\n', [('MACRO()', [])])
> +
> +check_macro_eval('#define A 1', {'A': 1})
> +check_macro_eval('#define A (1)', {'A': 1})
> +check_macro_eval('#define A (1 + 1)', {'A': 2})
> +check_macro_eval('#define A (1U << 31)', {'A': 1 << 31})
> +check_macro_eval('''\
> +#define A (B + 1)
> +#define B 10
> +#define F(x) ignored
> +#define C "not ignored"
> +''', {
> + 'A': 11,
> + 'B': 10,
> + 'C': '"not ignored"',
> +})
> +
> +# Checking for evaluation errors.
> +check_macro_eval('''\
> +#define A 1
> +#define A 2
> +''', {
> + 'A': 1,
> +}, '''\
> +2: error: macro A redefined
> +1: note: location of previous definition
> +''')
> +
> +check_macro_eval('''\
> +#define A A
> +#define B 1
> +''', {
> + 'A': None,
> + 'B': 1,
> +}, '''\
> +1: error: macro definition A refers to itself
> +''')
> +
> +check_macro_eval('''\
> +#define A B
> +#define B A
> +''', {
> + 'A': None,
> + 'B': None,
> +}, '''\
> +1: error: macro definition A refers to itself
> +2: note: evaluated from B
> +''')
> +
> +check_macro_eval('''\
> +#define A B
> +#define B C
> +#define C A
> +''', {
> + 'A': None,
> + 'B': None,
> + 'C': None,
> +}, '''\
> +1: error: macro definition A refers to itself
> +3: note: evaluated from C
> +2: note: evaluated from B
> +''')
> +
> +check_macro_eval('''\
> +#define A 1 +
> +''', {
> + 'A': None,
> +}, '''\
> +1: error: uninterpretable macro token sequence: 1 +
> +''')
> +
> +check_macro_eval('''\
> +#define A 3*5
> +''', {
> + 'A': None,
> +}, '''\
> +1: error: uninterpretable macro token sequence: 3 * 5
> +''')
> +
> +check_macro_eval('''\
> +#define A 3 + 5
> +''', {
> + 'A': 8,
> +}, '''\
> +1: error: missing parentheses around + expression
> +1: note: in definition of macro A
> +''')
> +
> +if errors:
> + sys.exit(1)
next prev parent reply other threads:[~2022-09-21 17:08 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-09-14 14:59 [PATCH v2 0/3] Parse <elf.h> in the glibcelf Python module Florian Weimer
2022-09-14 14:59 ` [PATCH v2 1/3] scripts: Extract glibcpp.py from check-obsolete-constructs.py Florian Weimer
2022-09-14 15:00 ` [PATCH v2 2/3] scripts: Enhance glibcpp to do basic macro processing Florian Weimer
2022-09-21 17:08 ` Siddhesh Poyarekar [this message]
2022-09-14 15:00 ` [PATCH v2 3/3] elf: Extract glibcelf constants from <elf.h> Florian Weimer
2022-09-21 18:05 ` Siddhesh Poyarekar
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=49eb2336-9fdc-cbd2-bb1a-f6e6edd78744@gotplt.org \
--to=siddhesh@gotplt.org \
--cc=fweimer@redhat.com \
--cc=libc-alpha@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).