[PATCH 02/22] libcpp: add linemap_position_for_file_line_and

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH 02/22] libcpp: add linemap_position_for_file_line_and_column
  2017-08-04 21:30 [PATCH 00/22] RFC: integrated 3rd-party static analysis support David Malcolm
@ 2017-08-04 21:30 ` David Malcolm
  2017-09-01 17:50   ` Jeff Law
  2017-08-04 21:30 ` [PATCH 01/22] Expose assert_loceq outside of input.c; add ASSERT_LOCEQ David Malcolm
                   ` (24 subsequent siblings)
  25 siblings, 1 reply; 35+ messages in thread
From: David Malcolm @ 2017-08-04 21:30 UTC (permalink / raw)
  To: gcc-patches; +Cc: David Malcolm

gcc/ChangeLog:
	* input.c (selftest::test_making_arbitrary_locations): New function.
	(selftest::input_c_tests): Call it.

libcpp/ChangeLog:
	* include/line-map.h (linemap_position_for_file_line_and_column):
	New decl.
	* line-map.c (linemap_position_for_file_line_and_column): New
	function.
---
 gcc/input.c               | 32 +++++++++++++++++++++++++++++
 libcpp/include/line-map.h |  9 +++++++++
 libcpp/line-map.c         | 51 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 92 insertions(+)

diff --git a/gcc/input.c b/gcc/input.c
index 1aad551..a3a8454 100644
--- a/gcc/input.c
+++ b/gcc/input.c
@@ -1795,6 +1795,37 @@ test_accessing_ordinary_linemaps (const line_table_case &case_)
   ASSERT_EQ (loc_d, src_range.m_finish);
 }
 
+/* Verify that linemap_position_for_file_line_and_column works.  */
+
+static void
+test_making_arbitrary_locations (const line_table_case &case_)
+{
+  line_table_test ltt (case_);
+
+  /* Verify that we can make various locations in arbitrary order,
+     sometimes changing file, sometimes going back and making a
+     location "earlier" than ones we've created before.  */
+
+  ASSERT_LOCEQ ("foo.c", 10, 5,
+		linemap_position_for_file_line_and_column (line_table,
+							   "foo.c", 10, 5));
+  ASSERT_LOCEQ ("foo.c", 10, 6,
+		linemap_position_for_file_line_and_column (line_table,
+							   "foo.c", 10, 6));
+  ASSERT_LOCEQ ("foo.c", 20, 1,
+		linemap_position_for_file_line_and_column (line_table,
+							   "foo.c", 20, 1));
+  ASSERT_LOCEQ ("bar.c", 100, 12,
+		linemap_position_for_file_line_and_column (line_table,
+							   "bar.c", 100, 12));
+  ASSERT_LOCEQ ("foo.c", 30, 1,
+		linemap_position_for_file_line_and_column (line_table,
+							   "foo.c", 30, 1));
+  ASSERT_LOCEQ ("foo.c", 15, 1,
+		linemap_position_for_file_line_and_column (line_table,
+							   "foo.c", 15, 1));
+}
+
 /* Verify various properties of UNKNOWN_LOCATION.  */
 
 static void
@@ -3528,6 +3559,7 @@ input_c_tests ()
   for_each_line_table_case (test_make_location_nonpure_range_endpoints);
 
   for_each_line_table_case (test_accessing_ordinary_linemaps);
+  for_each_line_table_case (test_making_arbitrary_locations);
   for_each_line_table_case (test_lexer);
   for_each_line_table_case (test_lexer_string_locations_simple);
   for_each_line_table_case (test_lexer_string_locations_ebcdic);
diff --git a/libcpp/include/line-map.h b/libcpp/include/line-map.h
index e696041..3c74bb0 100644
--- a/libcpp/include/line-map.h
+++ b/libcpp/include/line-map.h
@@ -1192,6 +1192,15 @@ linemap_position_for_loc_and_offset (struct line_maps *set,
 				     source_location loc,
 				     unsigned int offset);
 
+/* Encode and return a source location from a given file, line and column.
+   This is much less efficient than the above functions, and should only
+   be used as a last resort.  */
+
+source_location
+linemap_position_for_file_line_and_column (struct line_maps *set,
+					   const char *, linenum_type,
+					   unsigned int);
+
 /* Return the file this map is for.  */
 inline const char *
 LINEMAP_FILE (const line_map_ordinary *ord_map)
diff --git a/libcpp/line-map.c b/libcpp/line-map.c
index 0e5804b..32294f5 100644
--- a/libcpp/line-map.c
+++ b/libcpp/line-map.c
@@ -935,6 +935,57 @@ linemap_position_for_loc_and_offset (struct line_maps *set,
   return r;
 }
 
+/* Encode and return a source location from a given file, line and column.
+   This is much less efficient than the above functions, and should only
+   be used as a last resort.  */
+
+source_location
+linemap_position_for_file_line_and_column (struct line_maps *set,
+					   const char *file, linenum_type line,
+					   unsigned int column)
+{
+  /* First, attempt to find a pre-existing linemap that can represent
+     the location.  */
+  for (unsigned int i = 0; i < LINEMAPS_ORDINARY_USED (set); i++)
+    {
+      line_map_ordinary *ord_map = LINEMAPS_ORDINARY_MAP_AT (set, i);
+      if (0 == strcmp (file, ord_map->to_file))
+	{
+	  source_location loc
+	    = linemap_position_for_line_and_column (set, ord_map,
+						    line, column);
+	  /* Check that it's a valid location within ord_map.  */
+	  if (i + 1 < LINEMAPS_ORDINARY_USED (set))
+	    {
+	      line_map_ordinary *next_ord_map
+		= LINEMAPS_ORDINARY_MAP_AT (set, i + 1);
+	      if (loc >= next_ord_map->start_location)
+		continue;
+	    }
+
+	  return loc;
+	}
+    }
+
+  /* Failing that, we need a new linemap.  */
+  const line_map_ordinary *ord_map
+    = linemap_check_ordinary (linemap_add
+			      (set, LC_ENTER,
+			       /* Assume that it's not in a system header.  */
+			       false,
+			       xstrdup (file), line));
+  unsigned int max_column_hint = MAX (80, column * 2);
+  linemap_line_start (set, line, max_column_hint);
+
+  source_location loc
+    = linemap_position_for_line_and_column (set, ord_map,
+					    line, column);
+
+  linemap_add (set, LC_LEAVE, false, NULL, 0);
+
+  return loc;
+}
+
 /* Given a virtual source location yielded by a map (either an
    ordinary or a macro map), returns that map.  */
 
-- 
1.8.5.3

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH 00/22] RFC: integrated 3rd-party static analysis support
@ 2017-08-04 21:30 David Malcolm
  2017-08-04 21:30 ` [PATCH 02/22] libcpp: add linemap_position_for_file_line_and_column David Malcolm
                   ` (25 more replies)
  0 siblings, 26 replies; 35+ messages in thread
From: David Malcolm @ 2017-08-04 21:30 UTC (permalink / raw)
  To: gcc-patches; +Cc: David Malcolm

This patch kit clearly isn't ready yet as-is (see e.g. the
"known unknowns" below), but I'm posting it now in the hope of
getting early feedback.

Summary
=======

This patch kit provides an easy way to integrate 3rd-party static
analysis tools into gcc, and have them:
(a) report through gcc's diagnostic subsystem, and
(b) "watermark" the generated binaries with queryable data on what checkers
    were run, and what the results were.

Here's an example showing gcc running a bank of 3rd-party checkers on this
source file:

  #include <stdlib.h>

  void test ()
  {
    void *ptr_1;
    void *ptr_2;

    ptr_1 = malloc (64);
    if (!ptr_1)
      return;
    ptr_2 = malloc (64);
    if (!ptr_2)
      return;

    free (ptr_2);
    free (ptr_1);
  }

via a simple command-line:

  $ ./xgcc -B. -c conditional-leak.c -Wrun-analyzers=policy.json
  conditional-leak.c:13:5: error: Potential leak of memory pointed to by 'ptr_1' [clang-analyzer:Memory leak]
       return;
       ^
  conditional-leak.c:8:11: note: state 1 of 4: Memory is allocated
     ptr_1 = malloc (64);
             ^
  conditional-leak.c:9:7: note: state 2 of 4: Assuming 'ptr_1' is non-null
     if (!ptr_1)
         ^
  conditional-leak.c:12:7: note: state 3 of 4: Assuming 'ptr_2' is null
     if (!ptr_2)
         ^
  conditional-leak.c:13:5: note: state 4 of 4: Potential leak of memory pointed to by 'ptr_1'
       return;
       ^
  conditional-leak.c:13:0: error: Memory leak: ptr_1 [cppcheck:memleak]
       return;

Of the checkers, clang's static analyzer and cppcheck both identify the
memory leak; the former also identifies the control flow (the other
checkers didn't report anything).

The idea is to provide a mechanism to make it easy for developers and
projects to impose policy on what checkers should be run, and to gate
the build if certain tests fail.

In this case, the results are treated as hard errors and block the build,
but policy could allow them to be warnings.

Extensive metadata is captured about what checkers were run, and what
they emitted, using the "Firehose" interchange format:

  http://firehose.readthedocs.io/en/latest/index.html

In the case where this doesn't block the build, this can be queried via a
  contrib/get-static-analysis.py
script, so e.g. you can verify that a setuid binary was indeed compiled
using all the checkers that you expect it to be.

This can also be used to embed data about the code into the watermark.
For example, checkers/ianal.py embeds information about "Copyright"
lines in the source code into the generated binaries, from where it
can be queried (this example is intended as a proof-of-concept rather
than as a real license-tracking solution...)

Statement of the problem
========================

Static analysis is IMHO done too late, if at all: static analysis tools are run
as an optional extra, "on the side", rather than in developers' normal
workflow, with some kind of "override the compiler and do extra work" hook,
which may preclude running more than one analyzer at once.  Analysis results
are reviewed (if at all) in some kind of on-the-side tool, rather than when the
code is being edited, or patches being prepared.

It would be better to have an easy way for developers to run analyzer(s)
as they're doing development, as part of their edit-compile-test cycle
- analysis problems are reported immediately, and can be acted on
immediately (e.g. by treating some checker tests as being hard errors).

It would also be good to have a way to run analyzer(s) when packages are
built, with a variety of precanned policies for analyzers.  For example,
setuid binaries and network-facing daemons could each be built with a
higher strictness of checking.

It would also be good to tag binaries with information on what analyzers
were run, what options they were invoked with, etc.
Potentially have "dump_file" information from optimization passes stored
in the metadata also.   Have a tool to query all of this.

This way a distribution can perform a query like:

  "show me all setuid binaries that contain code that wasn't checked
   with $CHECKER with $TEST set to be a hard error"

Can/should we break the build if there are issues?
Yes: but have a way to opt-in easily: if the tool is well-integrated with the
    compiler: e.g.
        -Wrun-analyzers=/usr/share/analyzers/userspace/network-facing-service
then upstream developers and packagers can turn on the setting, and see what
breaks, and fix it naturally within a compile-edit-test cycle.

This gives a relatively painless way to opt-in to increasing levels of
strictness (e.g. by an upstream project, or by an individual developer).

Does this slow the build down?
Yes: but you can choose which analyzers run, and can choose to turn them off.
It ought to parallelize well.  I believe users will prefer to turn them on,
and have builders burn up the extra CPU cycles.
This may make much more sense for binary distributions (e.g. Fedora, Debian)
than it does for things like Gentoo.

Example policy files/options might be:
  -Wrun-analyzers=/usr/share/analyzers/userspace/network-facing-service
  -Wrun-analyzers=/usr/share/analyzers/userspace/application
  -Wrun-analyzers=/usr/share/analyzers/userspace/setuid-binary
  -Wrun-analyzers=/usr/share/analyzers/userspace/default
  -Wrun-analyzers=/usr/share/analyzers/kernel

or whatnot.

The idea is to provide the mechanism, and for the distribution to decide
on some standard policies.

This may also allow us to sandbox a gcc plugin by running the plugin inside
another cc1, for plugins that add warnings - if the plugin ICEs, then the main
cc1 isn't affected (useful for doing mass rebuilds of code using an
experimental plugin).

Known unknowns
==============

How does one suppress a specific false-positive site?
Do we need a pragma for it?  (though pragmas ought to already affect some of
the underlying checkers...)

Do we really want .json for the policy format?
If we're expecting users to edit this, we need great error messages,
and probably support for comments.  Would YAML or somesuch be better?
Or have them as individual command-line flags, and the policy files are
"@" files for gcc.

How to mark which checkers are appropriate for which languages?

(etc; see also all the FIXMEs in the code...)

Dependencies
============

The "checkers" subdirectory uses Python 2 or 3, and has a few Python
dependencies, including "firehose" and "gccinvocation".

How it works
============

If enabled, toplev.c starts each of the various checkers from separate
threads from near the start of toplev.c, so that the checkers run in
parallel with each other, and with the bulk of cc1.  Near the end of
toplev.c it waits for each thread to finish, and reads the stdout,
which is expected to be in Firehose JSON format.  This is then sent
through the diagnostic subsystem.

Each "checker" is a harness script, which "knows" how to invoke
the particular 3rd-party tool, and coerce the output from the tool
into the common JSON format.

Some notes on the data model can be seen here:
  http://firehose.readthedocs.io/en/latest/data-model.html
(though that's expressed as Python objects and XML, rather than
the JSON format).

Successfully bootstrapped&regrtested the combination of the patches
on x86_64-pc-linux-gnu (though the only testcases are selftest based
unit-tests, rather than DejaGnu tests).

Thoughts?
Dave

David Malcolm (22):
  Expose assert_loceq outside of input.c; add ASSERT_LOCEQ
  libcpp: add linemap_position_for_file_line_and_column
  Add JSON implementation
  Add firehose.h/cc
  diagnostic.c/h: add support for external tools
  Makefile.in: hack in -lpthread
  Add minimal version of Nick Clifton's annobin code
  Add GNU_BUILD_ATTRIBUTE_STATIC_ANALYSIS to annobin.h
  Add selftest::read_file (..., FILE *, ...)
  Add checkers.h/cc
  Add checkers/test-sources
  Add -Wrun-analyzers= to common.opt, toplev.c, and invoke.texi
  Add checkers/checker.py
  Add checkers/always_fails.py
  Add checkers/clang_analyzer.py
  Add checkers/coverity.py
  Add checkers/cppcheck.py
  Add checkers/flawfinder.py
  Add checkers/ianal.py
  Add checkers/splint.py
  Add checkers/Makefile
  Add contrib/get-static-analysis.py

 checkers/ChangeLog                                 |    9 +
 checkers/Makefile                                  |   23 +
 checkers/always_fails.py                           |   57 +
 checkers/checker.py                                |  367 ++++
 checkers/clang_analyzer.py                         |  145 ++
 checkers/coverity.py                               |  141 ++
 checkers/cppcheck.py                               |  138 ++
 checkers/flawfinder.py                             |  124 ++
 checkers/ianal.py                                  |   79 +
 checkers/splint.py                                 |   77 +
 checkers/test-sources/conditional-leak.c           |   17 +
 checkers/test-sources/cpychecker-demo.c            |  110 ++
 checkers/test-sources/divide-by-zero.c             |    4 +
 checkers/test-sources/harmless.c                   |    9 +
 checkers/test-sources/multiple-1.c                 |    6 +
 checkers/test-sources/multiple-2.c                 |    9 +
 checkers/test-sources/out-of-bounds.c              |    6 +
 checkers/test-sources/read-through-null.c          |    4 +
 checkers/test-sources/return-of-stack-address.c    |    6 +
 checkers/test-sources/unconditional-file-leak.c    |   10 +
 contrib/get-static-analysis.py                     |   47 +
 gcc/Makefile.in                                    |    7 +-
 gcc/annobin.cc                                     |  185 ++
 gcc/annobin.h                                      |   45 +
 gcc/checkers.cc                                    |  736 ++++++++
 gcc/checkers.h                                     |   26 +
 gcc/common.opt                                     |    4 +
 gcc/diagnostic-show-locus.c                        |   29 +-
 gcc/diagnostic.c                                   |   85 +-
 gcc/diagnostic.h                                   |    5 +
 gcc/doc/invoke.texi                                |    8 +-
 gcc/firehose.cc                                    |  709 ++++++++
 gcc/firehose.h                                     |  199 ++
 gcc/input.c                                        |   71 +-
 gcc/json.cc                                        | 1914 ++++++++++++++++++++
 gcc/json.h                                         |  214 +++
 gcc/selftest-diagnostic.h                          |   62 +
 gcc/selftest-input.h                               |   54 +
 gcc/selftest-run-tests.c                           |    3 +
 gcc/selftest.c                                     |   16 +-
 gcc/selftest.h                                     |   10 +
 .../checker-output/test-clang-analyzer.json        |  122 ++
 .../selftests/checker-output/test-cppcheck.json    |   50 +
 .../selftests/checker-output/test-failure.json     |   38 +
 .../selftests/checker-policy/test-policy.json      |    7 +
 gcc/toplev.c                                       |    9 +
 libcpp/include/line-map.h                          |    9 +
 libcpp/line-map.c                                  |   51 +
 48 files changed, 6001 insertions(+), 55 deletions(-)
 create mode 100644 checkers/ChangeLog
 create mode 100644 checkers/Makefile
 create mode 100755 checkers/always_fails.py
 create mode 100755 checkers/checker.py
 create mode 100755 checkers/clang_analyzer.py
 create mode 100644 checkers/coverity.py
 create mode 100755 checkers/cppcheck.py
 create mode 100755 checkers/flawfinder.py
 create mode 100755 checkers/ianal.py
 create mode 100755 checkers/splint.py
 create mode 100644 checkers/test-sources/conditional-leak.c
 create mode 100644 checkers/test-sources/cpychecker-demo.c
 create mode 100644 checkers/test-sources/divide-by-zero.c
 create mode 100644 checkers/test-sources/harmless.c
 create mode 100644 checkers/test-sources/multiple-1.c
 create mode 100644 checkers/test-sources/multiple-2.c
 create mode 100644 checkers/test-sources/out-of-bounds.c
 create mode 100644 checkers/test-sources/read-through-null.c
 create mode 100644 checkers/test-sources/return-of-stack-address.c
 create mode 100644 checkers/test-sources/unconditional-file-leak.c
 create mode 100644 contrib/get-static-analysis.py
 create mode 100644 gcc/annobin.cc
 create mode 100644 gcc/annobin.h
 create mode 100644 gcc/checkers.cc
 create mode 100644 gcc/checkers.h
 create mode 100644 gcc/firehose.cc
 create mode 100644 gcc/firehose.h
 create mode 100644 gcc/json.cc
 create mode 100644 gcc/json.h
 create mode 100644 gcc/selftest-diagnostic.h
 create mode 100644 gcc/selftest-input.h
 create mode 100644 gcc/testsuite/selftests/checker-output/test-clang-analyzer.json
 create mode 100644 gcc/testsuite/selftests/checker-output/test-cppcheck.json
 create mode 100644 gcc/testsuite/selftests/checker-output/test-failure.json
 create mode 100644 gcc/testsuite/selftests/checker-policy/test-policy.json

-- 
1.8.5.3

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH 03/22] Add JSON implementation
  2017-08-04 21:30 [PATCH 00/22] RFC: integrated 3rd-party static analysis support David Malcolm
  2017-08-04 21:30 ` [PATCH 02/22] libcpp: add linemap_position_for_file_line_and_column David Malcolm
  2017-08-04 21:30 ` [PATCH 01/22] Expose assert_loceq outside of input.c; add ASSERT_LOCEQ David Malcolm
@ 2017-08-04 21:30 ` David Malcolm
  2017-09-01 17:56   ` Jeff Law
  2017-08-04 21:36 ` [PATCH 17/22] Add checkers/cppcheck.py David Malcolm
                   ` (22 subsequent siblings)
  25 siblings, 1 reply; 35+ messages in thread
From: David Malcolm @ 2017-08-04 21:30 UTC (permalink / raw)
  To: gcc-patches; +Cc: David Malcolm

This patch adds support to gcc for reading and writing JSON,
based on DOM-like trees of json::value instances.

gcc/ChangeLog:
	* Makefile.in (OBJS): Add json.o.
	* json.cc: New file.
	* json.h: New file.
	* selftest-run-tests.c (selftest::run_tests): Call json_cc_tests.
	* selftest.h (selftest::json_cc_tests): New decl.
---
 gcc/Makefile.in          |    1 +
 gcc/json.cc              | 1914 ++++++++++++++++++++++++++++++++++++++++++++++
 gcc/json.h               |  214 ++++++
 gcc/selftest-run-tests.c |    1 +
 gcc/selftest.h           |    1 +
 5 files changed, 2131 insertions(+)
 create mode 100644 gcc/json.cc
 create mode 100644 gcc/json.h

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index efca916..4f7fd0c 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1368,6 +1368,7 @@ OBJS = \
 	ira-color.o \
 	ira-emit.o \
 	ira-lives.o \
+	json.o \
 	jump.o \
 	langhooks.o \
 	lcm.o \
diff --git a/gcc/json.cc b/gcc/json.cc
new file mode 100644
index 0000000..e0d5a76
--- /dev/null
+++ b/gcc/json.cc
@@ -0,0 +1,1914 @@
+/* JSON parsing
+   Copyright (C) 2017 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "json.h"
+#include "pretty-print.h"
+#include "math.h"
+#include "selftest.h"
+
+using namespace json;
+
+/* class json::value.  */
+
+/* Generate a char * for this json::value tree.
+   The returned value must be freed by the caller.  */
+
+char *
+value::to_str () const
+{
+  pretty_printer pp;
+  print (&pp);
+  return xstrdup (pp_formatted_text (&pp));
+}
+
+/* Dump this json::value tree to OUTF.
+   No formatting is done.  There are no guarantees about the order
+   in which the key/value pairs of json::objects are printed.  */
+
+void
+value::dump (FILE *outf) const
+{
+  pretty_printer pp;
+  pp_buffer (&pp)->stream = outf;
+  print (&pp);
+  pp_flush (&pp);
+}
+
+/* If this json::value is a json::object, return it,
+   otherwise return NULL.  */
+
+const object *
+value::as_object () const
+{
+  if (get_kind () != JSON_OBJECT)
+    return NULL;
+  return static_cast <const object *> (this);
+}
+
+/* If this json::value is a json::array, return it,
+   otherwise return NULL.  */
+
+const array *
+value::as_array () const
+{
+  if (get_kind () != JSON_ARRAY)
+    return NULL;
+  return static_cast <const array *> (this);
+}
+
+/* If this json::value is a json::number, return it,
+   otherwise return NULL.  */
+
+const number *
+value::as_number () const
+{
+  if (get_kind () != JSON_NUMBER)
+    return NULL;
+  return static_cast <const number *> (this);
+}
+
+/* If this json::value is a json::string, return it,
+   otherwise return NULL.  */
+
+const string *
+value::as_string () const
+{
+  if (get_kind () != JSON_STRING)
+    return NULL;
+  return static_cast <const string *> (this);
+}
+
+/* Attempt to get the value of a key/value pair from this value
+   as if THIS value were an object.
+
+   If THIS is not a json::object, write an error message to OUT_ERR
+   (which must be freed by the caller) and return false.
+
+   Otherwise write the value ptr (possibly NULL) to OUT_VALUE and
+   return true.  */
+
+bool
+value::get_optional_value_by_key (const char *name, const value *&out_value,
+				  char *&out_err) const
+{
+  const json::object *obj = as_object ();
+  if (!obj)
+    {
+      out_err = xstrdup ("not an object");
+      return false;
+    }
+  out_value = obj->get (name);
+  return true;
+}
+
+/* Attempt to get a string value of a key/value pair from this value
+   as if THIS value were an object.
+
+   If THIS is a json::object, and KEY is either not present, is a string,
+   or is the "null" JSON literal, then return true, and write to OUT_VALUE.
+   If a string, then the ptr is written to OUT_VALUE, otherwise NULL
+   is written to OUT_VALUE.
+
+   If THIS is not a json::object, or KEY is not a string/"null",
+   return false and write an error message to OUT_ERR
+   (which must be freed by the caller).  */
+
+bool
+value::get_optional_string_by_key (const char *name, const char *&out_value,
+				   char *&out_err) const
+{
+  const json::value *v;
+  if (!get_optional_value_by_key (name, v, out_err))
+    return false;
+  if (v && v->get_kind () != JSON_NULL)
+    {
+      const json::string *s = v->as_string ();
+      if (!s)
+	{
+	  out_err = xasprintf ("not a string: \"%s\"", name);
+	  return false;
+	}
+      out_value = s->get_string ();
+      return true;
+    }
+  else
+    {
+      out_value = NULL;
+      return true;
+    }
+}
+
+/* Attempt to look up the value of a key/value pair from this value
+   as if this value were an object.
+
+   To succeed, THIS must be a json::object, and it must have a key named
+   NAME.
+
+   On success, return true and write the value to OUT_VALUE.
+   On failure, return false and write an error message to OUT_ERR
+   (which must be freed by the caller).  */
+
+bool
+value::get_value_by_key (const char *name, const value *&out_value,
+			 char *&out_err) const
+{
+  const json::object *obj = as_object ();
+  if (!obj)
+    {
+      out_err = xstrdup ("not an object");
+      return false;
+    }
+  const json::value *v = obj->get (name);
+  if (!v)
+    {
+      out_err = xasprintf ("missing attribute: \"%s\"", name);
+      return false;
+    }
+  out_value = v;
+  return true;
+}
+
+/* As value::get_value_by_key, but the value must be a number;
+   if successful, write it as an int to OUT_VALUE.  */
+
+bool
+value::get_int_by_key (const char *name, int &out_value, char *&out_err) const
+{
+  const json::value *v;
+  if (!get_value_by_key (name, v, out_err))
+    return false;
+  const json::number *n = v->as_number ();
+  if (!n)
+    {
+      out_err = xasprintf ("not a number: \"%s\"", name);
+      return false;
+    }
+  out_value = n->get ();
+  return true;
+}
+
+/* As value::get_value_by_key, but the value must be a string;
+   if successful, write it as const char * to OUT_VALUE.  */
+
+bool
+value::get_string_by_key (const char *name, const char *&out_value,
+			  char *&out_err) const
+{
+  const json::value *v;
+  if (!get_value_by_key (name, v, out_err))
+    return false;
+  const json::string *s = v->as_string ();
+  if (!s)
+    {
+      out_err = xasprintf ("not a string: \"%s\"", name);
+      return false;
+    }
+  out_value = s->get_string ();
+  return true;
+}
+
+/* As value::get_value_by_key, but the value must be an array;
+   if successful, write it as a json::array * to OUT_VALUE.  */
+
+bool
+value::get_array_by_key (const char *name, const array *&out_value,
+			 char *&out_err) const
+{
+  const json::value *v;
+  if (!get_value_by_key (name, v, out_err))
+    return false;
+  const json::array *arr = v->as_array ();
+  if (!arr)
+    {
+      out_err = xasprintf ("not an array: \"%s\"", name);
+      return false;
+    }
+  out_value = arr;
+  return true;
+}
+
+/* class json::object, a subclass of json::value, representing
+   an unordered collection of key/value pairs.  */
+
+/* json::object's dtor.  */
+
+object::~object ()
+{
+  for (map_t::iterator it = m_map.begin (); it != m_map.end (); ++it)
+    {
+      free (const_cast <char *>((*it).first));
+      delete ((*it).second);
+    }
+}
+
+/* Implementation of json::value::print for json::object.  */
+
+void
+object::print (pretty_printer *pp) const
+{
+  /* Note that the order is not guaranteed.  */
+  pp_character (pp, '{');
+  for (map_t::iterator it = m_map.begin (); it != m_map.end (); ++it)
+    {
+      if (it != m_map.begin ())
+	pp_string (pp, ", ");
+      const char *key = const_cast <char *>((*it).first);
+      value *value = (*it).second;
+      pp_printf (pp, "\"%s\": ", key); // FIXME: escaping?
+      value->print (pp);
+    }
+  pp_character (pp, '}');
+}
+
+/* Implementation of json::value::clone for json::object.  */
+
+value *
+object::clone () const
+{
+  object *other = new object ();
+  for (map_t::iterator it = m_map.begin (); it != m_map.end (); ++it)
+    {
+      const char *key = const_cast <char *>((*it).first);
+      value *value = (*it).second;
+      other->set (key, value->clone ());
+    }
+  return other;
+}
+
+/* Get the json::value * for KEY, or NULL if the key is not present.  */
+
+value *
+object::get (const char *key) const
+{
+  value **slot = const_cast <object*> (this)->m_map.get (key);
+  if (slot)
+    return *slot;
+  return NULL;
+}
+
+/* As object::get (KEY), but return NULL if the value of the key
+   is the "null" JSON literal.  */
+
+value *
+object::get_if_nonnull (const char *key) const
+{
+  value *result = get (key);
+  if (!result)
+    return NULL;
+  if (result->get_kind () == JSON_NULL)
+    return NULL;
+  return result;
+}
+
+/* Set the json::value * for KEY, taking ownership of VALUE
+   (and taking a copy of KEY if necessary).  */
+
+void
+object::set (const char *key, value *v)
+{
+  value **ptr = m_map.get (key);
+  if (ptr)
+    {
+      /* If the key is already present, delete the existing value
+	 and overwrite it.  */
+      delete *ptr;
+      *ptr = v;
+    }
+  else
+    /* If the key wasn't already present, take a copy of the key,
+       and store the value.  */
+    m_map.put (xstrdup (key), v);
+}
+
+/* class json::array, a subclass of json::value, representing
+   an ordered collection of values.  */
+
+/* json::array's dtor.  */
+
+array::~array ()
+{
+  unsigned i;
+  value *v;
+  FOR_EACH_VEC_ELT (m_elements, i, v)
+    delete v;
+}
+
+/* Implementation of json::value::print for json::array.  */
+
+void
+array::print (pretty_printer *pp) const
+{
+  pp_character (pp, '[');
+  unsigned i;
+  value *v;
+  FOR_EACH_VEC_ELT (m_elements, i, v)
+    {
+      if (i)
+	pp_string (pp, ", ");
+      v->print (pp);
+    }
+  pp_character (pp, ']');
+}
+
+/* Implementation of json::value::clone for json::array.  */
+
+value *
+array::clone () const
+{
+  array *other = new array ();
+  unsigned i;
+  value *v;
+  FOR_EACH_VEC_ELT (m_elements, i, v)
+    other->append (v->clone ());
+  return other;
+}
+
+/* class json::number, a subclass of json::value, wrapping a double.  */
+
+/* Implementation of json::value::print for json::number.  */
+
+void
+number::print (pretty_printer *pp) const
+{
+  char tmp[1024];
+  snprintf (tmp, sizeof (tmp), "%g", m_value);
+  pp_string (pp, tmp);
+}
+
+/* Implementation of json::value::clone for json::number.  */
+
+value *
+number::clone () const
+{
+  return new number (m_value);
+}
+
+/* class json::string, a subclass of json::value.  */
+
+void
+string::print (pretty_printer *pp) const
+{
+  pp_character (pp, '"');
+  for (const char *ptr = m_utf8; *ptr; ptr++)
+    {
+      char ch = *ptr;
+      switch (ch)
+	{
+	case '"':
+	  pp_string (pp, "\\\"");
+	  break;
+	case '\\': /* A backslash is written as two backslashes.  */
+	  pp_string (pp, "\\\\");
+	  break;
+	case '\b':
+	  pp_string (pp, "\\b");
+	  break;
+	case '\f':
+	  pp_string (pp, "\\f");
+	  break;
+	case '\n':
+	  pp_string (pp, "\\n");
+	  break;
+	case '\r':
+	  pp_string (pp, "\\r");
+	  break;
+	case '\t':
+	  pp_string (pp, "\\t");
+	  break;
+	/* All other characters are emitted unescaped.  */
+	default:
+	  pp_character (pp, ch);
+	}
+    }
+  pp_character (pp, '"');
+}
+
+/* Implementation of json::value::clone for json::string.  */
+
+value *
+string::clone () const
+{
+  return new string (m_utf8);
+}
+
+/* class json::literal, a subclass of json::value.  */
+
+/* Implementation of json::value::print for json::literal.
+   Write the literal name corresponding to m_kind ("true", "false" or
+   "null") to PP.  */
+
+void
+literal::print (pretty_printer *pp) const
+{
+  const char *text = NULL;
+  switch (m_kind)
+    {
+    case JSON_TRUE:
+      text = "true";
+      break;
+    case JSON_FALSE:
+      text = "false";
+      break;
+    case JSON_NULL:
+      text = "null";
+      break;
+    default:
+      /* No other kinds are valid for a literal.  */
+      gcc_unreachable ();
+    }
+  pp_string (pp, text);
+}
+
+/* Implementation of json::value::clone for json::literal.
+   Return a new literal of the same kind.  */
+
+value *
+literal::clone () const
+{
+  literal *copy = new literal (m_kind);
+  return copy;
+}
+
+\f
+/* Declarations relating to parsing JSON, all within an
+   anonymous namespace.  */
+
+namespace {
+
+/* A typedef representing a single unicode character.  */
+
+typedef unsigned unichar;
+
+/* An enum for discriminating different kinds of JSON token.
+   The enumerators are used as indices into token_id_name, so the two
+   must be kept in the same order.  */
+
+enum token_id
+{
+  TOK_ERROR,
+
+  TOK_EOF,
+
+  /* Punctuation.  */
+  TOK_OPEN_SQUARE,
+  TOK_OPEN_CURLY,
+  TOK_CLOSE_SQUARE,
+  TOK_CLOSE_CURLY,
+  TOK_COLON,
+  TOK_COMMA,
+
+  /* Literal names.  */
+  TOK_TRUE,
+  TOK_FALSE,
+  TOK_NULL,
+
+  TOK_STRING,
+  TOK_NUMBER
+};
+
+/* Human-readable descriptions of enum token_id, indexed by token_id
+   and used when building parser error messages.  */
+
+static const char *token_id_name[] = {
+  "error",
+  "EOF",
+  "'['",
+  "'{'",
+  "']'",
+  "'}'",
+  "':'",
+  "','",
+  "'true'",
+  "'false'",
+  "'null'",
+  "string",
+  "number"
+};
+
+/* Tokens within the JSON lexer.  */
+
+struct token
+{
+  /* The kind of token.  */
+  enum token_id id;
+
+  /* The location of this token within the unicode
+     character stream.  */
+  int index;
+
+  /* Payload; which member (if any) is meaningful depends on ID.  */
+  union
+  {
+    /* Value for TOK_ERROR and TOK_STRING.
+       This is a heap-allocated buffer, released by lexer::consume.  */
+    char *string;
+
+    /* Value for TOK_NUMBER.  */
+    double number;
+  } u;
+};
+
+/* A class for lexing JSON.
+   Input is supplied via add_utf8, and tokens are then read using
+   peek and consume.  */
+
+class lexer
+{
+ public:
+  lexer ();
+  ~lexer ();
+  bool add_utf8 (size_t length, const char *utf8_buf, char **err_out);
+
+  const token *peek ();
+  void consume ();
+
+ private:
+  bool get_char (unichar &out);
+  void unget_char ();
+  static void dump_token (FILE *outf, const token *tok);
+  void lex_token (token *out);
+  void lex_string (token *out);
+  void lex_number (token *out, unichar first_char);
+  bool rest_of_literal (const char *suffix);
+
+ private:
+  /* The decoded input, one element per unicode character.  */
+  auto_vec<unichar> m_buffer;
+
+  /* Index within m_buffer of the next character for get_char.  */
+  int m_next_char_idx;
+
+  /* Queue of tokens that have been lexed but not yet consumed.  */
+  static const int MAX_TOKENS = 1;
+  token m_next_tokens[MAX_TOKENS];
+  int m_num_next_tokens;
+};
+
+/* A class for parsing JSON.  */
+
+class parser
+{
+ public:
+  parser (char **err_out);
+  bool add_utf8 (size_t length, const char *utf8_buf, char **err_out);
+  value *parse_value (int depth);
+  object *parse_object (int depth);
+  array *parse_array (int depth);
+
+  /* Return true if an error message has been stored in *m_err_out.  */
+  bool seen_error_p () const { return *m_err_out; }
+  void require_eof ();
+
+ private:
+  void require (enum token_id tok_id);
+  void error_at (int, const char *, ...) ATTRIBUTE_PRINTF_3;
+
+ private:
+  lexer m_lexer;
+
+  /* Where the first error message (if any) is written.  */
+  char **m_err_out;
+};
+
+} // anonymous namespace for parsing implementation
+
+/* Parser implementation.  */
+
+/* lexer's ctor: start with an empty input buffer, reading from its
+   start, and with no tokens queued.  */
+
+lexer::lexer ()
+: m_next_char_idx (0), m_num_next_tokens (0)
+{
+  /* m_buffer is default-constructed, and hence empty.  */
+}
+
+/* lexer's dtor.
+   Consume any tokens that are still queued, so that consume can
+   release the string buffers owned by TOK_ERROR and TOK_STRING
+   tokens.  */
+
+lexer::~lexer ()
+{
+  while (m_num_next_tokens > 0)
+    consume ();
+}
+
+/* Peek the next token, lexing it first if none is queued.
+   The token remains owned by the lexer, and stays queued until
+   consume is called.  */
+
+const token *
+lexer::peek ()
+{
+  token *front = &m_next_tokens[0];
+  if (m_num_next_tokens != 0)
+    return front;
+
+  lex_token (front);
+  m_num_next_tokens++;
+  return front;
+}
+
+/* Consume the next token, lexing one first if none is queued.
+   Any string buffer owned by the token is freed, so pointers
+   previously obtained via peek must not be used afterwards.  */
+
+void
+lexer::consume ()
+{
+  /* Make sure that there is a token to discard.  */
+  if (m_num_next_tokens == 0)
+    peek ();
+
+  gcc_assert (m_num_next_tokens > 0);
+  gcc_assert (m_num_next_tokens <= MAX_TOKENS);
+
+  token *front = &m_next_tokens[0];
+
+  /* Debugging aid; disabled.  */
+  if (0)
+    {
+      fprintf (stderr, "consuming token: ");
+      dump_token (stderr, front);
+      fprintf (stderr, "\n");
+    }
+
+  /* Only these two kinds of token own a heap-allocated string.  */
+  switch (front->id)
+    {
+    case TOK_ERROR:
+    case TOK_STRING:
+      free (front->u.string);
+      break;
+    default:
+      break;
+    }
+
+  /* Shuffle any remaining tokens down to the front of the queue.  */
+  m_num_next_tokens--;
+  memmove (front, front + 1, sizeof (token) * m_num_next_tokens);
+}
+
+/* Add LENGTH bytes of UTF-8 encoded text from UTF8_BUF to this lexer's
+   buffer, decoding each sequence to a single unichar.
+
+   Return true if all of the input was decoded.
+   On encountering an ill-formed sequence, an overlong encoding, or
+   an invalid character (a surrogate, or a value above 0x7FFFFFFF),
+   write a newly-allocated message to *ERR_OUT and return false;
+   characters decoded before the failure remain in the buffer.  */
+
+bool
+lexer::add_utf8 (size_t length, const char *utf8_buf, char **err_out)
+{
+  /* FIXME: adapted from charset.c:one_utf8_to_cppchar.  */
+  /* masks[N-1] selects the payload bits of the lead byte of an N-byte
+     sequence; patns[N-1] gives the values of its remaining bits.  */
+  static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01 };
+  static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+
+  /* INBUFP and INBYTESLEFTP alias the cursor and LENGTH respectively,
+     so updates made through them advance the loop below.  */
+  const uchar *inbuf = (const unsigned char *) (utf8_buf);
+  const uchar **inbufp = &inbuf;
+  size_t *inbytesleftp = &length;
+
+  while (length > 0)
+    {
+      unichar c;
+      /* A per-iteration cursor, shadowing the outer INBUF; it is
+	 written back through INBUFP once a character is accepted.  */
+      const uchar *inbuf = *inbufp;
+      size_t nbytes, i;
+
+      c = *inbuf;
+      if (c < 0x80)
+	{
+	  /* Single-byte (ASCII) case.  */
+	  m_buffer.safe_push (c);
+	  *inbytesleftp -= 1;
+	  *inbufp += 1;
+	  continue;
+	}
+
+      /* The number of leading 1-bits in the first byte indicates how many
+	 bytes follow.  */
+      for (nbytes = 2; nbytes < 7; nbytes++)
+	if ((c & ~masks[nbytes-1]) == patns[nbytes-1])
+	  goto found;
+      *err_out = xstrdup ("ill-formed UTF-8 sequence");
+      return false;
+    found:
+
+      /* Reject a sequence that is truncated by the end of the input.  */
+      if (*inbytesleftp < nbytes)
+	{
+	  *err_out = xstrdup ("ill-formed UTF-8 sequence");
+	  return false;
+	}
+
+      c = (c & masks[nbytes-1]);
+      inbuf++;
+      for (i = 1; i < nbytes; i++)
+	{
+	  /* Each continuation byte must have the form 10xxxxxx, and
+	     contributes six bits.  */
+	  unichar n = *inbuf++;
+	  if ((n & 0xC0) != 0x80)
+	    {
+	      *err_out = xstrdup ("ill-formed UTF-8 sequence");
+	      return false;
+	    }
+	  c = ((c << 6) + (n & 0x3F));
+	}
+
+      /* Make sure the shortest possible encoding was used.  */
+      if ((   c <=      0x7F && nbytes > 1)
+	  || (c <=     0x7FF && nbytes > 2)
+	  || (c <=    0xFFFF && nbytes > 3)
+	  || (c <=  0x1FFFFF && nbytes > 4)
+	  || (c <= 0x3FFFFFF && nbytes > 5))
+	{
+	  *err_out = xstrdup ("ill-formed UTF-8:"
+			      " shortest possible encoding not used");
+	  return false;
+	}
+
+      /* Make sure the character is valid.  */
+      if (c > 0x7FFFFFFF || (c >= 0xD800 && c <= 0xDFFF))
+	{
+	  *err_out = xstrdup ("ill-formed UTF-8: invalid character");
+	  return false;
+	}
+
+      m_buffer.safe_push (c);
+      *inbufp = inbuf;
+      *inbytesleftp -= nbytes;
+    }
+  return true;
+}
+
+/* Attempt to read the next unicode character from this lexer's buffer.
+   On success, store it in OUT, advance past it, and return true.
+   Return false, leaving OUT untouched, if the buffer is exhausted.  */
+
+bool
+lexer::get_char (unichar &out)
+{
+  const int num_chars = (int)m_buffer.length ();
+  if (m_next_char_idx < num_chars)
+    {
+      out = m_buffer[m_next_char_idx];
+      m_next_char_idx++;
+      return true;
+    }
+  return false;
+}
+
+/* Step back by one character within the buffer, so that the character
+   most recently returned by get_char will be returned again.  */
+
+void
+lexer::unget_char ()
+{
+  m_next_char_idx -= 1;
+}
+
+/* Write a textual representation of TOK to OUTF: the name of its
+   token_id enumerator, followed by its payload (if any) in
+   parentheses.
+   This is a debugging aid for the lexer and parser, and is not meant
+   for user-facing output.  */
+
+void
+lexer::dump_token (FILE *outf, const token *tok)
+{
+  const char *name = NULL;
+  switch (tok->id)
+    {
+    /* Kinds that carry a payload are printed directly.  */
+    case TOK_ERROR:
+      fprintf (outf, "TOK_ERROR (\"%s\")", tok->u.string);
+      return;
+
+    case TOK_STRING:
+      fprintf (outf, "TOK_STRING (\"%s\")", tok->u.string);
+      return;
+
+    case TOK_NUMBER:
+      fprintf (outf, "TOK_NUMBER (%f)", tok->u.number);
+      return;
+
+    /* All other kinds are printed purely by name.  */
+    case TOK_EOF:
+      name = "TOK_EOF";
+      break;
+
+    case TOK_OPEN_SQUARE:
+      name = "TOK_OPEN_SQUARE";
+      break;
+
+    case TOK_OPEN_CURLY:
+      name = "TOK_OPEN_CURLY";
+      break;
+
+    case TOK_CLOSE_SQUARE:
+      name = "TOK_CLOSE_SQUARE";
+      break;
+
+    case TOK_CLOSE_CURLY:
+      name = "TOK_CLOSE_CURLY";
+      break;
+
+    case TOK_COLON:
+      name = "TOK_COLON";
+      break;
+
+    case TOK_COMMA:
+      name = "TOK_COMMA";
+      break;
+
+    case TOK_TRUE:
+      name = "TOK_TRUE";
+      break;
+
+    case TOK_FALSE:
+      name = "TOK_FALSE";
+      break;
+
+    case TOK_NULL:
+      name = "TOK_NULL";
+      break;
+
+    default:
+      gcc_unreachable ();
+      break;
+    }
+  fprintf (outf, "%s", name);
+}
+
+/* Attempt to lex the input buffer, writing the next token to OUT.
+   On errors, TOK_ERROR (or TOK_EOF) is written to OUT.
+
+   OUT->index is set to the position of the token's first character
+   (or to the end of the buffer, for TOK_EOF).  For TOK_ERROR and
+   TOK_STRING, OUT->u.string is a newly-allocated buffer.  */
+
+void
+lexer::lex_token (token *out)
+{
+  /* Skip to next non-whitespace char.  */
+  unichar next_char;
+  while (1)
+    {
+      /* Record the index before reading, so that on leaving the loop
+	 it refers to the first character of the token.  */
+      out->index = m_next_char_idx;
+      if (!get_char (next_char))
+	{
+	  out->id = TOK_EOF;
+	  return;
+	}
+      if (next_char != ' '
+	  && next_char != '\t'
+	  && next_char != '\n'
+	  && next_char != '\r')
+	break;
+    }
+
+  switch (next_char)
+    {
+    case '[':
+      out->id = TOK_OPEN_SQUARE;
+      break;
+
+    case '{':
+      out->id = TOK_OPEN_CURLY;
+      break;
+
+    case ']':
+      out->id = TOK_CLOSE_SQUARE;
+      break;
+
+    case '}':
+      out->id = TOK_CLOSE_CURLY;
+      break;
+
+    case ':':
+      out->id = TOK_COLON;
+      break;
+
+    case ',':
+      out->id = TOK_COMMA;
+      break;
+
+    case '"':
+      lex_string (out);
+      break;
+
+    case '-':
+    case '0':
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+    case '8':
+    case '9':
+      lex_number (out, next_char);
+      break;
+
+    case 't':
+      /* Handle literal "true".  */
+      if (rest_of_literal ("rue"))
+	{
+	  out->id = TOK_TRUE;
+	  break;
+	}
+      else
+	goto err;
+
+    case 'f':
+      /* Handle literal "false".  */
+      if (rest_of_literal ("alse"))
+	{
+	  out->id = TOK_FALSE;
+	  break;
+	}
+      else
+	goto err;
+
+    case 'n':
+      /* Handle literal "null".  */
+      if (rest_of_literal ("ull"))
+	{
+	  out->id = TOK_NULL;
+	  break;
+	}
+      else
+	goto err;
+
+    /* Shared by the failed-literal cases above and by any character
+       that cannot start a token.  */
+    err:
+    default:
+      out->id = TOK_ERROR;
+      /* NOTE(review): NEXT_CHAR is a unichar, but is formatted with %c;
+	 confirm what is wanted for characters outside of ASCII.  */
+      out->u.string = xasprintf ("unexpected character: %c", next_char);
+      break;
+    }
+}
+
+/* Having consumed an open-quote character from the lexer's buffer, attempt
+   to lex the rest of a JSON string, writing the result to OUT (or
+   TOK_ERROR, if an error occurred).
+   (ECMA-404 section 9; RFC 7159 section 7).
+
+   On success OUT->u.string is a newly-allocated, NUL-terminated,
+   UTF-8 encoded copy of the string's content, with escape sequences
+   decoded.  On failure it is a newly-allocated error message.  */
+
+void
+lexer::lex_string (token *out)
+{
+  /* The decoded content, as unicode characters.  */
+  auto_vec<unichar> content;
+  bool still_going = true;
+  while (still_going)
+    {
+      unichar uc;
+      if (!get_char (uc))
+	{
+	  out->id = TOK_ERROR;
+	  out->u.string = xstrdup ("EOF within string");
+	  return;
+	}
+      switch (uc)
+	{
+	case '"':
+	  /* Closing quote.  */
+	  still_going = false;
+	  break;
+	case '\\':
+	  {
+	    /* Escape sequence; the next character selects which.  */
+	    unichar next_char;
+	    if (!get_char (next_char))
+	      {
+		out->id = TOK_ERROR;
+		out->u.string = xstrdup ("EOF within string");;
+		return;
+	      }
+	    switch (next_char)
+	      {
+	      case '"':
+	      case '\\':
+	      case '/':
+		content.safe_push (next_char);
+		break;
+
+	      case 'b':
+		content.safe_push ('\b');
+		break;
+
+	      case 'f':
+		content.safe_push ('\f');
+		break;
+
+	      case 'n':
+		content.safe_push ('\n');
+		break;
+
+	      case 'r':
+		content.safe_push ('\r');
+		break;
+
+	      case 't':
+		content.safe_push ('\t');
+		break;
+
+	      case 'u':
+		{
+		  /* Exactly four hex digits, giving one character.
+		     Each \u escape is handled independently: nothing
+		     here combines a pair of UTF-16 surrogates.  */
+		  unichar result = 0;
+		  for (int i = 0; i < 4; i++)
+		    {
+		      unichar hexdigit;
+		      if (!get_char (hexdigit))
+			{
+			  out->id = TOK_ERROR;
+			  out->u.string = xstrdup ("EOF within string");
+			  return;
+			}
+		      result <<= 4;
+		      if (hexdigit >= '0' && hexdigit <= '9')
+			result += hexdigit - '0';
+		      else if (hexdigit >= 'a' && hexdigit <= 'f')
+			result += (hexdigit - 'a') + 10;
+		      else if (hexdigit >= 'A' && hexdigit <= 'F')
+			result += (hexdigit - 'A') + 10;
+		      else
+			{
+			  out->id = TOK_ERROR;
+			  out->u.string = xstrdup ("bogus hex char");
+			  return;
+			}
+		    }
+		  content.safe_push (result);
+		}
+		break;
+
+	      default:
+		out->id = TOK_ERROR;
+		out->u.string = xstrdup ("unrecognized escape char");
+		return;
+	      }
+	  }
+	  break;
+
+	default:
+	  /* Reject unescaped control characters U+0000 through U+001F
+	     (ECMA-404 section 9 para 1; RFC 7159 section 7 para 1).  */
+	  if (uc <= 0x1f)
+	    {
+		out->id = TOK_ERROR;
+		out->u.string = xstrdup ("unescaped control char");
+		return;
+	    }
+
+	  /* Otherwise, add regular unicode code point.  */
+	  content.safe_push (uc);
+	  break;
+	}
+    }
+
+  out->id = TOK_STRING;
+
+  /* Re-encode the content as UTF-8.  */
+  auto_vec<char> utf8_buf;
+  // FIXME: adapted from libcpp/charset.c:one_cppchar_to_utf8
+  for (unsigned i = 0; i < content.length (); i++)
+    {
+      static const uchar masks[6] =  { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+      static const uchar limits[6] = { 0x80, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE };
+      size_t nbytes;
+      /* The bytes are built backwards from the end of BUF, finishing
+	 with the lead byte.  */
+      uchar buf[6], *p = &buf[6];
+      unichar c = content[i];
+
+      nbytes = 1;
+      if (c < 0x80)
+	*--p = c;
+      else
+	{
+	  do
+	    {
+	      *--p = ((c & 0x3F) | 0x80);
+	      c >>= 6;
+	      nbytes++;
+	    }
+	  while (c >= 0x3F || (c & limits[nbytes-1]));
+	  *--p = (c | masks[nbytes-1]);
+	}
+
+      while (p < &buf[6])
+	utf8_buf.safe_push (*p++);
+    }
+
+  /* Copy the bytes into a NUL-terminated heap buffer.  An escaped
+     U+0000 within the content yields an embedded NUL byte here.  */
+  out->u.string = XNEWVEC (char, utf8_buf.length () + 1);
+  for (unsigned i = 0; i < utf8_buf.length (); i++)
+    out->u.string[i] = utf8_buf[i];
+  out->u.string[utf8_buf.length ()] = '\0';
+
+  // FIXME: leaks?  have a json_context do the allocation
+}
+
+/* Having consumed FIRST_CHAR, an initial digit or '-' character from
+   the lexer's buffer, attempt to lex the rest of a JSON number, writing
+   the result to OUT (or TOK_ERROR, if an error occurred).
+   (ECMA-404 section 8; RFC 7159 section 6).
+
+   The value is accumulated directly as a double.  Lexing stops at the
+   first character that cannot continue the number; that character is
+   pushed back for the next token.  */
+
+void
+lexer::lex_number (token *out, unichar first_char)
+{
+  bool negate = false;
+  double value = 0.0;
+  if (first_char == '-')
+    {
+      /* The sign must be followed by a digit; read it into FIRST_CHAR.  */
+      negate = true;
+      if (!get_char (first_char))
+	{
+	  out->id = TOK_ERROR;
+	  out->u.string = xstrdup ("expected digit");
+	  return;
+	}
+    }
+
+  /* Integer part: either a lone '0', or a nonzero digit followed by
+     zero or more digits.  Any digits directly after a leading '0' are
+     not consumed here.  */
+  if (first_char == '0')
+    value = 0.0;
+  else if (!ISDIGIT (first_char))
+    {
+      out->id = TOK_ERROR;
+      out->u.string = xstrdup ("expected digit");
+      return;
+    }
+  else
+    {
+      /* Got a nonzero digit; expect zero or more digits.  */
+      value = first_char - '0';
+      while (1)
+	{
+	  unichar uc;
+	  if (!get_char (uc))
+	    break;
+	  if (ISDIGIT (uc))
+	    {
+	      value *= 10;
+	      value += uc -'0';
+	      continue;
+	    }
+	  else
+	    {
+	      unget_char ();
+	      break;
+	    }
+	}
+    }
+
+  /* Optional '.', followed by one or more decimals.  */
+  unichar next_char;
+  if (get_char (next_char))
+    {
+      if (next_char == '.')
+	{
+	  /* Parse decimal digits.  */
+	  bool had_digit = false;
+	  // FIXME: does this lose too much precision?
+	  double digit_factor = 0.1;
+	  while (get_char (next_char))
+	    {
+	      if (!ISDIGIT (next_char))
+		{
+		  unget_char ();
+		  break;
+		}
+	      value += (next_char - '0') * digit_factor;
+	      digit_factor *= 0.1;
+	      had_digit = true;
+	    }
+	  if (!had_digit)
+	    {
+	      out->id = TOK_ERROR;
+	      out->u.string = xstrdup ("expected digit");
+	      return;
+	    }
+	}
+      else
+	unget_char ();
+    }
+
+  /* Parse 'e' and 'E'.  */
+  unichar exponent_char;
+  if (get_char (exponent_char))
+    {
+      if (exponent_char == 'e' || exponent_char == 'E')
+	{
+	  /* Optional +/-.  */
+	  unichar sign_char;
+	  int exponent = 0;
+	  bool negate_exponent = false;
+	  bool had_exponent_digit = false;
+	  if (!get_char (sign_char))
+	    {
+	      out->id = TOK_ERROR;
+	      out->u.string = xstrdup ("EOF within exponent");
+	      return;
+	    }
+	  if (sign_char == '-')
+	    negate_exponent = true;
+	  else if (sign_char == '+')
+	    ;
+	  else if (ISDIGIT (sign_char))
+	    {
+	      /* No sign: this is the first digit of the exponent.  */
+	      exponent = sign_char - '0';
+	      had_exponent_digit = true;
+	    }
+	  else
+	    {
+	      out->id = TOK_ERROR;
+	      out->u.string
+		= xstrdup ("expected '-','+' or digit within exponent");
+	      return;
+	    }
+
+	  /* One or more digits (we might have seen the digit above,
+	     though).  */
+	  /* NOTE(review): EXPONENT is an int, and nothing here bounds
+	     the number of digits accumulated into it.  */
+	  while (1)
+	    {
+	      unichar uc;
+	      if (!get_char (uc))
+		break;
+	      if (ISDIGIT (uc))
+		{
+		  exponent *= 10;
+		  exponent += uc -'0';
+		  had_exponent_digit = true;
+		  continue;
+		}
+	      else
+		{
+		  unget_char ();
+		  break;
+		}
+	    }
+	  if (!had_exponent_digit)
+	    {
+	      out->id = TOK_ERROR;
+	      out->u.string = xstrdup ("expected digit within exponent");
+	      return;
+	    }
+	  if (negate_exponent)
+	    exponent = -exponent;
+	  /* FIXME: better way to do this?  */
+	  value = value * pow (10, exponent);
+	}
+      else
+	unget_char ();
+    }
+
+  /* Apply the leading sign last, once the magnitude is complete.  */
+  if (negate)
+    value = -value;
+
+  out->id = TOK_NUMBER;
+  out->u.number = value;
+}
+
+/* Determine whether the characters about to be lexed spell out SUFFIX,
+   which must be pure ASCII.
+   If they do, consume them and return true.
+   Otherwise return false, consuming nothing.  */
+
+bool
+lexer::rest_of_literal (const char *suffix)
+{
+  const int start_idx = m_next_char_idx;
+  const int buf_len = (int)m_buffer.length ();
+
+  int matched = 0;
+  for (; suffix[matched] != '\0'; matched++)
+    {
+      /* Ran out of input before reaching the end of SUFFIX.  */
+      if (start_idx + matched >= buf_len)
+	return false;
+      /* This comparison relies on SUFFIX being ASCII.  */
+      if (m_buffer[start_idx + matched] != (unichar)suffix[matched])
+	return false;
+    }
+
+  /* Matched all of SUFFIX; skip over it.  */
+  m_next_char_idx += matched;
+  return true;
+}
+
+/* parser's ctor.
+   ERR_OUT must be non-NULL, and *ERR_OUT must be NULL on entry; the
+   first error encountered (if any) will be written there.  */
+
+parser::parser (char **err_out)
+: m_lexer (), m_err_out (err_out)
+{
+  gcc_assert (m_err_out);
+  gcc_assert (*m_err_out == NULL);
+  *m_err_out = NULL;
+}
+
+/* Feed LENGTH bytes of UTF-8 encoded text from UTF8_BUF to this
+   parser's lexer, returning the lexer's result.  ERR_OUT is passed
+   through to the lexer for reporting decoding failures.  */
+
+bool
+parser::add_utf8 (size_t length, const char *utf8_buf, char **err_out)
+{
+  bool ok = m_lexer.add_utf8 (length, utf8_buf, err_out);
+  return ok;
+}
+
+/* Parse a JSON value (object, array, number, string, or literal).
+   (ECMA-404 section 5; RFC 7159 section 3).
+
+   DEPTH is the nesting depth of the value, with 0 for the top-level
+   value.
+
+   Return a new json::value on success.  Strings, numbers and literals
+   yield NULL on failure, having recorded an error via error_at;
+   objects and arrays are returned as built by parse_object and
+   parse_array, so callers must also check seen_error_p.  */
+
+value *
+parser::parse_value (int depth)
+{
+  const token *tok = m_lexer.peek ();
+
+  /* Avoid stack overflow with deeply-nested inputs; RFC 7159 section 9
+     states: "An implementation may set limits on the maximum depth
+     of nesting.".
+
+     Ideally we'd avoid this limit (e.g. by rewriting parse_value,
+     parse_object, and parse_array into a single function with a vec of
+     state).  */
+  const int MAX_DEPTH = 100;
+  if (depth >= MAX_DEPTH)
+    {
+      error_at (tok->index, "maximum nesting depth exceeded: %i", MAX_DEPTH);
+      return NULL;
+    }
+
+  switch (tok->id)
+    {
+    case TOK_OPEN_CURLY:
+      return parse_object (depth);
+
+    case TOK_STRING:
+      {
+	/* Build the result before consuming the token, since consuming
+	   it frees tok->u.string.  */
+	string *result = new string (tok->u.string);
+	m_lexer.consume ();
+	return result;
+      }
+
+    case TOK_OPEN_SQUARE:
+      return parse_array (depth);
+
+    case TOK_NUMBER:
+      {
+	number *result = new number (tok->u.number);
+	m_lexer.consume ();
+	return result;
+      }
+
+    case TOK_TRUE:
+      {
+	literal *result = new literal (JSON_TRUE);
+	m_lexer.consume ();
+	return result;
+      }
+
+    case TOK_FALSE:
+      {
+	literal *result = new literal (JSON_FALSE);
+	m_lexer.consume ();
+	return result;
+      }
+
+    case TOK_NULL:
+      {
+	literal *result = new literal (JSON_NULL);
+	m_lexer.consume ();
+	return result;
+      }
+
+    case TOK_ERROR:
+      /* Report the lexer's own description of the problem, as
+	 parser::require does, rather than just the word "error".  */
+      error_at (tok->index, "invalid JSON token: %s", tok->u.string);
+      return NULL;
+
+    default:
+      error_at (tok->index, "unexpected token: %s", token_id_name[tok->id]);
+      return NULL;
+    }
+}
+
+/* Parse a JSON object.
+   (ECMA-404 section 6; RFC 7159 section 4).
+
+   DEPTH is the nesting depth of the object itself; its member values
+   are parsed at DEPTH + 1.
+
+   A new object is always returned.  If an error occurred it holds
+   only the members parsed so far, so callers must check
+   seen_error_p.  */
+
+object *
+parser::parse_object (int depth)
+{
+  require (TOK_OPEN_CURLY);
+
+  object *obj = new object ();
+
+  /* The brace must be followed by either a closing brace or a key.  */
+  const token *tok = m_lexer.peek ();
+  switch (tok->id)
+    {
+    case TOK_CLOSE_CURLY:
+      require (TOK_CLOSE_CURLY);
+      return obj;
+
+    case TOK_STRING:
+      break;
+
+    default:
+      error_at (tok->index, "expected string for object key");
+      return obj;
+    }
+
+  /* Parse one "key : value" member per iteration.  */
+  for (;;)
+    {
+      if (seen_error_p ())
+	break;
+
+      tok = m_lexer.peek ();
+      if (tok->id != TOK_STRING)
+	{
+	  error_at (tok->index, "expected string for object key");
+	  return obj;
+	}
+
+      /* Take a copy of the key, since consuming the token frees it.  */
+      char *name = xstrdup (tok->u.string);
+      m_lexer.consume ();
+
+      require (TOK_COLON);
+
+      value *member = parse_value (depth + 1);
+      /* We don't enforce uniqueness for keys.  */
+      if (member)
+	obj->set (name, member);
+      free (name);
+      if (!member)
+	return obj;
+
+      /* A comma means another member follows; anything else must be
+	 the closing brace.  */
+      if (m_lexer.peek ()->id != TOK_COMMA)
+	{
+	  require (TOK_CLOSE_CURLY);
+	  break;
+	}
+      m_lexer.consume ();
+    }
+  return obj;
+}
+
+/* Parse a JSON array.
+   (ECMA-404 section 7; RFC 7159 section 5).
+
+   DEPTH is the nesting depth of the array itself; its elements are
+   parsed at DEPTH + 1.
+
+   A new array is always returned.  If an error occurred it holds only
+   the elements parsed so far, so callers must check seen_error_p.  */
+
+array *
+parser::parse_array (int depth)
+{
+  require (TOK_OPEN_SQUARE);
+
+  array *arr = new array ();
+
+  /* Handle the empty array.  */
+  if (m_lexer.peek ()->id == TOK_CLOSE_SQUARE)
+    {
+      m_lexer.consume ();
+      return arr;
+    }
+
+  /* Parse one element per iteration.  */
+  while (!seen_error_p ())
+    {
+      value *element = parse_value (depth + 1);
+      if (element == NULL)
+	break;
+
+      arr->append (element);
+
+      /* A comma means another element follows; anything else must be
+	 the closing bracket.  */
+      if (m_lexer.peek ()->id != TOK_COMMA)
+	{
+	  require (TOK_CLOSE_SQUARE);
+	  break;
+	}
+      m_lexer.consume ();
+    }
+
+  return arr;
+}
+
+/* Require an EOF, or fail if there is surplus input.
+   Any failure is recorded as an error via require, rather than being
+   returned; callers should check seen_error_p afterwards.  */
+
+void
+parser::require_eof ()
+{
+  require (TOK_EOF);
+}
+
+/* Consume the next token, issuing an error if it is not of kind TOK_ID.  */
+
+void
+parser::require (enum token_id tok_id)
+{
+  const token *tok = m_lexer.peek ();
+  if (tok->id != tok_id)
+    {
+      if (tok->id == TOK_ERROR)
+	error_at (tok->index, "expected %s; got bad token: %s",
+		  token_id_name[tok_id], tok->u.string);
+      else
+	error_at (tok->index, "expected %s; got %s", token_id_name[tok_id],
+		  token_id_name[tok->id]);
+    }
+  m_lexer.consume ();
+}
+
+/* Issue a parsing error.  If this is the first error that has occurred on
+   the parser, store it within the parser's m_err_out (the buffer will
+   eventually need to be free by the caller of the parser).
+   Otherwise the error is discarded.
+
+   TODO: maybe provide a callback so that client code can print all errors?  */
+
+void
+parser::error_at (int index, const char *fmt, ...)
+{
+  va_list ap;
+  va_start (ap, fmt);
+  char *formatted = xvasprintf (fmt, ap);
+  va_end (ap);
+
+  char *msg_with_index = xasprintf ("error at index %i: %s",
+				    index, formatted);
+  free (formatted);
+
+  if (0)
+    fprintf (stderr, "%s\n", msg_with_index);
+  if (*m_err_out == NULL)
+    *m_err_out = msg_with_index;
+  else
+    free (msg_with_index);
+}
+
+/* Attempt to parse the UTF-8 encoded buffer at UTF8_BUF
+   of the given LENGTH, which must hold exactly one JSON value.
+   ERR_OUT must be non-NULL, and *ERR_OUT must be NULL on entry.
+   If successful, return a non-NULL json::value *.
+   If there was a problem, return NULL and write an error
+   message to *ERR_OUT, which must be freed by the caller.  */
+
+value *
+json::parse_utf8_string (size_t length, const char *utf8_buf,
+			 char **err_out)
+{
+  gcc_assert (err_out);
+  gcc_assert (*err_out == NULL);
+
+  parser p (err_out);
+  if (!p.add_utf8 (length, utf8_buf, err_out))
+    return NULL;
+
+  value *result = p.parse_value (0);
+  if (!p.seen_error_p ())
+    {
+      /* Reject any input following the value.  */
+      p.require_eof ();
+      if (!p.seen_error_p ())
+	return result;
+    }
+
+  /* Failure: discard whatever was built.  */
+  gcc_assert (*err_out);
+  delete result;
+  return NULL;
+}
+
+/* Attempt to parse the NUL-terminated UTF-8 encoded buffer at
+   UTF8.
+   If successful, return a non-NULL json::value *.
+   If there was a problem, return NULL and write an error
+   message to *ERR_OUT, which must be freed by the caller.  */
+
+value *
+json::parse_utf8_string (const char *utf8, char **err_out)
+{
+  const size_t length = strlen (utf8);
+  return parse_utf8_string (length, utf8, err_out);
+}
+
+\f
+#if CHECKING_P
+
+namespace selftest {
+
+/* Selftests.  */
+
+/* Verify that JV->to_str () equals EXPECTED_JSON, freeing the
+   buffer returned by to_str.  */
+
+static void
+assert_to_str_eq (const char *expected_json, json::value *jv)
+{
+  char *actual_json = jv->to_str ();
+  ASSERT_STREQ (expected_json, actual_json);
+  free (actual_json);
+}
+
+/* Verify that JSON strings can be parsed, dumped and cloned, covering
+   a plain ASCII string, escaped quotes, non-ASCII UTF-8 input, and
+   \u-escaped characters.  */
+
+static void
+test_parse_string ()
+{
+  char *err = NULL;
+  json::value *jv = parse_utf8_string ("\"foo\"", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_STRING, jv->get_kind ());
+  ASSERT_STREQ ("foo", ((json::string *)jv)->get_string ());
+  assert_to_str_eq ("\"foo\"", jv);
+
+  json::value *clone = jv->clone ();
+  ASSERT_EQ (JSON_STRING, clone->get_kind ());
+  ASSERT_STREQ ("foo", ((json::string *)clone)->get_string ());
+  assert_to_str_eq ("\"foo\"", clone);
+  delete clone;
+  delete jv;
+
+  /* Escaped double quotes within the string.  */
+  const char *contains_quotes = "\"before \\\"quoted\\\" after\"";
+  jv = parse_utf8_string (contains_quotes, &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_STRING, jv->get_kind ());
+  ASSERT_STREQ ("before \"quoted\" after", ((json::string *)jv)->get_string ());
+  assert_to_str_eq (contains_quotes, jv);
+  delete jv;
+
+  /* Test of non-ASCII input.  This string is the Japanese word "mojibake",
+     written as C octal-escaped UTF-8.  */
+  const char *mojibake = (/* Opening quote.  */
+			  "\""
+			  /* U+6587 CJK UNIFIED IDEOGRAPH-6587
+			     UTF-8: 0xE6 0x96 0x87
+			     C octal escaped UTF-8: \346\226\207.  */
+			  "\346\226\207"
+			  /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
+			     UTF-8: 0xE5 0xAD 0x97
+			     C octal escaped UTF-8: \345\255\227.  */
+			  "\345\255\227"
+			 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
+			      UTF-8: 0xE5 0x8C 0x96
+			      C octal escaped UTF-8: \345\214\226.  */
+			  "\345\214\226"
+			 /* U+3051 HIRAGANA LETTER KE
+			      UTF-8: 0xE3 0x81 0x91
+			      C octal escaped UTF-8: \343\201\221.  */
+			  "\343\201\221"
+			  /* Closing quote.  */
+			  "\"");
+  jv = parse_utf8_string (mojibake, &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_STRING, jv->get_kind ());
+  /* Result of get_string should be UTF-8 encoded, without quotes.  */
+  ASSERT_STREQ ("\346\226\207" "\345\255\227" "\345\214\226" "\343\201\221",
+		((json::string *)jv)->get_string ());
+  /* Result of dump should be UTF-8 encoded, with quotes.  */
+  assert_to_str_eq (mojibake, jv);
+  delete jv;
+
+  /* Test of \u-escaped unicode.  This is "mojibake" again, as above.  */
+  const char *escaped_unicode = "\"\\u6587\\u5b57\\u5316\\u3051\"";
+  jv = parse_utf8_string (escaped_unicode, &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_STRING, jv->get_kind ());
+  /* Result of get_string should be UTF-8 encoded, without quotes.  */
+  ASSERT_STREQ ("\346\226\207" "\345\255\227" "\345\214\226" "\343\201\221",
+		((json::string *)jv)->get_string ());
+  /* Result of dump should be UTF-8 encoded, with quotes.  */
+  assert_to_str_eq (mojibake, jv);
+  delete jv;
+}
+
+/* Verify that JSON numbers can be parsed and dumped, covering
+   integers, negative numbers, decimals and exponents, and that a
+   number can be cloned.  */
+
+static void
+test_parse_number ()
+{
+  json::value *jv, *clone;
+
+  char *err = NULL;
+  jv = parse_utf8_string ("42", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
+  ASSERT_EQ (42.0, ((json::number *)jv)->get ());
+  assert_to_str_eq ("42", jv);
+  clone = jv->clone ();
+  ASSERT_EQ (JSON_NUMBER, clone->get_kind ());
+  delete clone;
+  delete jv;
+
+  /* Negative number.  */
+  jv = parse_utf8_string ("-17", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
+  ASSERT_EQ (-17.0, ((json::number *)jv)->get ());
+  assert_to_str_eq ("-17", jv);
+  delete jv;
+
+  /* Decimal.  */
+  jv = parse_utf8_string ("3.141", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
+  ASSERT_EQ (3.141, ((json::number *)jv)->get ());
+  assert_to_str_eq ("3.141", jv);
+  delete jv;
+
+  /* Exponents.  */
+  jv = parse_utf8_string ("3.141e+0", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
+  ASSERT_EQ (3.141, ((json::number *)jv)->get ());
+  assert_to_str_eq ("3.141", jv);
+  delete jv;
+
+  /* Exponent without a sign.  */
+  jv = parse_utf8_string ("42e2", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
+  ASSERT_EQ (4200, ((json::number *)jv)->get ());
+  assert_to_str_eq ("4200", jv);
+  delete jv;
+
+  /* Negative exponent.  */
+  jv = parse_utf8_string ("42e-1", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_NUMBER, jv->get_kind ());
+  ASSERT_EQ (4.2, ((json::number *)jv)->get ());
+  assert_to_str_eq ("4.2", jv);
+  delete jv;
+
+}
+
+/* Verify that a JSON array of numbers can be parsed, dumped and
+   cloned, checking the length and each element of both the original
+   and the clone.  */
+
+static void
+test_parse_array ()
+{
+  json::value *jv, *clone;
+
+  char *err = NULL;
+  jv = parse_utf8_string ("[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_EQ (JSON_ARRAY, jv->get_kind ());
+  json::array *arr = static_cast <json::array *> (jv);
+  ASSERT_EQ (10, arr->get_length ());
+  for (int i = 0; i < 10; i++)
+    {
+      json::value *element = arr->get (i);
+      ASSERT_EQ (JSON_NUMBER, element->get_kind ());
+      ASSERT_EQ (i, ((json::number *)element)->get ());
+    }
+  assert_to_str_eq ("[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", jv);
+
+  /* The clone should have the same content as the original.  */
+  clone = jv->clone ();
+  ASSERT_EQ (JSON_ARRAY, clone->get_kind ());
+  arr = static_cast <json::array *> (clone);
+  ASSERT_EQ (10, arr->get_length ());
+  for (int i = 0; i < 10; i++)
+    {
+      json::value *element = arr->get (i);
+      ASSERT_EQ (JSON_NUMBER, element->get_kind ());
+      ASSERT_EQ (i, ((json::number *)element)->get ());
+    }
+  assert_to_str_eq ("[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", clone);
+  delete clone;
+
+  delete jv;
+}
+
+/* Verify that a JSON object containing a string member and an array
+   member can be parsed, have its members looked up by key, and be
+   cloned.  */
+
+static void
+test_parse_object ()
+{
+  char *err = NULL;
+  json::value *jv
+    = parse_utf8_string ("{\"foo\": \"bar\", \"baz\": [42, null]}", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_TRUE (jv != NULL);
+  ASSERT_EQ (JSON_OBJECT, jv->get_kind ());
+  json::object *jo = static_cast <json::object *> (jv);
+
+  json::value *foo_value = jo->get ("foo");
+  ASSERT_TRUE (foo_value != NULL);
+  ASSERT_EQ (JSON_STRING, foo_value->get_kind ());
+  ASSERT_STREQ ("bar", ((json::string *)foo_value)->get_string ());
+
+  json::value *baz_value = jo->get ("baz");
+  ASSERT_TRUE (baz_value != NULL);
+  ASSERT_EQ (JSON_ARRAY, baz_value->get_kind ());
+
+  json::array *baz_array = (json::array *)baz_value;
+  ASSERT_EQ (2, baz_array->get_length ());
+  ASSERT_EQ (42, baz_array->get (0)->as_number ()->get ());
+  ASSERT_EQ (JSON_NULL, baz_array->get (1)->get_kind ());
+
+  // TODO: error-handling
+  // TODO: partial document
+
+  /* We can't use assert_to_str_eq since ordering is not guaranteed.  */
+
+  json::value *clone = jv->clone ();
+  ASSERT_EQ (JSON_OBJECT, clone->get_kind ());
+  ASSERT_EQ (JSON_STRING, clone->as_object ()->get ("foo")->get_kind ());
+  delete clone;
+
+  delete jv;
+}
+
+/* Verify that the literals "true", "false" and "null" are parsed,
+   dumped, and are clonable.  Each is checked in turn, with the clone
+   expected to have the same kind as the original.  */
+
+static void
+test_parse_literals ()
+{
+  json::value *jv, *clone;
+  char *err = NULL;
+  jv = parse_utf8_string ("true", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_TRUE (jv != NULL);
+  ASSERT_EQ (JSON_TRUE, jv->get_kind ());
+  assert_to_str_eq ("true", jv);
+  clone = jv->clone ();
+  ASSERT_EQ (JSON_TRUE, clone->get_kind ());
+  delete clone;
+  delete jv;
+
+  jv = parse_utf8_string ("false", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_TRUE (jv != NULL);
+  ASSERT_EQ (JSON_FALSE, jv->get_kind ());
+  assert_to_str_eq ("false", jv);
+  clone = jv->clone ();
+  ASSERT_EQ (JSON_FALSE, clone->get_kind ());
+  delete clone;
+  delete jv;
+
+  jv = parse_utf8_string ("null", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_TRUE (jv != NULL);
+  ASSERT_EQ (JSON_NULL, jv->get_kind ());
+  assert_to_str_eq ("null", jv);
+  clone = jv->clone ();
+  ASSERT_EQ (JSON_NULL, clone->get_kind ());
+  delete clone;
+  delete jv;
+}
+
+/* Verify that a JSON-RPC 2.0 request object parses without error.
+   Only successful parsing is checked, not the resulting content.  */
+
+static void
+test_parse_jsonrpc ()
+{
+  char *err = NULL;
+  const char *request
+    = ("{\"jsonrpc\": \"2.0\", \"method\": \"subtract\","
+       " \"params\": [42, 23], \"id\": 1}");
+  json::value *jv = parse_utf8_string (request, &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_TRUE (jv != NULL);
+  delete jv;
+}
+
+/* Verify that the empty object "{}" can be parsed, and is dumped
+   as "{}".  */
+
+static void
+test_parse_empty_object ()
+{
+  char *err = NULL;
+  json::value *jv = parse_utf8_string ("{}", &err);
+  ASSERT_EQ (NULL, err);
+  ASSERT_TRUE (jv != NULL);
+  ASSERT_EQ (JSON_OBJECT, jv->get_kind ());
+  assert_to_str_eq ("{}", jv);
+  delete jv;
+}
+
+/* Verify that parsing the empty string fails, returning NULL and
+   reporting an unexpected EOF at index 0.  */
+
+static void
+test_error_empty_string ()
+{
+  char *err = NULL;
+  json::value *jv = parse_utf8_string ("", &err);
+  ASSERT_STREQ ("error at index 0: unexpected token: EOF", err);
+  ASSERT_TRUE (jv == NULL);
+  /* The error message is owned by the caller.  */
+  free (err);
+}
+
+/* Verify that an array with a missing comma between two elements
+   fails to parse, returning NULL and reporting the error at the index
+   of the unexpected element.  */
+
+static void
+test_error_missing_comma ()
+{
+  char *err = NULL;
+  /*                  01234567.  */
+  const char *json = "[0, 1 2]";
+  json::value *jv = parse_utf8_string (json, &err);
+  ASSERT_STREQ ("error at index 6: expected ']'; got number",
+		err);
+  // FIXME: unittest the lexer?
+  ASSERT_TRUE (jv == NULL);
+  /* The error message is owned by the caller.  */
+  free (err);
+}
+
+/* Run all of the selftests within this file: first the tests of
+   successful parsing, then the tests of error reporting.  */
+
+void
+json_cc_tests ()
+{
+  test_parse_string ();
+  test_parse_number ();
+  test_parse_array ();
+  test_parse_object ();
+  test_parse_literals ();
+  test_parse_jsonrpc ();
+  test_parse_empty_object ();
+  test_error_empty_string ();
+  test_error_missing_comma ();
+
+  /* FIXME: tests for roundtripping (noting that we don't preserve
+     object key ordering).  */
+
+  /* FIXME: cloning.  */
+}
+
+} // namespace selftest
+
+#endif /* #if CHECKING_P */
diff --git a/gcc/json.h b/gcc/json.h
new file mode 100644
index 0000000..aedf84a
--- /dev/null
+++ b/gcc/json.h
@@ -0,0 +1,214 @@
+/* JSON parsing
+   Copyright (C) 2017 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_JSON_H
+#define GCC_JSON_H
+
+/* Implementation of JSON, a lightweight data-interchange format.
+
+   See http://www.json.org/
+   and http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf
+   and https://tools.ietf.org/html/rfc7159
+
+   Supports parsing text into a DOM-like tree of json::value *, dumping
+   json::value * to text.  */
+
+namespace json
+{
+
+/* Forward decls of json::value and its subclasses (using indentation
+   to denote inheritance).  */
+
+class value;
+  class object;
+  class array;
+  class number;
+  class string;
+  class literal;
+
+/* An enum for discriminating the subclasses of json::value.  */
+
+enum kind
+{
+  /* class json::object.  */
+  JSON_OBJECT,
+
+  /* class json::array.  */
+  JSON_ARRAY,
+
+  /* class json::number.  */
+  JSON_NUMBER,
+
+  /* class json::string.  */
+  JSON_STRING,
+
+  /* class json::literal uses these three values to identify the
+     particular literal.  */
+  JSON_TRUE,
+  JSON_FALSE,
+  JSON_NULL
+};
+
+/* Base class of JSON value.  */
+
+class value
+{
+ public:
+  virtual ~value () {}
+  virtual enum kind get_kind () const = 0;
+  virtual void print (pretty_printer *pp) const = 0;
+
+  /* Create a deep copy of the value, returning a value which must be
+     deleted by the caller.  */
+  virtual value *clone () const = 0;
+
+  char *to_str () const;
+  void dump (FILE *) const;
+
+  /* Methods for dynamically casting a value to one of the subclasses,
+     returning NULL if the value is of the wrong kind.  */
+  const object *as_object () const;
+  const array *as_array () const;
+  const number *as_number () const;
+  const string *as_string () const;
+
+  /* Convenience accessors for attempting to perform key/value lookups
+     on this value as if it were a json::object.
+
+     On success, return true and write the value to OUT_VALUE.
+     On failure, return false and write an error message to OUT_ERR
+     (which must be freed by the caller).  */
+  bool get_value_by_key (const char *name, const value *&out_value,
+			 char *&out_err) const;
+  bool get_int_by_key (const char *name, int &out_value, char *&out_err) const;
+  bool get_string_by_key (const char *name, const char *&out_value,
+			  char *&out_err) const;
+  bool get_array_by_key (const char *name, const array *&out_value,
+			 char *&out_err) const;
+
+  /* As above, but the key is optional.  THIS must still be an object,
+     though.  */
+  bool get_optional_value_by_key (const char *name, const value *&out_value,
+				  char *&out_err) const;
+  bool get_optional_string_by_key (const char *name, const char *&out_value,
+				   char *&out_err) const;
+};
+
+/* Subclass of value for objects: an unordered collection of
+   key/value pairs.  */
+
+class object : public value
+{
+ public:
+  ~object ();
+
+  enum kind get_kind () const FINAL OVERRIDE { return JSON_OBJECT; }
+  void print (pretty_printer *pp) const FINAL OVERRIDE;
+  value *clone () const FINAL OVERRIDE;
+
+  value *get (const char *key) const;
+  value *get_if_nonnull (const char *key) const;
+
+  void set (const char *key, value *v);
+
+ private:
+  typedef hash_map <char *, value *,
+    simple_hashmap_traits<nofree_string_hash, value *> > map_t;
+  map_t m_map;
+};
+
+/* Subclass of value for arrays.  */
+
+class array : public value
+{
+ public:
+  ~array ();
+
+  enum kind get_kind () const FINAL OVERRIDE { return JSON_ARRAY; }
+  void print (pretty_printer *pp) const FINAL OVERRIDE;
+  value *clone () const FINAL OVERRIDE;
+
+  unsigned get_length () const { return m_elements.length (); }
+  value *get (int idx) const { return m_elements[idx]; }
+  void append (value *v) { m_elements.safe_push (v); }
+
+ private:
+  auto_vec<value *> m_elements;
+};
+
+/* Subclass of value for numbers.  */
+
+class number : public value
+{
+ public:
+  number (double value) : m_value (value) {}
+
+  enum kind get_kind () const FINAL OVERRIDE { return JSON_NUMBER; }
+  void print (pretty_printer *pp) const FINAL OVERRIDE;
+  value *clone () const FINAL OVERRIDE;
+
+  double get () const { return m_value; }
+
+ private:
+  double m_value;
+};
+
+/* Subclass of value for strings.  */
+
+class string : public value
+{
+ public:
+  string (const char *utf8) : m_utf8 (xstrdup (utf8)) {}
+  ~string () { free (m_utf8); }
+
+  enum kind get_kind () const FINAL OVERRIDE { return JSON_STRING; }
+  void print (pretty_printer *pp) const FINAL OVERRIDE;
+  value *clone () const FINAL OVERRIDE;
+
+  const char *get_string () const { return m_utf8; }
+
+ private:
+  char *m_utf8;
+};
+
+/* Subclass of value for the three JSON literals "true", "false",
+   and "null".  */
+
+class literal : public value
+{
+ public:
+  literal (enum kind kind) : m_kind (kind) {}
+
+  enum kind get_kind () const FINAL OVERRIDE { return m_kind; }
+  void print (pretty_printer *pp) const FINAL OVERRIDE;
+  value *clone () const FINAL OVERRIDE;
+
+ private:
+  enum kind m_kind;
+};
+
+/* Declarations for parsing JSON to a json::value * tree.  */
+
+extern value *parse_utf8_string (size_t length, const char *utf8_buf,
+				 char **err_out);
+extern value *parse_utf8_string (const char *utf8, char **err_out);
+
+} // namespace json
+
+#endif  /* GCC_JSON_H  */
diff --git a/gcc/selftest-run-tests.c b/gcc/selftest-run-tests.c
index 30e476d..025e574 100644
--- a/gcc/selftest-run-tests.c
+++ b/gcc/selftest-run-tests.c
@@ -66,6 +66,7 @@ selftest::run_tests ()
   sreal_c_tests ();
   fibonacci_heap_c_tests ();
   typed_splay_tree_c_tests ();
+  json_cc_tests ();
 
   /* Mid-level data structures.  */
   input_c_tests ();
diff --git a/gcc/selftest.h b/gcc/selftest.h
index 0572fef..4e8891c 100644
--- a/gcc/selftest.h
+++ b/gcc/selftest.h
@@ -183,6 +183,7 @@ extern void ggc_tests_c_tests ();
 extern void hash_map_tests_c_tests ();
 extern void hash_set_tests_c_tests ();
 extern void input_c_tests ();
+extern void json_cc_tests ();
 extern void pretty_print_c_tests ();
 extern void read_rtl_function_c_tests ();
 extern void rtl_tests_c_tests ();
-- 
1.8.5.3

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH 01/22] Expose assert_loceq outside of input.c; add ASSERT_LOCEQ
  2017-08-04 21:30 [PATCH 00/22] RFC: integrated 3rd-party static analysis support David Malcolm
  2017-08-04 21:30 ` [PATCH 02/22] libcpp: add linemap_position_for_file_line_and_column David Malcolm
@ 2017-08-04 21:30 ` David Malcolm
  2017-09-01 17:49   ` Jeff Law
  2017-08-04 21:30 ` [PATCH 03/22] Add JSON implementation David Malcolm
                   ` (23 subsequent siblings)
  25 siblings, 1 reply; 35+ messages in thread
From: David Malcolm @ 2017-08-04 21:30 UTC (permalink / raw)
  To: gcc-patches; +Cc: David Malcolm

gcc/ChangeLog:
	* input.c: Include "selftest-input.h".
	(selftest::assert_loceq): Remove "static".  Add "report_loc" param
	and update assertions to use it.
	(selftest::test_accessing_ordinary_linemaps): Use ASSERT_LOCEQ
	rather than assert_loceq.
	(selftest::test_builtins): Likewise.
	* selftest-input.h: New file.
---
 gcc/input.c          | 39 +++++++++++++++++++------------------
 gcc/selftest-input.h | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+), 18 deletions(-)
 create mode 100644 gcc/selftest-input.h

diff --git a/gcc/input.c b/gcc/input.c
index 0480eb2..1aad551 100644
--- a/gcc/input.c
+++ b/gcc/input.c
@@ -23,6 +23,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "intl.h"
 #include "diagnostic-core.h"
 #include "selftest.h"
+#include "selftest-input.h"
 #include "cpplib.h"
 
 #ifndef HAVE_ICONV
@@ -1613,21 +1614,23 @@ test_should_have_column_data_p ()
 }
 
 /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
-   on LOC.  */
+   on LOC.  Use REPORT_LOC as the effective location when reporting
+   any issues.  */
 
-static void
-assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum,
+void
+assert_loceq (const location &report_loc,
+	      const char *exp_filename, int exp_linenum, int exp_colnum,
 	      location_t loc)
 {
-  ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
-  ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
+  ASSERT_STREQ_AT (report_loc, exp_filename, LOCATION_FILE (loc));
+  ASSERT_EQ_AT (report_loc, exp_linenum, LOCATION_LINE (loc));
   /* If location_t values are sufficiently high, then column numbers
      will be unavailable and LOCATION_COLUMN (loc) will be 0.
      When close to the threshold, column numbers *may* be present: if
      the final linemap before the threshold contains a line that straddles
      the threshold, locations in that line have column information.  */
   if (should_have_column_data_p (loc))
-    ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
+    ASSERT_EQ_AT (report_loc, exp_colnum, LOCATION_COLUMN (loc));
 }
 
 /* Various selftests involve constructing a line table and one or more
@@ -1761,23 +1764,23 @@ test_accessing_ordinary_linemaps (const line_table_case &case_)
   linemap_add (line_table, LC_LEAVE, false, NULL, 0);
 
   /* Verify that we can recover the location info.  */
-  assert_loceq ("foo.c", 1, 1, loc_a);
-  assert_loceq ("foo.c", 1, 23, loc_b);
-  assert_loceq ("foo.c", 2, 1, loc_c);
-  assert_loceq ("foo.c", 2, 17, loc_d);
-  assert_loceq ("foo.c", 3, 700, loc_e);
-  assert_loceq ("foo.c", 4, 100, loc_back_to_short);
+  ASSERT_LOCEQ ("foo.c", 1, 1, loc_a);
+  ASSERT_LOCEQ ("foo.c", 1, 23, loc_b);
+  ASSERT_LOCEQ ("foo.c", 2, 1, loc_c);
+  ASSERT_LOCEQ ("foo.c", 2, 17, loc_d);
+  ASSERT_LOCEQ ("foo.c", 3, 700, loc_e);
+  ASSERT_LOCEQ ("foo.c", 4, 100, loc_back_to_short);
 
   /* In the very wide line, the initial location should be fully tracked.  */
-  assert_loceq ("foo.c", 5, 2000, loc_start_of_very_long_line);
+  ASSERT_LOCEQ ("foo.c", 5, 2000, loc_start_of_very_long_line);
   /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
      be disabled.  */
-  assert_loceq ("foo.c", 5, 0, loc_too_wide);
-  assert_loceq ("foo.c", 5, 0, loc_too_wide_2);
+  ASSERT_LOCEQ ("foo.c", 5, 0, loc_too_wide);
+  ASSERT_LOCEQ ("foo.c", 5, 0, loc_too_wide_2);
   /*...and column-tracking should be re-enabled for subsequent lines.  */
-  assert_loceq ("foo.c", 6, 10, loc_sane_again);
+  ASSERT_LOCEQ ("foo.c", 6, 10, loc_sane_again);
 
-  assert_loceq ("bar.c", 1, 150, loc_f);
+  ASSERT_LOCEQ ("bar.c", 1, 150, loc_f);
 
   ASSERT_FALSE (is_location_from_builtin_token (loc_a));
   ASSERT_TRUE (pure_location_p (line_table, loc_a));
@@ -1807,7 +1810,7 @@ test_unknown_location ()
 static void
 test_builtins ()
 {
-  assert_loceq (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
+  ASSERT_LOCEQ (_("<built-in>"), 0, 0, BUILTINS_LOCATION);
   ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
 }
 
diff --git a/gcc/selftest-input.h b/gcc/selftest-input.h
new file mode 100644
index 0000000..d56af36
--- /dev/null
+++ b/gcc/selftest-input.h
@@ -0,0 +1,54 @@
+/* Support for selftests of location handling.
+   Copyright (C) 2016-2017 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_SELFTEST_INPUT_H
+#define GCC_SELFTEST_INPUT_H
+
+/* The selftest code should entirely disappear in a production
+   configuration, hence we guard all of it with #if CHECKING_P.  */
+
+#if CHECKING_P
+
+namespace selftest {
+
+/* input.c.  */
+
+/* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
+   on LOC.  Use REPORT_LOC as the effective location when reporting
+   any issues.  */
+
+extern void assert_loceq (const location &report_loc,
+			  const char *exp_filename, int exp_linenum,
+			  int exp_colnum, location_t loc);
+
+/* Evaluate EXP_FILENAME, EXP_LINENUM, EXP_COLNUM, and LOC.
+   Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN
+   on LOC.  */
+
+#define ASSERT_LOCEQ(EXP_FILENAME, EXP_LINENUM, EXP_COLNUM, LOC)	\
+  SELFTEST_BEGIN_STMT							\
+    ::selftest::assert_loceq (SELFTEST_LOCATION, (EXP_FILENAME),	\
+			      (EXP_LINENUM), (EXP_COLNUM), (LOC));	\
+  SELFTEST_END_STMT
+
+} /* end of namespace selftest.  */
+
+#endif /* #if CHECKING_P */
+
+#endif /* GCC_SELFTEST_INPUT_H */
-- 
1.8.5.3

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH 08/22] Add GNU_BUILD_ATTRIBUTE_STATIC_ANALYSIS to annobin.h
  2017-08-04 21:30 [PATCH 00/22] RFC: integrated 3rd-party static analysis support David Malcolm
                   ` (7 preceding siblings ...)
  2017-08-04 21:36 ` [PATCH 10/22] Add checkers.h/cc David Malcolm
@ 2017-08-04 21:36 ` David Malcolm
  2017-08-04 21:36 ` [PATCH 09/22] Add selftest::read_file (..., FILE *, ...) David Malcolm
                   ` (16 subsequent siblings)
  25 siblings, 0 replies; 35+ messages in thread
From: David Malcolm @ 2017-08-04 21:36 UTC (permalink / raw)
  To: gcc-patches; +Cc: David Malcolm

gcc/ChangeLog:
	* annobin.h (GNU_BUILD_ATTRIBUTE_STATIC_ANALYSIS): New define.
---
 gcc/annobin.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/annobin.h b/gcc/annobin.h
index 76eb01c..1152316 100644
--- a/gcc/annobin.h
+++ b/gcc/annobin.h
@@ -35,6 +35,7 @@
 #define GNU_BUILD_ATTRIBUTE_ABI		6
 #define GNU_BUILD_ATTRIBUTE_PIC		7
 #define GNU_BUILD_ATTRIBUTE_SHORT_ENUM	8
+#define GNU_BUILD_ATTRIBUTE_STATIC_ANALYSIS	9
 
 #define NOTE_GNU_PROPERTY_SECTION_NAME	".note.gnu.property"
 #define GNU_BUILD_ATTRS_SECTION_NAME	".gnu.build.attributes"
-- 
1.8.5.3

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH 09/22] Add selftest::read_file (..., FILE *, ...)
  2017-08-04 21:30 [PATCH 00/22] RFC: integrated 3rd-party static analysis support David Malcolm
                   ` (8 preceding siblings ...)
  2017-08-04 21:36 ` [PATCH 08/22] Add GNU_BUILD_ATTRIBUTE_STATIC_ANALYSIS to annobin.h David Malcolm
@ 2017-08-04 21:36 ` David Malcolm
  2017-08-04 21:36 ` [PATCH 22/22] Add contrib/get-static-analysis.py David Malcolm
                   ` (15 subsequent siblings)
  25 siblings, 0 replies; 35+ messages in thread
From: David Malcolm @ 2017-08-04 21:36 UTC (permalink / raw)
  To: gcc-patches; +Cc: David Malcolm

This patch is a hack used by the followup checkers.cc patch,
and ought to be removed in any final version of the kit.

gcc/ChangeLog:
	* selftest.c (read_file): New overload.
	* selftest.h (read_file): New overload.
---
 gcc/selftest.c | 16 +++++++++++++---
 gcc/selftest.h |  7 +++++++
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/gcc/selftest.c b/gcc/selftest.c
index b41b9f5..a6674be 100644
--- a/gcc/selftest.c
+++ b/gcc/selftest.c
@@ -162,7 +162,19 @@ read_file (const location &loc, const char *path)
   FILE *f_in = fopen (path, "r");
   if (!f_in)
     fail_formatted (loc, "unable to open file: %s", path);
+  char *result = read_file (loc, f_in, path);
+  fclose (f_in);
+  return result;
+}
+
+/* Read all of F_IN into memory, returning a 0-terminated buffer
+   that must be freed by the caller.  F_IN is *not* closed.
+   Fail (and abort) if there are any problems, with LOC as the reported
+   location of the failure, using DESC as a description of the file.  */
 
+char *
+read_file (const location &loc, FILE *f_in, const char *desc)
+{
   /* Read content, allocating FIXME.  */
   char *result = NULL;
   size_t total_sz = 0;
@@ -186,11 +198,9 @@ read_file (const location &loc, const char *path)
     }
 
   if (!feof (f_in))
-    fail_formatted (loc, "error reading from %s: %s", path,
+    fail_formatted (loc, "error reading from %s: %s", desc,
 		    xstrerror (errno));
 
-  fclose (f_in);
-
   /* 0-terminate the buffer.  */
   gcc_assert (total_sz < alloc_sz);
   result[total_sz] = '\0';
diff --git a/gcc/selftest.h b/gcc/selftest.h
index e86ce38..541bb71 100644
--- a/gcc/selftest.h
+++ b/gcc/selftest.h
@@ -153,6 +153,13 @@ for_each_line_table_case (void (*testcase) (const line_table_case &));
 
 extern char *read_file (const location &loc, const char *path);
 
+/* Read all of F_IN into memory, returning a 0-terminated buffer
+   that must be freed by the caller.  F_IN is *not* closed.
+   Fail (and abort) if there are any problems, with LOC as the reported
+   location of the failure, using DESC as a description of the file.  */
+
+extern char *read_file (const location &loc, FILE *infile, const char *desc);
+
 /* A helper function for writing tests that interact with the
    garbage collector.  */
 
-- 
1.8.5.3

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH 16/22] Add checkers/coverity.py
  2017-08-04 21:30 [PATCH 00/22] RFC: integrated 3rd-party static analysis support David Malcolm
                   ` (5 preceding siblings ...)
  2017-08-04 21:36 ` [PATCH 19/22] Add checkers/ianal.py David Malcolm
@ 2017-08-04 21:36 ` David Malcolm
  2017-08-04 21:36 ` [PATCH 10/22] Add checkers.h/cc David Malcolm
                   ` (18 subsequent siblings)
  25 siblings, 0 replies; 35+ messages in thread
From: David Malcolm @ 2017-08-04 21:36 UTC (permalink / raw)
  To: gcc-patches; +Cc: David Malcolm

This patch is an example of supporting a proprietary 3rd-party tool:
a harness for invoking the Coverity checker.

It uses the '--json-output-v2' option to cov-format-errors, and then uses
firehose.parsers.coverity.parse_json_v2 to parse the generated Coverity
JSON format, turning it into firehose JSON.

This isn't a great example of use of either the checker infrastructure, or
of Coverity.

As far as I can tell, Coverity is designed to be run on a
number of source files at once, performing a relatively cheap data-gathering
phase per-source-file, and then performing a more expensive LTO-style
analysis that can follow dataflow between source files, thus obtaining
much more accurate results than a purely one-file-at-a-time checker can.

In contrast, the checker machinery in this patch kit is designed to run
one file at a time.  The harness code in this patch attempts to "square
this circle", but it's not a good fit; it can detect problems that
are within one source file, but prevents the checker from finding
the more interesting problems that it's normally capable of.

checkers/ChangeLog:
	* coverity.py: New file.
---
 checkers/coverity.py | 141 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 141 insertions(+)
 create mode 100644 checkers/coverity.py

diff --git a/checkers/coverity.py b/checkers/coverity.py
new file mode 100644
index 0000000..533a6ae
--- /dev/null
+++ b/checkers/coverity.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python
+#   Copyright 2017 David Malcolm <dmalcolm@redhat.com>
+#   Copyright 2017 Red Hat, Inc.
+#
+#   This is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by
+#   the Free Software Foundation, either version 3 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#   General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program.  If not, see
+#   <http://www.gnu.org/licenses/>.
+
+#   Coverity is a trademark of Synopsys, Inc. in the U.S. and/or other
+#   countries.
+
+import os
+import sys
+import tempfile
+import unittest
+
+from gccinvocation import GccInvocation
+
+from checker import Checker, Context, CheckerTests, make_file, make_stats, \
+    tool_main
+
+from firehose.model import Analysis, Generator, Metadata, Failure, \
+    Location, File, Message, Issue, Trace
+from firehose.parsers.coverity import parse_json_v2
+
+os.environ['PATH'] = '/opt/coverity/bin:' + os.environ['PATH']
+
+class InvokeCoverity(Checker):
+    """
+    Checker subclass that invokes Coverity
+    """
+    name = 'coverity'
+
+    def __init__(self, ctxt, verbose=False):
+        Checker.__init__(self, ctxt)
+        self.verbose = verbose
+
+    def raw_invoke(self, gccinv, sourcefile):
+        # tempfile.TemporaryDirectory is only available from Python 3.2 onwards,
+        # so handle tempdir cleanup "by hand"
+        try:
+            tempdir_name = tempfile.mkdtemp()
+
+            json_name = os.path.join(tempdir_name, 'output.json')
+            build_args = ['cov-build', '--dir', tempdir_name, 'gcc'] + gccinv.argv[1:]
+            if self.verbose:
+                print(build_args)
+            build_result = self.run_subprocess(sourcefile, build_args)
+            if self.verbose:
+                print(build_result)
+
+            analyze_args = ['cov-analyze', '--dir', tempdir_name,
+                            '--wait-for-license']
+            analyze_result = self.run_subprocess(sourcefile, analyze_args)
+            if self.verbose:
+                print(analyze_result)
+
+            format_args = ['cov-format-errors', '--dir', tempdir_name,
+                           '--json-output-v2', json_name]
+            format_result = self.run_subprocess(sourcefile, format_args)
+            if self.verbose:
+                print(format_result)
+
+            # Parse the output, returning an Analysis instance
+            analysis = parse_json_v2(json_name)
+            if self.verbose:
+                print(analysis)
+            return analysis
+
+            # FIXME: timing metadata?
+
+        finally:
+            pass # FIXME: cleanup tempdir
+
+class CoverityTests(CheckerTests):
+    def make_tool(self):
+        return self.make_tool_from_class(InvokeCoverity)
+
+    def verify_basic_metadata(self, analysis, sourcefile):
+        # Verify basic metadata:
+        self.assert_metadata(analysis, 'coverity', sourcefile)
+
+    def test_file_not_found(self):
+        analysis = self.invoke('does-not-exist.c')
+        self.assertEqual(len(analysis.results), 0)
+
+    def test_timeout(self):
+        sourcefile = 'test-sources/harmless.c'
+        tool = self.make_tool()
+        tool.timeout = 0
+        gccinv = GccInvocation(['gcc', sourcefile])
+        analysis = tool.checked_invoke(gccinv, sourcefile)
+        self.assert_metadata(analysis, 'coverity', sourcefile)
+        self.assertEqual(len(analysis.results), 1)
+        r0 = analysis.results[0]
+        self.assertIsInstance(r0, Failure)
+        self.assertEqual(r0.failureid, 'timeout')
+
+    def test_out_of_bounds(self):
+        analysis = self.invoke('test-sources/out-of-bounds.c')
+        if 0:
+            print(analysis)
+        self.assertEqual(len(analysis.results), 2)
+
+        r0 = analysis.results[0]
+        self.assertIsInstance(r0, Issue)
+        self.assertEqual(r0.testid, 'OVERRUN')
+        self.assertEqual(r0.location.point.line, 5)
+        self.assertEqual(r0.message.text,
+                         'Overrunning array "arr" of 10 4-byte elements at'
+                         ' element index 15 (byte offset 60) using index "15".')
+        self.assertIsInstance(r0.trace, Trace)
+        self.assertEqual(len(r0.trace.states), 1)
+
+        r1 = analysis.results[1]
+        self.assertIsInstance(r1, Issue)
+        self.assertEqual(r1.testid, 'UNINIT')
+        self.assertEqual(r1.location.point.line, 5)
+        self.assertEqual(r1.message.text,
+                         'Using uninitialized value "arr[15]".')
+        self.assertIsInstance(r1.trace, Trace)
+        self.assertEqual(len(r1.trace.states), 2)
+        self.assertEqual(r1.trace.states[0].location.point.line, 3)
+        self.assertEqual(r1.trace.states[0].notes.text,
+                         'Declaring variable "arr" without initializer.')
+        self.assertEqual(r1.trace.states[1].location.point.line, 5)
+        self.assertEqual(r1.trace.states[1].notes.text,
+                         'Using uninitialized value "arr[15]".')
+
+if __name__ == '__main__':
+    sys.exit(tool_main(sys.argv, InvokeCoverity))
-- 
1.8.5.3

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH 19/22] Add checkers/ianal.py
  2017-08-04 21:30 [PATCH 00/22] RFC: integrated 3rd-party static analysis support David Malcolm
                   ` (4 preceding siblings ...)
  2017-08-04 21:36 ` [PATCH 07/22] Add minimal version of Nick Clifton's annobin code David Malcolm
@ 2017-08-04 21:36 ` David Malcolm
  2017-08-04 21:36 ` [PATCH 16/22] Add checkers/coverity.py David Malcolm
                   ` (19 subsequent siblings)
  25 siblings, 0 replies; 35+ messages in thread
From: David Malcolm @ 2017-08-04 21:36 UTC (permalink / raw)
  To: gcc-patches; +Cc: David Malcolm

This patch is a demo of handling code metrics and metadata ("info" results
in Firehose terminology).

It adds a standalone tool harness which scans the main input file
looking for "Copyright" lines, returning information on them as
firehose JSON.

When sent through GCC's diagnostic subsystem by checker.cc, these
"info" results are emitted as notes, e.g.:

../../src/checkers/test-sources/cpychecker-demo.c:2:4: note: I am not a lawyer [not-a-lawyer:copyright-line]
    Copyright 2011 David Malcolm <dmalcolm@redhat.com>
    ^~~~~~~~~
../../src/checkers/test-sources/cpychecker-demo.c:3:4: note: I am not a lawyer [not-a-lawyer:copyright-line]
    Copyright 2011 Red Hat, Inc.
    ^~~~~~~~~

and they're captured in the generated binary by the watermarking code.

checkers/ChangeLog:
	* ianal.py: New file.
---
 checkers/ianal.py | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 79 insertions(+)
 create mode 100755 checkers/ianal.py

diff --git a/checkers/ianal.py b/checkers/ianal.py
new file mode 100755
index 0000000..a918f41
--- /dev/null
+++ b/checkers/ianal.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python
+#   Copyright 2017 David Malcolm <dmalcolm@redhat.com>
+#   Copyright 2017 Red Hat, Inc.
+#
+#   This is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by
+#   the Free Software Foundation, either version 3 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#   General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program.  If not, see
+#   <http://www.gnu.org/licenses/>.
+
+import re
+import sys
+
+from firehose.model import Analysis, Generator, Metadata, Info, \
+    Location, Message, Range, Point
+
+from checker import Checker, CheckerTests, make_file, tool_main
+
+class NotALawyer(Checker):
+    """
+    Checker subclass that looks for "Copyright" lines, as a demo
+    of handling "info" results.
+    """
+    name = 'not-a-lawyer'
+
+    def raw_invoke(self, gccinv, sourcefile):
+        results = []
+        file_ = make_file(sourcefile)
+        with open(sourcefile) as f:
+            for lineidx, line in enumerate(f):
+                m = re.match('.*(Copyright).*', line)
+                if m:
+                    start, end = m.span(1)
+                    linenum = lineidx + 1
+                    range_ = Range(start=Point(linenum, start + 1),
+                                   end=Point(linenum, end))
+                    location = Location(file_, None, range_=range_)
+                    info = Info(infoid='copyright-line',
+                                location=location,
+                                message=Message('I am not a lawyer'),
+                                customfields=None)
+                    results.append(info)
+        metadata = Metadata(generator=Generator(self.name), sut=None,
+                            file_=file_, stats=None)
+        analysis = Analysis(metadata, results)
+        return analysis
+
+class NotALawyerTests(CheckerTests):
+    def make_tool(self):
+        return self.make_tool_from_class(NotALawyer)
+
+    def verify_basic_metadata(self, analysis, sourcefile):
+        # Verify basic metadata:
+        self.assert_metadata(analysis, 'not-a-lawyer', sourcefile)
+
+    def test_basic(self):
+        analysis = self.invoke('test-sources/cpychecker-demo.c')
+        self.assertEqual(len(analysis.results), 2)
+        r0 = analysis.results[0]
+        self.assertIsInstance(r0, Info)
+        self.assertEqual(r0.infoid, 'copyright-line')
+        self.assertEqual(r0.location.file.givenpath,
+                         'test-sources/cpychecker-demo.c')
+        self.assertEqual(r0.message.text, 'I am not a lawyer')
+        self.assertEqual(r0.location.range_.start.line, 2)
+        self.assertEqual(r0.location.range_.start.column, 4)
+        self.assertEqual(r0.location.range_.end.line, 2)
+        self.assertEqual(r0.location.range_.end.column, 12)
+
+if __name__ == '__main__':
+    sys.exit(tool_main(sys.argv, NotALawyer))
-- 
1.8.5.3

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH 10/22] Add checkers.h/cc
  2017-08-04 21:30 [PATCH 00/22] RFC: integrated 3rd-party static analysis support David Malcolm
                   ` (6 preceding siblings ...)
  2017-08-04 21:36 ` [PATCH 16/22] Add checkers/coverity.py David Malcolm
@ 2017-08-04 21:36 ` David Malcolm
  2017-08-04 21:36 ` [PATCH 08/22] Add GNU_BUILD_ATTRIBUTE_STATIC_ANALYSIS to annobin.h David Malcolm
                   ` (17 subsequent siblings)
  25 siblings, 0 replies; 35+ messages in thread
From: David Malcolm @ 2017-08-04 21:36 UTC (permalink / raw)
  To: gcc-patches; +Cc: David Malcolm

This implements a new "policy" class to read a description of
a set of checkers to run, along with a "checker" class to
handle actually running the checkers, outputting the results
through gcc's diagnostic subsystem, and watermarking the
generated binary with the results and metadata.

Caveats:

* there's currently no way to express suppressions (e.g.
 "run clang-analyzer, but ignore errors foo, bar, and baz");
  that said, it *does* capture that metadata about the diagnostics.
  I'm thinking of something like a 4-state enum value per test id:
  - error: hard error that fails the build
  - warn: warn, but don't fail the build
  - log: capture within watermark, but don't warn
  - drop: ignore altogether
  defaulting to "error".

* the policy is read from a monolithic JSON file; this format
  is clunky for users to work with, and probably would be
  easier to do as simple gcc options, one per checker

* to save time, some of this code relies on stuff within
  "selftest", which would need moving out of there for
  release builds

* there are quite a few other FIXMEs in this patch

gcc/ChangeLog:
	* Makefile.in (OBJS): Add checkers.o.
	* checkers.cc: New file.
	* checkers.h: New file.
	* selftest-run-tests.c (selftest::run_tests): Call
	selftest::checkers_cc_tests.
	* selftest.h (selftest::checkers_cc_tests): New decl.

gcc/testsuite/ChangeLog:
	* selftests/checker-policy/test-policy.json: New file.
---
 gcc/Makefile.in                                    |   1 +
 gcc/checkers.cc                                    | 736 +++++++++++++++++++++
 gcc/checkers.h                                     |  26 +
 gcc/selftest-run-tests.c                           |   1 +
 gcc/selftest.h                                     |   1 +
 .../selftests/checker-policy/test-policy.json      |   7 +
 6 files changed, 772 insertions(+)
 create mode 100644 gcc/checkers.cc
 create mode 100644 gcc/checkers.h
 create mode 100644 gcc/testsuite/selftests/checker-policy/test-policy.json

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 319e3f3..189833e 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1236,6 +1236,7 @@ OBJS = \
 	cfgloopanal.o \
 	cfgloopmanip.o \
 	cfgrtl.o \
+	checkers.o \
 	symtab.o \
 	cgraph.o \
 	cgraphbuild.o \
diff --git a/gcc/checkers.cc b/gcc/checkers.cc
new file mode 100644
index 0000000..1a16799
--- /dev/null
+++ b/gcc/checkers.cc
@@ -0,0 +1,736 @@
+/* Running 3rd-party code analysis tools.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "options.h"
+#include "diagnostic.h"
+#include "selftest.h"
+#include "firehose.h"
+#include "json.h"
+#include <pthread.h>
+#include "checkers.h"
+#include "annobin.h"
+#include "cpplib.h"
+#include "incpath.h"
+
+
+static bool
+diagnostic_at_rich_loc_va (rich_location *richloc,
+			   diagnostic_info *diagnostic,
+			   const char *gmsgid,
+			   va_list *ap) ATTRIBUTE_GCC_DIAG(3,0);
+
+static bool
+diagnostic_at (location_t loc, diagnostic_info *diagnostic,
+	       const char *gmsgid, ...)  ATTRIBUTE_GCC_DIAG(3,4);
+
+/* Print any trace of states associated with ISSUE.  */
+
+static void
+print_any_trace (const firehose::issue &issue)
+{
+  if (!issue.m_trace)
+    return;
+  if (0)
+    inform (UNKNOWN_LOCATION, "got trace");
+
+  /* Filter out any states within the trace that don't have text.  */
+  issue.m_trace->filter ();
+
+  /* If we're just left with a single state that duplicates what we
+     already printed for the issue, don't bother printing it.  */
+  if (issue.m_trace->is_redundant_p (issue))
+    return;
+
+  int i;
+  firehose::state *s;
+  int num_states = issue.m_trace->m_states.length ();
+  FOR_EACH_VEC_ELT (issue.m_trace->m_states, i, s)
+    {
+      if (s->m_notes)
+	inform (s->m_location, "state %i of %i: %s", i + 1, num_states,
+		s->m_notes);
+      else
+	inform (s->m_location, "state %i of %i", i + 1, num_states);
+    }
+}
+
+/* Emit DIAGNOSTIC at RICHLOC, formatting GMSGID with AP, via global_dc.  */
+
+bool
+diagnostic_at_rich_loc_va (rich_location *richloc,
+			   diagnostic_info *diagnostic,
+			   const char *gmsgid,
+			   va_list *ap)
+{
+  gcc_assert (richloc);
+  gcc_assert (diagnostic);
+  gcc_assert (gmsgid);
+  gcc_assert (ap);
+
+  diagnostic_t kind = diagnostic->kind;
+#if 0
+  if (kind == DK_PERMERROR)
+    {
+      diagnostic_set_info (diagnostic, gmsgid, ap, richloc,
+			   permissive_error_kind (global_dc));
+      diagnostic.option_index = permissive_error_option (global_dc);
+    }
+  else
+#endif
+    {
+      diagnostic_set_info (diagnostic, gmsgid, ap, richloc, kind);
+#if 0
+      if (kind == DK_WARNING || kind == DK_PEDWARN)
+	diagnostic.option_index = opt;
+#endif
+    }
+  return diagnostic_report_diagnostic (global_dc, diagnostic);
+}
+
+/* Emit DIAGNOSTIC at LOC.  */
+
+bool
+diagnostic_at (location_t loc, diagnostic_info *diagnostic,
+	       const char *gmsgid, ...)
+{
+  va_list ap;
+  va_start (ap, gmsgid);
+  rich_location richloc (line_table, loc);
+  bool result = diagnostic_at_rich_loc_va (&richloc, diagnostic, gmsgid, &ap);
+  va_end (ap);
+  return result;
+}
+
+/* Emit a diagnostic for ISSUE.  */
+
+static void
+handle_issue (const firehose::analysis &analysis,
+	      const firehose::issue &issue)
+{
+  diagnostic_info diagnostic;
+  diagnostic.kind = DK_ERROR;
+  diagnostic.external_tool = analysis.m_metadata.m_generator.m_name;
+  diagnostic.external_test_id = issue.m_testid;
+  diagnostic_at (issue.m_location, &diagnostic, "%s",
+		 issue.m_message);
+  print_any_trace (issue);
+}
+
+/* Emit a diagnostic for INFO.  */
+
+static void
+handle_info (const firehose::analysis &analysis,
+	     const firehose::info &info)
+{
+  diagnostic_info diagnostic;
+  diagnostic.kind = DK_NOTE;
+  diagnostic.external_tool = analysis.m_metadata.m_generator.m_name;
+  diagnostic.external_test_id = info.m_infoid;
+  diagnostic_at (info.m_location, &diagnostic, "%s",
+		 info.m_message);
+}
+
+/* Emit a diagnostic for FAILURE.  */
+
+static void
+handle_failure (const firehose::analysis &analysis,
+		const firehose::failure &failure)
+{
+  diagnostic_info diagnostic;
+  diagnostic.kind = DK_ERROR;
+  diagnostic.external_tool = analysis.m_metadata.m_generator.m_name;
+  diagnostic.external_test_id = failure.m_failureid;
+  diagnostic_at (failure.m_location, &diagnostic, "%s",
+		 failure.m_message);
+}
+
+/* FIXME: taken from jit-playback.c.  */
+
+/* A subclass of auto_vec <char *> that frees all of its elements on
+   deletion.  */
+
+class auto_argvec : public auto_vec <char *>
+{
+ public:
+  ~auto_argvec ();
+};
+
+/* auto_argvec's dtor, freeing all contained strings, automatically
+   chaining up to ~auto_vec <char *>, which frees the internal buffer.  */
+
+auto_argvec::~auto_argvec ()
+{
+  int i;
+  char *str;
+  FOR_EACH_VEC_ELT (*this, i, str)
+    free (str);
+}
+
+/* A struct to hold the results from a checker-invocation thread.  */
+
+struct thread_result
+{
+  thread_result (char *utf8_buffer, char *err)
+  : m_utf8_buffer (utf8_buffer), m_err (err) {}
+
+  ~thread_result () { free (m_utf8_buffer); free (m_err); }
+
+  char *m_utf8_buffer;
+  char *m_err;
+};
+
+/* A particular checker to run.
+   A "checker" is an executable which takes GCC-style command-line
+   arguments and writes a Firehose JSON file to stdout.  */
+
+class checker
+{
+ public:
+  checker ();
+  ~checker ();
+
+  static checker *from_json (const json::value *jv, char *&out_err);
+
+  void start ();
+  void finish ();
+  void run_single_threaded ();
+
+  const char *get_executable () const { return m_executable; }
+
+  const char *get_output () const { return m_utf8_buffer; }
+  const json::value *get_json_output () const { return m_jv; }
+
+ private:
+  static void *run_checker_thread (void *ptr);
+  thread_result *run_in_thread ();
+  char *capture_stdout (char *&out_err);
+  void make_args (auto_argvec &out) const;
+  void handle_json ();
+
+  char *m_executable;
+  pthread_t m_tid;
+
+  char *m_utf8_buffer;
+  json::value *m_jv;
+};
+
+/* A policy, listing which checkers to run.  */
+
+class policy
+{
+ public:
+  ~policy ();
+
+  bool read_from_file (const char *path, char *&out_err);
+
+  auto_vec<checker *> m_checkers;
+};
+
+/* checker's ctor.  */
+
+checker::checker () : m_executable (NULL), m_utf8_buffer (NULL), m_jv (NULL)
+{
+}
+
+/* checker's dtor.  */
+
+checker::~checker ()
+{
+  free (m_executable);
+  free (m_utf8_buffer);
+  delete m_jv;
+}
+
+/* Attempt to allocate a new checker based on JV.
+   On failure, return NULL and write to OUT_ERR (which must be freed
+   by the caller).  */
+
+checker *
+checker::from_json (const json::value *jv, char *&out_err)
+{
+  checker *ch = new checker ();
+
+  const char *executable;
+  if (!jv->get_string_by_key ("executable", executable, out_err))
+    {
+      delete ch;
+      return NULL;
+    }
+  ch->m_executable = xstrdup (executable);
+
+  // FIXME: languages
+
+  return ch;
+}
+
+/* Callback to pthread_create, for running one checker within a thread.
+   This is the entrypoint of the per-checker thread.  */
+
+void *
+checker::run_checker_thread (void *ptr)
+{
+  checker *ch = static_cast <checker *> (ptr);
+  return ch->run_in_thread ();
+}
+
+/* Create a thread for this checker, calling run_in_thread within it.
+   This is run on the main thread, called by checkers_start.  */
+
+void
+checker::start ()
+{
+  pthread_create (&m_tid,
+		  NULL,
+		  run_checker_thread,
+		  this);
+}
+
+/* Wait for this checker's thread to finish, and call
+   handle_json on the UTF-8 JSON result.
+
+   This is run on the main thread, called by checkers_finish.  */
+
+void
+checker::finish ()
+{
+  /* Wait for the thread to finish.  */
+  void *retval;
+  if (int err = pthread_join (m_tid, &retval))
+    {
+      error_at (UNKNOWN_LOCATION, "error invoking checker %qs: pthread_join failed: %i",
+		m_executable, err);
+      return;
+    }
+  thread_result *result = static_cast <thread_result *> (retval);
+
+  /* Process the output.  */
+  if (!result->m_utf8_buffer)
+    {
+      error_at (UNKNOWN_LOCATION, "error invoking checker %qs: %qs",
+		m_executable, result->m_err);
+      delete result;
+      return;
+    }
+
+  m_utf8_buffer = result->m_utf8_buffer;
+  result->m_utf8_buffer = NULL;
+  handle_json ();
+  delete result;
+}
+
+/* This is the "main" routine of the per-checker thread when
+   run in multithreaded mode.
+   Attempt to run the checker in a subprocess, and return the stdout
+   and any error messages.  */
+
+thread_result *
+checker::run_in_thread ()
+{
+  char *err = NULL;
+  char *utf8_buffer = capture_stdout (err);
+
+  return new thread_result (utf8_buffer, err);
+}
+
+/* This is the single-threaded way to invoke a checker.
+   Run the checker in a subprocess, capture its stdout as UTF-8 JSON,
+   and call handle_json on it.  */
+
+void
+checker::run_single_threaded ()
+{
+  char *err = NULL;
+  m_utf8_buffer = capture_stdout (err);
+  if (!m_utf8_buffer)
+    {
+      error_at (UNKNOWN_LOCATION, "error invoking checker %qs: %qs",
+		m_executable, err);
+      free (err);
+      return;
+    }
+  handle_json ();
+}
+
+/* Run the checker, capturing its stdout.
+
+   Return a buffer containing the captured stdout, which must be freed
+   by the caller.
+
+   This can be run either on the main thread, or within the
+   per-checker thread.  */
+
+// FIXME: this assumes that pex is thread-safe; is it?
+
+char *
+checker::capture_stdout (char *&out_err)
+{
+  auto_argvec argvec;
+
+  make_args (argvec);
+
+  /* pex argv arrays are NULL-terminated.  */
+  argvec.safe_push (NULL);
+
+  struct pex_obj *obj;
+  const char *errmsg;
+  int exit_status = 0;
+  int err = 0;
+
+  obj = pex_init (0, progname, NULL);
+
+  errmsg = pex_run (obj,
+		    PEX_SEARCH | PEX_USE_PIPES, /* int flags, */
+		    m_executable, /* const char *executable, */
+		    const_cast <char *const *> (argvec.address ()), /* argv, */
+		    NULL, /* const char *outname */
+		    NULL, /* const char *errname */
+		    &err); /* int *err*/
+  if (errmsg == NULL)
+    {
+      if (!pex_get_status (obj, 1, &exit_status))
+	{
+	  err = 0;
+	  out_err = xstrdup ("pex_get_status failed");
+	}
+    }
+
+  FILE *outf = pex_read_output (obj, 0);
+  if (!outf)
+    {
+      out_err = xstrdup ("unable to read stdout");
+      return NULL;
+    }
+  /* "outf" is owned by "obj".  */
+
+  // FIXME: use something other than a selftest API for this!
+  char *utf8_buffer = selftest::read_file (SELFTEST_LOCATION, outf,
+					   "stdout from checker");
+
+  pex_free (obj);
+
+  if (errmsg)
+    {
+      out_err = xstrdup (errmsg);
+      return NULL;
+    }
+  if (exit_status || err)
+    {
+      out_err = xasprintf ("exit_status: %i err: %i",
+			   exit_status, err);
+      return NULL;
+    }
+
+  return utf8_buffer;
+}
+
+/* Subroutine of checker::capture_stdout.
+   This can be run either on the main thread, or within the
+   per-checker thread.  */
+
+void
+checker::make_args (auto_argvec &out) const
+{
+#define ADD_ARG(arg) out.safe_push (xstrdup (arg))
+
+  ADD_ARG (m_executable);
+
+  /* FIXME: for some reason the clang-analyzer harness attempts to invoke cc1
+     and fails if it can't find it.  */
+  ADD_ARG ("-B.");
+
+  /* Provide -I arguments.  */
+  /* FIXME: other kinds of cpp_dir?  */
+  for (cpp_dir *dir = get_added_cpp_dirs (QUOTE); dir; dir = dir->next)
+    {
+      char *dash_i_arg = concat ("-I", dir->name, NULL);
+      out.safe_push (dash_i_arg);
+    }
+
+  /* FIXME: supply -D args.  */
+
+  ADD_ARG ("-c");
+  ADD_ARG (main_input_filename);
+
+#undef ADD_ARG
+}
+
+/* Given m_utf8_buffer, a non-NULL UTF-8-encoded buffer of JSON output
+   from a tool, in Firehose JSON format, emit the results through GCC's
+   diagnostic subsystem.
+   This must be run on the main thread (firehose::analysis::from_json can
+   touch the global line_table, and error_at and the other diagnostic
+   emission routines require the main thread).  */
+
+void
+checker::handle_json ()
+{
+  /* Attempt to parse the buffer as UTF-8-encoded JSON.  */
+  char *err = NULL;
+  m_jv = json::parse_utf8_string (m_utf8_buffer, &err);
+  if (!m_jv)
+    {
+      gcc_assert (err);
+      error_at (UNKNOWN_LOCATION,
+		"unable to parse tool output as UTF-8 JSON: %s", err);
+      free (err);
+      return;
+    }
+
+  gcc_assert (err == NULL);
+
+  if (0)
+    {
+      m_jv->dump(stderr);
+      fprintf (stderr, "\n");
+    }
+
+  /* Attempt to parse the JSON values into Firehose objects.  */
+  firehose::analysis analysis;
+  analysis.from_json (m_jv, err);
+  if (err)
+    {
+      error_at (UNKNOWN_LOCATION, "error parsing JSON output: %qs", err);
+      free (err);
+      return;
+    }
+
+  gcc_assert (err == NULL);
+
+  /* Emit the results as GCC diagnostics.  */
+  int i;
+  firehose::result *result;
+  FOR_EACH_VEC_ELT (analysis.m_results, i, result)
+    {
+      switch (result->get_kind ())
+	{
+	case firehose::result::FIREHOSE_ISSUE:
+	  handle_issue (analysis, *static_cast <firehose::issue *> (result));
+	  break;
+	case firehose::result::FIREHOSE_INFO:
+	  handle_info (analysis, *static_cast <firehose::info *> (result));
+	  break;
+	case firehose::result::FIREHOSE_FAILURE:
+	  handle_failure (analysis, *static_cast <firehose::failure *> (result));
+	  break;
+	default:
+	  gcc_unreachable ();
+	}
+    }
+}
+
+/* policy's dtor.  */
+
+policy::~policy ()
+{
+  int i;
+  checker *checker;
+  FOR_EACH_VEC_ELT (m_checkers, i, checker)
+    delete checker;
+}
+
+/* Load policy from the JSON file at PATH.
+   If successful, return true.
+   Otherwise, return false, writing to OUT_ERR (the caller
+   must free the string).  */
+
+bool
+policy::read_from_file (const char *path, char *&out_err)
+{
+  // FIXME: this shouldn't be just in the selftests
+  char *utf8_buffer = selftest::read_file (SELFTEST_LOCATION, path);
+  // FIXME: error-checking
+
+  /* Attempt to parse the buffer as UTF-8-encoded JSON.  */
+  json::value *jv = json::parse_utf8_string (utf8_buffer, &out_err);
+  if (!jv)
+    {
+      free (utf8_buffer);
+      return false;
+    }
+
+  /* Convert to a policy object.  */
+  const json::array *arr = jv->as_array ();
+  if (!arr)
+    {
+      out_err = xstrdup ("not an array");
+      delete jv;
+      return false;
+    }
+
+  for (unsigned i = 0; i < arr->get_length (); i++)
+    {
+      checker *ch = checker::from_json (arr->get (i), out_err);
+      if (!ch)
+	{
+	  delete jv;
+	  return false;
+	}
+      m_checkers.safe_push (ch);
+    }
+
+  delete jv;
+  return true;
+}
+
+/* Interface for use by toplev.c.  */
+
+static bool use_threads = true; // FIXME; move to class policy?
+static policy *the_policy = NULL;
+
+/* Called near the beginning of toplev.c.
+
+   Load a policy file from PATH.
+   If using threads, invoke the checkers specified by the policy,
+   each with their own thread reading the stdout from the checker.  */
+
+void
+checkers_start (const char *path)
+{
+  the_policy = new policy ();
+
+  /* Try to load a policy file.  */
+  char *err = NULL;
+  if (!the_policy->read_from_file (path, err))
+    {
+      error_at (UNKNOWN_LOCATION,
+		"unable to load checker policy %qs: %qs",
+		path, err);
+      free (err);
+      return;
+    }
+
+  /* If using threads, start the checkers specified by the policy now,
+     each on their own thread.  */
+  if (use_threads)
+    {
+      int i;
+      checker *ch;
+      FOR_EACH_VEC_ELT (the_policy->m_checkers, i, ch)
+	ch->start ();
+    }
+}
+
+class note_buffer : public auto_vec<char>
+{
+ public:
+  void push_string (const char *str)
+  {
+    while (char ch = *str++)
+      safe_push (ch);
+  }
+};
+
+/* Called near the end of toplev.c.
+
+   If using threads, wait for each checker thread to finish, and
+   process the results.
+   Otherwise, run each checker now in the main thread, sequentially,
+   processing the results.  */
+
+void
+checkers_finish ()
+{
+  int i;
+  checker *ch;
+
+  /* This should have been created in checkers_start.  */
+  gcc_assert (the_policy);
+
+  if (use_threads)
+    {
+      FOR_EACH_VEC_ELT (the_policy->m_checkers, i, ch)
+	ch->finish ();
+    }
+  else
+    {
+      FOR_EACH_VEC_ELT (the_policy->m_checkers, i, ch)
+	ch->run_single_threaded ();
+    }
+
+  /* Watermark the binary with the analysis results/metadata.  */
+  {
+    json::array *all_results = new json::array ();
+    FOR_EACH_VEC_ELT (the_policy->m_checkers, i, ch)
+      if (ch->get_json_output ())
+	all_results->append (ch->get_json_output ()->clone ());
+      else
+	{
+	  /* FIXME: what to do about recording failures?  presumably we should do these
+	     in JSON format also.  */
+	}
+    char *all_results_str = all_results->to_str ();
+
+    /* annobin_output_string_note uses ".asciz" to write the "name", without
+       escaping newlines, quotes, or backslashes.  Hence we have to use
+       annobin_output_note directly, with name_is_string=false, which
+       uses ".dc.b" to write the name.  */
+    size_t len = strlen (all_results_str);
+    char *buffer = (char *) xmalloc (len + 5);
+    sprintf (buffer, "GA%c%c%s", GNU_BUILD_ATTRIBUTE_TYPE_STRING,
+	     GNU_BUILD_ATTRIBUTE_STATIC_ANALYSIS, all_results_str);
+    free (all_results_str);
+
+    annobin_output_note (buffer, len + 5, false, "static analysis results",
+			 NULL, 0, false, NT_GNU_BUILD_ATTRIBUTE_OPEN);
+
+    free (buffer);
+  }
+
+  delete the_policy;
+  the_policy = NULL;
+}
+
+#if CHECKING_P
+
+namespace selftest {
+
+/* Selftests.  */
+
+/* Verify that we can load a policy file.  */
+
+static void
+test_policy_parsing ()
+{
+  char *filename = locate_file ("checker-policy/test-policy.json");
+  char *err = NULL;
+  policy p;
+  bool success = p.policy::read_from_file (filename, err);
+  ASSERT_TRUE (success);
+  ASSERT_EQ (NULL, err);
+  free (filename);
+
+  ASSERT_EQ (4, p.m_checkers.length ());
+  ASSERT_STREQ ("../../src/checkers/clang_analyzer.py",
+		p.m_checkers[0]->get_executable ());
+}
+
+/* Run all of the selftests within this file.  */
+
+void
+checkers_cc_tests ()
+{
+  test_policy_parsing ();
+}
+
+} // namespace selftest
+
+#endif /* #if CHECKING_P */
diff --git a/gcc/checkers.h b/gcc/checkers.h
new file mode 100644
index 0000000..f023871
--- /dev/null
+++ b/gcc/checkers.h
@@ -0,0 +1,26 @@
+/* Running 3rd-party code analysis tools.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_CHECKERS_H
+#define GCC_CHECKERS_H
+
+extern void checkers_start (const char *path);
+extern void checkers_finish ();
+
+#endif /* GCC_CHECKERS_H.  */
diff --git a/gcc/selftest-run-tests.c b/gcc/selftest-run-tests.c
index 8afcd43..18c50a6 100644
--- a/gcc/selftest-run-tests.c
+++ b/gcc/selftest-run-tests.c
@@ -75,6 +75,7 @@ selftest::run_tests ()
   rtl_tests_c_tests ();
   read_rtl_function_c_tests ();
   firehose_cc_tests ();
+  checkers_cc_tests ();
 
   /* Higher-level tests, or for components that other selftests don't
      rely on.  */
diff --git a/gcc/selftest.h b/gcc/selftest.h
index 541bb71..0fe2c57 100644
--- a/gcc/selftest.h
+++ b/gcc/selftest.h
@@ -178,6 +178,7 @@ extern const char *path_to_selftest_files;
 /* Declarations for specific families of tests (by source file), in
    alphabetical order.  */
 extern void bitmap_c_tests ();
+extern void checkers_cc_tests ();
 extern void diagnostic_c_tests ();
 extern void diagnostic_show_locus_c_tests ();
 extern void edit_context_c_tests ();
diff --git a/gcc/testsuite/selftests/checker-policy/test-policy.json b/gcc/testsuite/selftests/checker-policy/test-policy.json
new file mode 100644
index 0000000..90532b2
--- /dev/null
+++ b/gcc/testsuite/selftests/checker-policy/test-policy.json
@@ -0,0 +1,7 @@
+[{ "executable": "../../src/checkers/clang_analyzer.py",
+   "languages": ["c", "c++"] },
+ { "executable": "../../src/checkers/cppcheck.py",
+   "languages": ["c", "c++"] },
+ { "executable": "../../src/checkers/flawfinder.py",
+   "languages": ["c", "c++"] },
+ { "executable": "../../src/checkers/ianal.py"}]
-- 
1.8.5.3

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH 22/22] Add contrib/get-static-analysis.py
  2017-08-04 21:30 [PATCH 00/22] RFC: integrated 3rd-party static analysis support David Malcolm
                   ` (9 preceding siblings ...)
  2017-08-04 21:36 ` [PATCH 09/22] Add selftest::read_file (..., FILE *, ...) David Malcolm
@ 2017-08-04 21:36 ` David Malcolm
  2017-08-04 21:37 ` [PATCH 04/22] Add firehose.h/cc David Malcolm
                   ` (14 subsequent siblings)
  25 siblings, 0 replies; 35+ messages in thread
From: David Malcolm @ 2017-08-04 21:36 UTC (permalink / raw)
  To: gcc-patches; +Cc: David Malcolm

This patch adds a simple Python 2/3 script for reading the
static analysis "watermark" from object files, writing the
JSON to stdout (prettyprinting it with indentation and newlines
for ease of human reading).

contrib/ChangeLog:
	* get-static-analysis.py: New file.
---
 contrib/get-static-analysis.py | 47 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)
 create mode 100644 contrib/get-static-analysis.py

diff --git a/contrib/get-static-analysis.py b/contrib/get-static-analysis.py
new file mode 100644
index 0000000..c246718
--- /dev/null
+++ b/contrib/get-static-analysis.py
@@ -0,0 +1,47 @@
+# FIXME
+# Extract static analysis results from input file and pretty-print JSON to stdout
+# This file is intended to be compatible with both Python 2 and Python 3
+
+import json
+import os
+import subprocess
+import sys
+import tempfile
+
+SECTION_NAME = '.gnu.build.attributes'
+
+def get_json_content(filename):
+    """
+    Extract the JSON from SECTION_NAME from filename, returning
+    as a bytes instance.
+    """
+    with tempfile.NamedTemporaryFile() as outfile:
+        try:
+            subprocess.check_call(['objcopy', '-O', 'binary',
+                                   '--only-section=%s' % SECTION_NAME,
+                                   '--set-section-flags',
+                                   '%s=alloc' % SECTION_NAME,
+                                   filename, outfile.name])
+        except subprocess.CalledProcessError:
+            if not os.path.exists(outfile.name):
+                outfile.delete = False
+            raise
+        with open(outfile.name, 'rb') as f_in:
+            buf = f_in.read()
+            if not buf:
+                raise ValueError('section not found: %s' % SECTION_NAME)
+            # Expect 16 bytes of header, then JSON, then a 0-terminator and padding
+            json_buf = buf[16:].split(b'\x00')[0]
+            return json_buf
+
+filename = sys.argv[1]
+try:
+    json_buf = get_json_content(filename)
+except subprocess.CalledProcessError:
+    sys.exit(1)
+except ValueError as exc:
+    print(exc)
+    sys.exit(1)
+json_str = json_buf.decode('utf-8')
+jv = json.loads(json_str)
+json.dump(jv, sys.stdout, indent=4)
-- 
1.8.5.3

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH 07/22] Add minimal version of Nick Clifton's annobin code
  2017-08-04 21:30 [PATCH 00/22] RFC: integrated 3rd-party static analysis support David Malcolm
                   ` (3 preceding siblings ...)
  2017-08-04 21:36 ` [PATCH 17/22] Add checkers/cppcheck.py David Malcolm
@ 2017-08-04 21:36 ` David Malcolm
  2017-09-01 18:17   ` Jeff Law
  2017-08-04 21:36 ` [PATCH 19/22] Add checkers/ianal.py David Malcolm
                   ` (20 subsequent siblings)
  25 siblings, 1 reply; 35+ messages in thread
From: David Malcolm @ 2017-08-04 21:36 UTC (permalink / raw)
  To: gcc-patches; +Cc: David Malcolm

This patch provides a way to "watermark" binaries with
metadata.  It's used later in the patch kit to watermark
binaries with static analysis results and metadata.

See:
  https://fedoraproject.org/wiki/Toolchain/Watermark

Note: this is a version of Nick Clifton's "annobin" gcc plugin:
  https://nickc.fedorapeople.org/
heavily hacked up by me:
* removed everything (including plugin support) not needed by
  later patches in the kit
* rewritten as an API, rather than as a plugin
* removed annobin_inform (..., "ICE: ...") calls in favor of
  gcc_assert.
* line-wrapped
* added an annobin_ensure_init to initialize annobin_is_64bit.
* added #ifndef guard to annobin.h

It includes the commits:
* Remove size limit on string passed to annobin_output_string_note
* Version 2 of spec: Add a GA prefix to all names

gcc/ChangeLog:
	* Makefile.in (OBJS): Add annobin.o.
	* annobin.cc: New file.
	* annobin.h: New file.
---
 gcc/Makefile.in |   1 +
 gcc/annobin.cc  | 185 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 gcc/annobin.h   |  44 ++++++++++++++
 3 files changed, 230 insertions(+)
 create mode 100644 gcc/annobin.cc
 create mode 100644 gcc/annobin.h

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 9ceb3f3..319e3f3 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1216,6 +1216,7 @@ OBJS = \
 	ggc-page.o \
 	alias.o \
 	alloc-pool.o \
+	annobin.o \
 	auto-inc-dec.o \
 	auto-profile.o \
 	bb-reorder.o \
diff --git a/gcc/annobin.cc b/gcc/annobin.cc
new file mode 100644
index 0000000..ad8e49a
--- /dev/null
+++ b/gcc/annobin.cc
@@ -0,0 +1,185 @@
+/* annobin - support for annotating binary files.
+   Copyright (c) 2017 Red Hat.
+   Created by Nick Clifton.
+   Heavily hacked up by David Malcolm.
+
+  This is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License as published
+  by the Free Software Foundation; either version 3, or (at your
+  option) any later version.
+
+  It is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "diagnostic-core.h"
+#include "annobin.h"
+#include "output.h"
+
+/* Internal variable, used by target specific parts of the annobin plugin as well
+   as this generic part.  True if the object file being generated is for a 64-bit
+   target.  */
+bool           annobin_is_64bit = false;
+
+static void
+annobin_ensure_init (void)
+{
+  static bool done_once = false;
+  if (done_once)
+    return;
+  done_once = true;
+
+  /* Compute the default data size.  */
+  switch (POINTER_SIZE)
+    {
+    case 16:
+    case 32:
+      annobin_is_64bit = false; break;
+    case 64:
+      annobin_is_64bit = true; break;
+    default:
+      sorry ("unknown target pointer size: %d", POINTER_SIZE);
+    }
+}
+
+void
+annobin_output_note (const void * name, unsigned namesz, bool name_is_string,
+		     const char * name_description,
+		     const void * desc, unsigned descsz, bool desc_is_string,
+		     unsigned type)
+{
+  annobin_ensure_init ();
+
+  unsigned i;
+
+  if (type == NT_GNU_BUILD_ATTRIBUTE_FUNC
+      || type == NT_GNU_BUILD_ATTRIBUTE_OPEN)
+    {
+      fprintf (asm_out_file, "\t.pushsection %s\n",
+	       GNU_BUILD_ATTRS_SECTION_NAME);
+    }
+
+  if (name == NULL)
+    {
+      gcc_assert (namesz == 0);
+      fprintf (asm_out_file, "\t.dc.l 0\t\t%s no name\n", ASM_COMMENT_START);
+    }
+  else if (name_is_string)
+    {
+      gcc_assert (strlen ((const char *) name) == namesz - 1);
+      fprintf (asm_out_file, "\t.dc.l %u \t%s namesz = strlen (%s)\n", namesz,
+               ASM_COMMENT_START, (const char *) name);
+    }
+  else
+    fprintf (asm_out_file, "\t.dc.l %u\t\t%s size of name\n", namesz,
+	     ASM_COMMENT_START);
+
+  if (desc == NULL)
+    {
+      gcc_assert (descsz == 0);
+      fprintf (asm_out_file, "\t.dc.l 0\t\t%s no description\n",
+	       ASM_COMMENT_START);
+    }
+  else if (desc_is_string)
+    {
+      gcc_assert (descsz == (annobin_is_64bit ? 8 : 4));
+      fprintf (asm_out_file, "\t.dc.l %u\t\t%s descsz = sizeof (address)\n",
+	       descsz, ASM_COMMENT_START);
+    }
+  else
+    fprintf (asm_out_file, "\t.dc.l %u\t\t%s size of description\n", descsz,
+	     ASM_COMMENT_START);
+
+  fprintf (asm_out_file, "\t.dc.l %#x\t%s type = %s\n", type, ASM_COMMENT_START,
+	   type == NT_GNU_BUILD_ATTRIBUTE_OPEN ? "OPEN" :
+	   type == NT_GNU_BUILD_ATTRIBUTE_FUNC ? "FUNC" :
+	   type == NT_GNU_PROPERTY_TYPE_0      ? "PROPERTY_TYPE_0"
+	   : "*UNKNOWN*");
+
+  if (name)
+    {
+      if (name_is_string)
+	{
+	  fprintf (asm_out_file, "\t.asciz \"%s\"", (const char *)name);
+	}
+      else
+	{
+	  fprintf (asm_out_file, "\t.dc.b");
+	  for (i = 0; i < namesz; i++)
+	    fprintf (asm_out_file, " %#x%c",
+		     ((const unsigned char *) name)[i],
+		     i < (namesz - 1) ? ',' : ' ');
+	}
+
+      fprintf (asm_out_file, "\t%s name (%s)\n",
+	       ASM_COMMENT_START, name_description);
+
+      if (namesz % 4)
+	{
+	  fprintf (asm_out_file, "\t.dc.b");
+	  while (namesz % 4)
+	    {
+	      namesz++;
+	      fprintf (asm_out_file, " 0%c", namesz % 4 ? ',' : ' ');
+	    }
+	  fprintf (asm_out_file, "\t%s Padding\n", ASM_COMMENT_START);
+	}
+    }
+
+  if (desc)
+    {
+      if (desc_is_string)
+	{
+	  /* The DESCRIPTION string is the name of a symbol.  We want to produce
+	     a reference to this symbol of the appropriate size for the target
+	     architecture.  */
+	  if (annobin_is_64bit)
+	    fprintf (asm_out_file, "\t.quad %s", (const char *)desc);
+	  else
+	    fprintf (asm_out_file, "\t.dc.l %s", (const char *)desc);
+	  fprintf (asm_out_file, "\t%s description (symbol name)\n",
+		   ASM_COMMENT_START);
+	}
+      else
+	{
+	  fprintf (asm_out_file, "\t.dc.b");
+
+	  for (i = 0; i < descsz; i++)
+	    {
+	      fprintf (asm_out_file, " %#x", ((const unsigned char *) desc)[i]);
+	      if (i == (descsz - 1))
+		fprintf (asm_out_file, "\t%s description\n", ASM_COMMENT_START);
+	      else if ((i % 8) == 7)
+		fprintf (asm_out_file, "\t%s description\n\t.dc.b",
+			 ASM_COMMENT_START);
+	      else
+		fprintf (asm_out_file, ",");
+	    }
+
+	  if (descsz % 4)
+	    {
+	      fprintf (asm_out_file, "\t.dc.b");
+	      while (descsz % 4)
+		{
+		  descsz++;
+		  fprintf (asm_out_file, " 0%c", descsz % 4 ? ',' : ' ');
+		}
+	      fprintf (asm_out_file, "\t%s Padding\n", ASM_COMMENT_START);
+	    }
+	}
+    }
+
+  if (type == NT_GNU_BUILD_ATTRIBUTE_FUNC
+      || type == NT_GNU_BUILD_ATTRIBUTE_OPEN)
+    {
+      fprintf (asm_out_file, "\t.popsection\n");
+      fflush (asm_out_file);
+    }
+
+  fprintf (asm_out_file, "\n");
+}
diff --git a/gcc/annobin.h b/gcc/annobin.h
new file mode 100644
index 0000000..76eb01c
--- /dev/null
+++ b/gcc/annobin.h
@@ -0,0 +1,44 @@
+/* annobin - support for annotating binary files.
+   Copyright (c) 2017 Red Hat.
+   Created by Nick Clifton.
+   Heavily hacked up by David Malcolm.
+
+  This is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License as published
+  by the Free Software Foundation; either version 3, or (at your
+  option) any later version.
+
+  It is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.  */
+
+#ifndef GCC_ANNOBIN_H
+#define GCC_ANNOBIN_H
+
+#define SHF_GNU_BUILD_NOTE      (1 << 20)	/* Section contains GNU BUILD ATTRIBUTE notes.  */
+#define NT_GNU_PROPERTY_TYPE_0  5		/* Generated by gcc.  */
+
+#define NT_GNU_BUILD_ATTRIBUTE_OPEN	0x100
+#define NT_GNU_BUILD_ATTRIBUTE_FUNC	0x101
+
+#define GNU_BUILD_ATTRIBUTE_TYPE_NUMERIC	'*'
+#define GNU_BUILD_ATTRIBUTE_TYPE_STRING		'$'
+#define GNU_BUILD_ATTRIBUTE_TYPE_BOOL_TRUE	'+'
+#define GNU_BUILD_ATTRIBUTE_TYPE_BOOL_FALSE	'!'
+
+#define GNU_BUILD_ATTRIBUTE_VERSION	1
+#define GNU_BUILD_ATTRIBUTE_STACK_PROT	2
+#define GNU_BUILD_ATTRIBUTE_RELRO	3
+#define GNU_BUILD_ATTRIBUTE_STACK_SIZE	4
+#define GNU_BUILD_ATTRIBUTE_TOOL	5
+#define GNU_BUILD_ATTRIBUTE_ABI		6
+#define GNU_BUILD_ATTRIBUTE_PIC		7
+#define GNU_BUILD_ATTRIBUTE_SHORT_ENUM	8
+
+#define NOTE_GNU_PROPERTY_SECTION_NAME	".note.gnu.property"
+#define GNU_BUILD_ATTRS_SECTION_NAME	".gnu.build.attributes"
+
+extern void annobin_output_note (const void *, unsigned, bool, const char *, const void *, unsigned, bool, unsigned);
+
+#endif  /* GCC_ANNOBIN_H  */
-- 
1.8.5.3

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH 17/22] Add checkers/cppcheck.py
  2017-08-04 21:30 [PATCH 00/22] RFC: integrated 3rd-party static analysis support David Malcolm
                   ` (2 preceding siblings ...)
  2017-08-04 21:30 ` [PATCH 03/22] Add JSON implementation David Malcolm
@ 2017-08-04 21:36 ` David Malcolm
  2017-08-04 21:36 ` [PATCH 07/22] Add minimal version of Nick Clifton's annobin code David Malcolm
                   ` (21 subsequent siblings)
  25 siblings, 0 replies; 35+ messages in thread
From: David Malcolm @ 2017-08-04 21:36 UTC (permalink / raw)
  To: gcc-patches; +Cc: David Malcolm

This patch adds a harness for invoking cppcheck:
   http://cppcheck.sourceforge.net/
returning the results in JSON format.

It runs "cppcheck --xml --xml-version=2", then uses
firehose.parsers.cppcheck.parse_file to parse the generated .xml file,
turning it into firehose JSON.

checkers/ChangeLog:
	* cppcheck.py: New file.
---
 checkers/cppcheck.py | 138 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 138 insertions(+)
 create mode 100755 checkers/cppcheck.py

diff --git a/checkers/cppcheck.py b/checkers/cppcheck.py
new file mode 100755
index 0000000..9b6a864
--- /dev/null
+++ b/checkers/cppcheck.py
@@ -0,0 +1,138 @@
+#!/usr/bin/env python
+#   Copyright 2012, 2013, 2015, 2017 David Malcolm <dmalcolm@redhat.com>
+#   Copyright 2012, 2013, 2015, 2017 Red Hat, Inc.
+#
+#   This is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by
+#   the Free Software Foundation, either version 3 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#   General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program.  If not, see
+#   <http://www.gnu.org/licenses/>.
+
+import sys
+import tempfile
+
+from firehose.model import Failure, Issue
+from firehose.parsers.cppcheck import parse_file
+from gccinvocation import GccInvocation
+
+from checker import Checker, CheckerTests, make_file, make_stats, \
+    tool_main
+
+class InvokeCppcheck(Checker):
+    """
+    Checker subclass that invokes "cppcheck"
+    """
+    name = 'cppcheck'
+
+    def raw_invoke(self, gccinv, sourcefile):
+        args = ['cppcheck',
+                '--xml', '--xml-version=2',
+                sourcefile]
+        return self._run_subprocess(sourcefile, args)
+
+    def handle_output(self, result):
+        if result.returncode:
+            analysis = self._make_failed_analysis(result.sourcefile, result.timer,
+                                                  msgtext='Bad exit code running %s' % self.name,
+                                                  failureid='bad-exit-code')
+            self.set_custom_fields(result, analysis)
+            return analysis
+
+        # (there doesn't seem to be a way to have cppcheck directly
+        # save its XML output to a given location)
+
+        with tempfile.NamedTemporaryFile() as outfile:
+            outfile.write(result.err)
+            outfile.flush()
+
+            with open(outfile.name) as infile:
+                # Parse the XML from stderr into a firehose analysis:
+                analysis = parse_file(infile,
+                                      file_=make_file(result.sourcefile),
+                                      stats=make_stats(result.timer))
+                self.set_custom_fields(result, analysis)
+                return analysis
+
+    def set_custom_fields(self, result, analysis):
+        analysis.set_custom_field('cppcheck-invocation',
+                                  ' '.join(result.argv))
+        result.set_custom_fields(analysis)
+
+class CppcheckTests(CheckerTests):
+    def make_tool(self):
+        return self.make_tool_from_class(InvokeCppcheck)
+
+    def verify_basic_metadata(self, analysis, sourcefile):
+        # Verify basic metadata:
+        self.assert_metadata(analysis, 'cppcheck', sourcefile)
+        self.assert_has_custom_field(analysis, 'cppcheck-invocation')
+        self.assert_has_custom_field(analysis, 'stdout')
+        self.assert_has_custom_field(analysis, 'stderr')
+
+    def test_file_not_found(self):
+        analysis = self.invoke('does-not-exist.c')
+        self.assertEqual(len(analysis.results), 1)
+        self.assertIsInstance(analysis.results[0], Failure)
+        self.assertEqual(analysis.results[0].failureid, 'bad-exit-code')
+
+    def test_timeout(self):
+        sourcefile = 'test-sources/harmless.c'
+        tool = self.make_tool()
+        tool.timeout = 0
+        gccinv = GccInvocation(['gcc', sourcefile])
+        analysis = tool.checked_invoke(gccinv, sourcefile)
+        self.assert_metadata(analysis, 'cppcheck', sourcefile)
+        self.assertEqual(len(analysis.results), 1)
+        r0 = analysis.results[0]
+        self.assertIsInstance(r0, Failure)
+        self.assertEqual(r0.failureid, 'timeout')
+        self.assert_has_custom_field(analysis, 'timeout')
+        self.assert_has_custom_field(analysis, 'command-line')
+
+    def test_harmless_file(self):
+        analysis = self.invoke('test-sources/harmless.c')
+        self.assertEqual(len(analysis.results), 0)
+
+    def test_read_through_null(self):
+        analysis = self.invoke('test-sources/read-through-null.c')
+        self.assertEqual(len(analysis.results), 1)
+        r0 = analysis.results[0]
+        self.assertIsInstance(r0, Issue)
+        self.assertEqual(r0.testid, 'nullPointer')
+        self.assertEqual(r0.location.file.givenpath,
+                         'test-sources/read-through-null.c')
+        self.assertEqual(r0.location.point.line, 3)
+        self.assertEqual(r0.message.text,
+                         "Null pointer dereference")
+        self.assertEqual(r0.severity, 'error')
+
+    def test_out_of_bounds(self):
+        analysis = self.invoke('test-sources/out-of-bounds.c')
+        self.assertEqual(len(analysis.results), 2)
+
+        r0 = analysis.results[0]
+        self.assertIsInstance(r0, Issue)
+        self.assertEqual(r0.testid, 'arrayIndexOutOfBounds')
+        self.assertEqual(r0.location.file.givenpath,
+                         'test-sources/out-of-bounds.c')
+        self.assertEqual(r0.location.point.line, 5)
+        self.assertEqual(
+            r0.message.text,
+            "Array 'arr[10]' accessed at index 15, which is out of bounds.")
+        self.assertEqual(r0.severity, 'error')
+
+        r1 = analysis.results[1]
+        self.assertIsInstance(r1, Issue)
+        self.assertEqual(r1.testid, 'uninitvar')
+        # etc
+
+if __name__ == '__main__':
+    sys.exit(tool_main(sys.argv, InvokeCppcheck))
-- 
1.8.5.3

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH 04/22] Add firehose.h/cc
  2017-08-04 21:30 [PATCH 00/22] RFC: integrated 3rd-party static analysis support David Malcolm
                   ` (10 preceding siblings ...)
  2017-08-04 21:36 ` [PATCH 22/22] Add contrib/get-static-analysis.py David Malcolm
@ 2017-08-04 21:37 ` David Malcolm
  2017-08-04 21:37 ` [PATCH 15/22] Add checkers/clang_analyzer.py David Malcolm
                   ` (13 subsequent siblings)
  25 siblings, 0 replies; 35+ messages in thread
From: David Malcolm @ 2017-08-04 21:37 UTC (permalink / raw)
  To: gcc-patches; +Cc: David Malcolm

"Firehose" is a serialization format for results from code
analysis tools:

  http://firehose.readthedocs.io/en/latest/index.html

(along with a Python module for working with the format).

This patch implements a set of C++ classes modeling the format,
with support for populating them from a JSON dump, so that we
can losslessly serialize diagnostics and other static analysis results.

gcc/ChangeLog:
	* Makefile.in (OBJS): Add firehose.o.
	* firehose.cc: New file.
	* firehose.h: New file.
	* selftest-run-tests.c (selftest::run_tests): Call
	selftest::firehose_cc_tests.
	* selftest.h (selftest::firehose_cc_tests): New decl.

gcc/testsuite/ChangeLog:
	* selftests/checker-output/test-clang-analyzer.json: New file.
	* selftests/checker-output/test-cppcheck.json: New file.
	* selftests/checker-output/test-failure.json: New file.
---
 gcc/Makefile.in                                    |   1 +
 gcc/firehose.cc                                    | 709 +++++++++++++++++++++
 gcc/firehose.h                                     | 199 ++++++
 gcc/selftest-run-tests.c                           |   1 +
 gcc/selftest.h                                     |   1 +
 .../checker-output/test-clang-analyzer.json        | 122 ++++
 .../selftests/checker-output/test-cppcheck.json    |  50 ++
 .../selftests/checker-output/test-failure.json     |  38 ++
 8 files changed, 1121 insertions(+)
 create mode 100644 gcc/firehose.cc
 create mode 100644 gcc/firehose.h
 create mode 100644 gcc/testsuite/selftests/checker-output/test-clang-analyzer.json
 create mode 100644 gcc/testsuite/selftests/checker-output/test-cppcheck.json
 create mode 100644 gcc/testsuite/selftests/checker-output/test-failure.json

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 4f7fd0c..488f699 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1280,6 +1280,7 @@ OBJS = \
 	expr.o \
 	fibonacci_heap.o \
 	final.o \
+	firehose.o \
 	fixed-value.o \
 	fold-const.o \
 	fold-const-call.o \
diff --git a/gcc/firehose.cc b/gcc/firehose.cc
new file mode 100644
index 0000000..b2aa167
--- /dev/null
+++ b/gcc/firehose.cc
@@ -0,0 +1,709 @@
+/* Serialization format for checker results.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "firehose.h"
+#include "selftest.h"
+#include "selftest-input.h"
+
+namespace firehose {
+
+/* Attempt to parse JV as a json object containing "line" and "column"
+   attributes (a serialization of a firehose.model.Point python object).
+
+   If successful, write a location_t to OUT_VALUE, using GIVENPATH as the
+   filename, and return true.
+   Otherwise, write an error message to OUT_ERR (which must be freed by
+   the caller) and return false. */
+
+static bool
+get_location_from_point (const char *givenpath, const json::value *jv,
+			 location_t &out_value, char *&out_err)
+{
+  int line;
+  if (!jv->get_int_by_key ("line", line, out_err))
+    return false;
+
+  int column;
+  if (!jv->get_int_by_key ("column", column, out_err))
+    return false;
+
+  out_value
+   = linemap_position_for_file_line_and_column (line_table,
+						givenpath, line, column);
+  return true;
+}
+
+/* As above, but expect JV to be a json object containing a "start"
+   and "end" (a serialization of a firehose.model.Range python object).  */
+
+static bool
+get_location_from_range (const char *givenpath, const json::value *jv,
+			 location_t &out_value, char *&out_err)
+{
+  const json::value *jv_start;
+  if (!jv->get_value_by_key ("start", jv_start, out_err))
+    return false;
+
+  location_t start;
+  if (!get_location_from_point (givenpath, jv_start,
+				start, out_err))
+    return false;
+
+  const json::value *jv_end;
+  if (!jv->get_value_by_key ("end", jv_end, out_err))
+    return false;
+  location_t end;
+  if (!get_location_from_point (givenpath, jv_end,
+				end, out_err))
+    return false;
+
+  out_value = make_location (start, start, end);
+  return true;
+}
+
+/* Attempt to extract an attribute "location" from JV, where the value
+   ought to be a serialization of a firehose.model.Location python object.
+   A "point" is used if get_if_nonnull finds one, otherwise any "range_".
+   If successful, write a location_t to OUT_VALUE (which is left untouched
+   if neither is found) and return true.  Otherwise, write an error message
+   to OUT_ERR (which must be freed by the caller) and return false.  */
+
+static bool
+get_location (const json::value *jv, location_t &out_value, char *&out_err)
+{
+  const json::value *location;
+  if (!jv->get_value_by_key ("location", location, out_err))
+    return false;
+
+  const json::value *file;
+  if (!location->get_value_by_key ("file", file, out_err))
+    return false;
+  const char *givenpath;
+  if (!file->get_string_by_key ("givenpath", givenpath, out_err))
+    return false;
+
+  const json::value *point = location->as_object ()->get_if_nonnull ("point");
+  if (point)
+    {
+      if (!get_location_from_point (givenpath, point, out_value, out_err))
+	return false;
+    }
+  else
+    {
+      const json::value *range
+	= location->as_object ()->get_if_nonnull ("range_");
+
+      if (range)
+	{
+	  if (!get_location_from_range (givenpath, range, out_value,
+					out_err))
+	    return false;
+	}
+    }
+
+  // The "function" attribute of the location is ignored for now.
+  return true;
+}
+
+/* firehose::state's ctor.  */
+
+state::state () : m_location (UNKNOWN_LOCATION), m_notes (NULL)
+{
+}
+
+/* firehose::state's dtor.  */
+
+state::~state ()
+{
+  free (m_notes);
+}
+
+/* Attempt to allocate a new firehose::state based on JV, which ought to be a
+   serialization of a firehose.model.State python object.
+
+   Return the new state if successful.
+   Otherwise return NULL and write an error message to OUT_ERR
+   (which must be freed by the caller).  */
+
+state *
+state::from_json (const json::value *jv, char *&out_err)
+{
+  state *s = new state ();
+
+  /* Extract the state's location to m_location.  */
+  if (!get_location (jv, s->m_location, out_err))
+    {
+      delete s;
+      return NULL;
+    }
+
+  /* Get any notes.  */
+  json::value *notes = jv->as_object ()->get_if_nonnull ("notes");
+  if (notes)
+    {
+      const char *text;
+      if (!notes->get_string_by_key ("text", text, out_err))
+	{
+	  delete s;
+	  return NULL;
+	}
+      s->m_notes = xstrdup (text);
+    }
+
+  return s;
+}
+
+/* firehose::trace's dtor.  */
+
+trace::~trace ()
+{
+  int i;
+  state *state;
+  FOR_EACH_VEC_ELT (m_states, i, state)
+    delete state;
+}
+
+/* Attempt to allocate a new firehose::trace based on JV, which ought to be a
+   serialization of a firehose.model.Trace python object.
+
+   Return the new trace if successful.
+   Otherwise return NULL and write an error message to OUT_ERR
+   (which must be freed by the caller).  */
+
+trace *
+trace::from_json (const json::value *jv, char *&out_err)
+{
+  const json::array *states;
+  if (!jv->get_array_by_key ("states", states, out_err))
+    return NULL;
+
+  trace *t = new trace ();
+  for (unsigned idx = 0; idx < states->get_length (); idx++)
+    {
+      const json::value *item = states->get (idx);
+      if (0) /* Disabled debugging output.  */
+	{
+	  fprintf (stderr, "got state %i: ", idx);
+	  item->dump (stderr);
+	  fprintf (stderr, "\n");
+	}
+      firehose::state *state = state::from_json (item, out_err);
+      if (!state)
+	{
+	  delete t; /* trace's dtor deletes the states pushed so far.  */
+	  return NULL;
+	}
+      t->m_states.safe_push (state);
+    }
+
+  return t;
+}
+
+/* Filter out the states to just those with notes.  */
+
+void
+trace::filter ()
+{
+  unsigned idx = 0;
+  while (idx < m_states.length ())
+    {
+      if (m_states[idx]->m_notes == NULL)
+	{
+	  delete m_states[idx];
+	  m_states.ordered_remove (idx);
+	}
+      else
+	idx++;
+    }
+}
+
+/* Determine if THIS trace is merely a single state that duplicates
+   the information within ISSUE.  */
+
+bool
+trace::is_redundant_p (const issue& issue) const
+{
+  if (m_states.length () > 1)
+    return false;
+  if (m_states.length () < 1)
+    return true;
+
+  state *s0 = m_states[0];
+
+  if (s0->m_location != issue.m_location)
+    return false;
+  if (s0->m_notes)
+    if (0 != strcmp (s0->m_notes, issue.m_message))
+      return false;
+
+  /* Single state, with same location, and same message as ISSUE.  */
+  return true;
+}
+
+/* firehose::result's ctor.  */
+
+result::result ()
+: m_message (NULL), m_location (UNKNOWN_LOCATION)
+{
+}
+
+/* firehose::result's dtor.  */
+
+result::~result ()
+{
+  free (m_message);
+}
+
+/* Attempt to allocate a new firehose::result based on JV, which ought to be a
+   serialization of a firehose.model.Result python object.  The "type"
+   attribute of JV selects which subclass (issue, info or failure) is built.
+   Return the new result if successful.
+   Otherwise return NULL and write an error message to OUT_ERR
+   (which must be freed by the caller).  */
+
+result *
+result::from_json (const json::value *jv, char *&out_err)
+{
+  const char *type;
+  if (!jv->get_string_by_key ("type", type, out_err))
+    return NULL;
+
+  /* Dispatch on the "type" attribute to the subclass-specific parser.  */
+  result *result = NULL;
+  if (0 == strcmp (type, "Issue"))
+    result = issue::from_json (jv, out_err);
+  else if (0 == strcmp (type, "Info"))
+    result = info::from_json (jv, out_err);
+  else if (0 == strcmp (type, "Failure"))
+    result = failure::from_json (jv, out_err);
+  else
+    {
+      out_err = xstrdup ("unrecognized type of result");
+      return NULL;
+    }
+
+  /* A failed subclass parser has already written its own message to
+     OUT_ERR; propagate it rather than overwriting (and leaking) it.  */
+  if (!result)
+    return NULL;
+
+  /* Extract the result's message's text to m_message.  */
+  const json::value *message;
+  if (!jv->get_value_by_key ("message", message, out_err))
+    {
+      delete result;
+      return NULL;
+    }
+  const char *message_text;
+  if (!message->get_string_by_key ("text", message_text, out_err))
+    {
+      delete result;
+      return NULL;
+    }
+  result->m_message = xstrdup (message_text);
+
+  /* Extract the result's location to m_location.  */
+  if (!get_location (jv, result->m_location, out_err))
+    {
+      delete result;
+      return NULL;
+    }
+
+  return result;
+}
+
+/* firehose::issue's ctor.  */
+
+issue::issue () : result (), m_testid (NULL), m_trace (NULL)
+{
+}
+
+/* firehose::issue's dtor.  */
+
+issue::~issue ()
+{
+  free (m_testid);
+  delete m_trace;
+}
+
+/* Attempt to allocate a new firehose::issue based on JV, which ought to be a
+   serialization of a firehose.model.Issue python object.
+   Only the issue-specific fields (m_testid, m_trace) are populated here.
+   Return the new issue if successful.
+   Otherwise return NULL and write an error message to OUT_ERR
+   (which must be freed by the caller).  */
+
+issue *
+issue::from_json (const json::value *jv, char *&out_err)
+{
+  issue *r = new issue ();
+
+  /* Get any testid (it is optional) as m_testid.  */
+  const char *testid_text = NULL;
+  if (!jv->get_optional_string_by_key ("testid", testid_text, out_err))
+    {
+      delete r;
+      return NULL;
+    }
+  if (testid_text)
+    r->m_testid = xstrdup (testid_text);
+
+  /* Get any trace as m_trace.  */
+  const json::value *trace = jv->as_object ()->get_if_nonnull ("trace");
+  if (trace)
+    {
+      r->m_trace = trace::from_json (trace, out_err);
+      if (!r->m_trace)
+	{
+	  delete r;
+	  return NULL;
+	}
+    }
+
+  return r;
+}
+
+/* firehose::info's ctor.  */
+
+info::info () : result (), m_infoid (NULL)
+{
+}
+
+/* firehose::info's dtor.  */
+
+info::~info ()
+{
+  free (m_infoid);
+}
+
+/* Attempt to allocate a new firehose::info based on JV, which ought to be a
+   serialization of a firehose.model.Info python object.
+   Only the info-specific field (m_infoid) is populated here.
+   Return the new info if successful.
+   Otherwise return NULL and write an error message to OUT_ERR
+   (which must be freed by the caller).  */
+
+info *
+info::from_json (const json::value *jv, char *&out_err)
+{
+  info *r = new info ();
+
+  /* Get any infoid (it is optional) as m_infoid.  */
+  const char *infoid_text = NULL;
+  if (!jv->get_optional_string_by_key ("infoid", infoid_text, out_err))
+    {
+      delete r;
+      return NULL;
+    }
+  if (infoid_text)
+    r->m_infoid = xstrdup (infoid_text);
+
+  return r;
+}
+
+/* firehose::failure's ctor.  */
+
+failure::failure () : result (), m_failureid (NULL)
+{
+}
+
+/* firehose::failure's dtor.  */
+
+failure::~failure ()
+{
+  free (m_failureid);
+}
+
+/* Attempt to allocate a new firehose::failure based on JV, which ought to be a
+   serialization of a firehose.model.Failure python object.
+   Only the failure-specific field (m_failureid) is populated here.
+   Return the new failure if successful.
+   Otherwise return NULL and write an error message to OUT_ERR
+   (which must be freed by the caller).  */
+
+failure *
+failure::from_json (const json::value *jv, char *&out_err)
+{
+  failure *r = new failure ();
+
+  /* Get any failureid (it is optional) as m_failureid.  */
+  const char *failureid_text = NULL;
+  if (!jv->get_optional_string_by_key ("failureid", failureid_text, out_err))
+    {
+      delete r;
+      return NULL;
+    }
+  if (failureid_text)
+    r->m_failureid = xstrdup (failureid_text);
+
+  return r;
+}
+
+/* firehose::generator's ctor.  */
+
+generator::generator ()
+: m_name (NULL), m_version (NULL)
+{
+}
+
+/* firehose::generator's dtor.  */
+
+generator::~generator ()
+{
+  free (m_name);
+  free (m_version);
+}
+
+/* Attempt to populate this firehose::generator based on JV, which ought to be a
+   serialization of a firehose.model.Generator python object.
+
+   Return true if successful.
+   Otherwise return false and write an error message to OUT_ERR
+   (which must be freed by the caller).  */
+
+bool
+generator::from_json (const json::value *jv, char *&out_err)
+{
+  const char *name;
+  if (!jv->get_string_by_key ("name", name, out_err))
+    return false;
+  m_name = xstrdup (name);
+
+  const char *version = NULL;
+  if (!jv->get_optional_string_by_key ("version", version, out_err))
+    return false;
+  if (version)
+    m_version = xstrdup (version);
+
+  return true;
+}
+
+/* Attempt to populate this firehose::metadata based on JV, which ought to be a
+   serialization of a firehose.model.Metadata python object.
+
+   Return true if successful.
+   Otherwise return false and write an error message to OUT_ERR
+   (which must be freed by the caller).  */
+
+bool
+metadata::from_json (const json::value *jv, char *&out_err)
+{
+  const json::value *jv_generator = NULL;
+  if (!jv->get_value_by_key ("generator", jv_generator, out_err))
+    return false;
+  if (!m_generator.from_json (jv_generator, out_err))
+    return false;
+
+  return true;
+}
+
+/* firehose::analysis's dtor.  */
+
+analysis::~analysis ()
+{
+  int i;
+  result *result;
+  FOR_EACH_VEC_ELT (m_results, i, result)
+    delete result;
+}
+
+/* Attempt to populate this firehose::analysis based on JV, which ought to be a
+   serialization of a firehose.model.Analysis python object.
+
+   Return true if successful.
+   Otherwise return false and write an error message to OUT_ERR
+   (which must be freed by the caller).  */
+
+bool
+analysis::from_json (const json::value *jv, char *&out_err)
+{
+  const json::value *jv_metadata = NULL;
+  if (!jv->get_value_by_key ("metadata", jv_metadata, out_err))
+    return false;
+  if (!m_metadata.from_json (jv_metadata, out_err))
+    return false;
+
+  const json::array *results;
+  if (!jv->get_array_by_key ("results", results, out_err))
+    return false;
+
+  for (unsigned i = 0; i < results->get_length (); i++)
+    {
+      json::value *item = results->get (i);
+      //error ("%s", item->to_str ());
+      result *r = result::from_json (item, out_err);
+      if (!r)
+	return false;
+      m_results.safe_push (r);
+  }
+
+  // FIXME: custom fields
+  // FIXME: selftests for all of this
+
+  return true;
+}
+
+} // namespace firehose
+
+
+#if CHECKING_P
+
+namespace selftest {
+
+/* Selftests.  */
+
+/* Given JSONFILE, a path relative to SRCDIR/gcc/testsuite/selftests,
+   load the json Firehose file there, populating OUT.
+   Fail if any errors occur.  */
+
+static void
+get_analysis (firehose::analysis &out, const char *jsonfile)
+{
+  char *filename = locate_file (jsonfile);
+  char *buffer = selftest::read_file (SELFTEST_LOCATION, filename);
+  ASSERT_TRUE (buffer != NULL);
+  free (filename);
+
+  char *err = NULL;
+  json::value *jv = json::parse_utf8_string (buffer, &err);
+  free (buffer);
+  ASSERT_TRUE (err == NULL);
+  ASSERT_TRUE (jv != NULL);
+
+  //jv->dump(stderr);
+  out.from_json (jv, err);
+  ASSERT_TRUE (err == NULL);
+  delete jv;
+}
+
+/* Parse a sample JSON output generated via the firehose parser for the
+   clang analyzer's plist output, and verify various properties
+   about it.  */
+
+static void
+test_parsing_clang_analyzer ()
+{
+  firehose::analysis analysis;
+  get_analysis (analysis, "checker-output/test-clang-analyzer.json");
+
+  ASSERT_STREQ ("clang-analyzer", analysis.m_metadata.m_generator.m_name);
+  ASSERT_EQ (NULL, analysis.m_metadata.m_generator.m_version);
+
+  ASSERT_EQ (1, analysis.m_results.length ());
+  firehose::result *r = analysis.m_results[0];
+  ASSERT_EQ (r->get_kind (), firehose::result::FIREHOSE_ISSUE);
+
+  firehose::issue *issue = (firehose::issue *)r;
+  ASSERT_STREQ ("Address of stack memory associated with"
+		" local variable 'tmp' returned to caller",
+		issue->m_message);
+  ASSERT_EQ (NULL, issue->m_testid);
+
+  ASSERT_LOCEQ ("../../src/bogus.c", 5, 3, issue->m_location);
+
+  ASSERT_TRUE (issue->m_trace != NULL);
+  ASSERT_EQ (3, issue->m_trace->m_states.length ());
+  firehose::state *state0 = issue->m_trace->m_states[0];
+  ASSERT_LOCEQ ("../../src/bogus.c", 3, 3, state0->m_location);
+  ASSERT_EQ (NULL, state0->m_notes);
+
+  firehose::state *state1 = issue->m_trace->m_states[1];
+  ASSERT_LOCEQ ("../../src/bogus.c", 5, 3, state1->m_location);
+  ASSERT_EQ (NULL, state1->m_notes);
+
+  firehose::state *state2 = issue->m_trace->m_states[2];
+  ASSERT_LOCEQ ("../../src/bogus.c", 5, 3, state2->m_location);
+  ASSERT_STREQ ("Address of stack memory associated with"
+		" local variable 'tmp' returned to caller",
+		state2->m_notes);
+  ASSERT_FALSE (issue->m_trace->is_redundant_p (*issue));
+
+  /* Verify filtering out non-textual states from the trace.  */
+  issue->m_trace->filter ();
+  ASSERT_EQ (1, issue->m_trace->m_states.length ());
+
+  /* Verify that the filtered trace is redundant.  */
+  ASSERT_TRUE (issue->m_trace->is_redundant_p (*issue));
+}
+
+/* Parse a sample JSON output generated via the firehose parser for
+   cppcheck's output, and verify various properties about it.  */
+
+static void
+test_parsing_cppcheck ()
+{
+  firehose::analysis analysis;
+  get_analysis (analysis, "checker-output/test-cppcheck.json");
+
+  ASSERT_STREQ ("cppcheck", analysis.m_metadata.m_generator.m_name);
+  ASSERT_STREQ ("1.63", analysis.m_metadata.m_generator.m_version);
+
+  ASSERT_EQ (1, analysis.m_results.length ());
+  firehose::result *r = analysis.m_results[0];
+  ASSERT_EQ (r->get_kind (), firehose::result::FIREHOSE_ISSUE);
+
+  firehose::issue *issue = (firehose::issue *)r;
+  ASSERT_STREQ ("Memory leak: ptr_1", issue->m_message);
+  ASSERT_STREQ ("memleak", issue->m_testid);
+
+  ASSERT_LOCEQ ("../../src/test-sources/conditional-leak.c", 11, 0,
+		issue->m_location);
+
+  ASSERT_TRUE (issue->m_trace == NULL);
+}
+
+/* Parse a JSON file describing a failure to run a checker, and verify
+   various properties about it.  */
+
+static void
+test_parsing_failure ()
+{
+  firehose::analysis analysis;
+  get_analysis (analysis, "checker-output/test-failure.json");
+
+  ASSERT_STREQ ("always-fails", analysis.m_metadata.m_generator.m_name);
+
+  ASSERT_EQ (1, analysis.m_results.length ());
+  firehose::result *r = analysis.m_results[0];
+  ASSERT_EQ (r->get_kind (), firehose::result::FIREHOSE_FAILURE);
+
+  firehose::failure *failure = (firehose::failure *)r;
+  ASSERT_STREQ ("Exception running always-fails: [Errno 2]"
+		" No such file or directory:"
+		" '/this/executable/does/not/exist'", failure->m_message);
+  ASSERT_STREQ ("exception", failure->m_failureid);
+}
+
+/* Run all of the selftests within this file.  */
+
+void
+firehose_cc_tests ()
+{
+  test_parsing_clang_analyzer ();
+  test_parsing_cppcheck ();
+  //test_parsing_info ();
+  test_parsing_failure ();
+}
+
+} // namespace selftest
+
+#endif /* #if CHECKING_P */
diff --git a/gcc/firehose.h b/gcc/firehose.h
new file mode 100644
index 0000000..6c67b45
--- /dev/null
+++ b/gcc/firehose.h
@@ -0,0 +1,199 @@
+/* Serialization format for checker results.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_FIREHOSE_H
+#define GCC_FIREHOSE_H
+
+/* "Firehose" is a serialization format for results from code
+   analysis tools:
+
+     http://firehose.readthedocs.io/en/latest/index.html
+
+   (along with a Python module for working with the format).
+
+   This file implements a set of C++ classes modeling the format,
+   with support for populating them from a JSON dump, so that we
+   can losslessly serialize diagnostics and other static analysis results.  */
+
+#include "json.h"
+
+namespace firehose {
+
+/* Forward decls.  */
+
+struct issue;
+
+/* A state within a firehose::trace.  */
+
+struct state
+{
+  state ();
+  ~state ();
+
+  static state *from_json (const json::value *jv, char *&out_err);
+
+  location_t m_location;
+  char *m_notes;
+};
+
+/* An optional list of events within an issue that describe the circumstances
+   leading up to a problem.  */
+
+struct trace
+{
+  ~trace ();
+
+  static trace *from_json (const json::value *jv, char *&out_err);
+
+  void filter ();
+
+  /* If we're just left with a single state that duplicates what we
+     already printed for the issue, don't bother printing it.  */
+  bool is_redundant_p (const issue& issue) const;
+
+  auto_vec <state *> m_states;
+};
+
+/* firehose::result is a base class.
+
+   There are three subclasses:
+
+   - a firehose::issue represents a report from an analyzer about a possible
+     problem with the software under test.
+   - a firehose::info represents additional kinds of information generated by
+     an analyzer that isn't a problem per-se e.g. code metrics, licensing info,
+     etc.
+   - a firehose::failure represents a report about a failure of the analyzer
+     itself (e.g. if the analyzer crashed).  */
+
+struct result
+{
+  enum kind
+  {
+    FIREHOSE_ISSUE,
+    FIREHOSE_INFO,
+    FIREHOSE_FAILURE
+  };
+
+  result ();
+  virtual ~result ();
+
+  static result *from_json (const json::value *jv, char *&out_err);
+
+  virtual enum kind get_kind () const = 0;
+
+  char *m_message;
+  location_t m_location;
+};
+
+/* An issue represents a report from an analyzer about a possible problem
+   with the software under test.  */
+
+struct issue : public result
+{
+  issue ();
+  ~issue ();
+
+  static issue *from_json (const json::value *jv, char *&out_err);
+  enum kind get_kind () const FINAL OVERRIDE { return FIREHOSE_ISSUE; }
+
+  char *m_testid;
+  trace *m_trace;
+};
+
+/* An info represents additional kinds of information generated by an analyzer
+   that isn't a problem per-se e.g. code metrics, licensing info,
+   cross-referencing information, etc.  */
+
+struct info : public result
+{
+  info ();
+  ~info ();
+
+  static info *from_json (const json::value *jv, char *&out_err);
+  enum kind get_kind () const FINAL OVERRIDE { return FIREHOSE_INFO; }
+
+  char *m_infoid;
+};
+
+/* A failure represents a report about a failure of the analyzer itself
+   (e.g. if the analyzer crashed).
+
+   If any of these are present then we don't have full coverage.
+
+   For some analyzers this is an all-or-nothing affair: we either get
+   issues reported, or a failure happens (e.g. a segfault of the
+   analysis tool).
+
+   Other analyzers may be more fine-grained: able to report some
+   issues, but choke on some subset of the code under analysis.
+   For example cpychecker runs once per function, and any unhandled
+   Python exceptions only affect one function.  */
+
+struct failure : public result
+{
+  failure ();
+  ~failure ();
+
+  static failure *from_json (const json::value *jv, char *&out_err);
+  enum kind get_kind () const FINAL OVERRIDE { return FIREHOSE_FAILURE; }
+
+  char *m_failureid;
+};
+
+/* A class describing a static analyzer, for use within firehose::metadata.  */
+
+struct generator
+{
+  generator ();
+  ~generator ();
+
+  bool from_json (const json::value *jv, char *&out_err);
+
+  char *m_name;
+  char *m_version;
+};
+
+/* The firehose::metadata class contains metadata about a static analyzer
+   invocation.  */
+
+struct metadata
+{
+  bool from_json (const json::value *jv, char *&out_err);
+
+  generator m_generator;
+};
+
+/* The firehose::analysis class represents one invocation of a code analysis
+   tool.  */
+
+struct analysis
+{
+  ~analysis ();
+
+  bool from_json (const json::value *jv, char *&out_err);
+
+  metadata m_metadata;
+  auto_vec<result *> m_results;
+  //custom_fields *m_custom_fields;
+};
+
+} // namespace firehose
+
+#endif  /* GCC_FIREHOSE_H  */
diff --git a/gcc/selftest-run-tests.c b/gcc/selftest-run-tests.c
index 025e574..8afcd43 100644
--- a/gcc/selftest-run-tests.c
+++ b/gcc/selftest-run-tests.c
@@ -74,6 +74,7 @@ selftest::run_tests ()
   gimple_c_tests ();
   rtl_tests_c_tests ();
   read_rtl_function_c_tests ();
+  firehose_cc_tests ();
 
   /* Higher-level tests, or for components that other selftests don't
      rely on.  */
diff --git a/gcc/selftest.h b/gcc/selftest.h
index 4e8891c..e86ce38 100644
--- a/gcc/selftest.h
+++ b/gcc/selftest.h
@@ -177,6 +177,7 @@ extern void edit_context_c_tests ();
 extern void et_forest_c_tests ();
 extern void fold_const_c_tests ();
 extern void fibonacci_heap_c_tests ();
+extern void firehose_cc_tests ();
 extern void function_tests_c_tests ();
 extern void gimple_c_tests ();
 extern void ggc_tests_c_tests ();
diff --git a/gcc/testsuite/selftests/checker-output/test-clang-analyzer.json b/gcc/testsuite/selftests/checker-output/test-clang-analyzer.json
new file mode 100644
index 0000000..eda9abc
--- /dev/null
+++ b/gcc/testsuite/selftests/checker-output/test-clang-analyzer.json
@@ -0,0 +1,122 @@
+{
+ "customfields": {
+  "scan-build-invocation": "scan-build -v -plist --use-analyzer /usr/bin/clang -o /tmp/tmp8ytuRj gcc -B. -c ../../src/bogus.c", 
+  "returncode": 0, 
+  "stdout": "scan-build: Using '/usr/bin/clang' for static analysis\nscan-build: Emitting reports for this run to '/tmp/tmp8ytuRj/2017-05-24-001755-39710-1'.\nscan-build: Analysis run complete.\nscan-build: Analysis results (plist files) deposited in '/tmp/tmp8ytuRj/2017-05-24-001755-39710-1'\n", 
+  "stderr": "../../src/bogus.c: In function \u2018test\u2019:\n../../src/bogus.c:5:10: warning: function returns address of local variable [-Wreturn-local-addr]\n   return tmp;\n          ^~~\n../../src/bogus.c:5:3: warning: Address of stack memory associated with local variable 'tmp' returned to caller\n  return tmp;\n  ^~~~~~~~~~\n1 warning generated.\n", 
+  "plistpath": "/tmp/tmp8ytuRj/2017-05-24-001755-39710-1/report-DEoPmt.plist"
+ }, 
+ "results": [
+  {
+   "severity": null, 
+   "trace": {
+    "states": [
+     {
+      "notes": null, 
+      "location": {
+       "function": {
+        "name": ""
+       }, 
+       "range_": {
+        "start": {
+         "column": 3, 
+         "line": 3
+        }, 
+        "end": {
+         "column": 6, 
+         "line": 3
+        }
+       }, 
+       "file": {
+        "abspath": null, 
+        "givenpath": "../../src/bogus.c", 
+        "hash_": null
+       }, 
+       "point": null
+      }
+     }, 
+     {
+      "notes": null, 
+      "location": {
+       "function": {
+        "name": ""
+       }, 
+       "range_": {
+        "start": {
+         "column": 3, 
+         "line": 5
+        }, 
+        "end": {
+         "column": 8, 
+         "line": 5
+        }
+       }, 
+       "file": {
+        "abspath": null, 
+        "givenpath": "../../src/bogus.c", 
+        "hash_": null
+       }, 
+       "point": null
+      }
+     }, 
+     {
+      "notes": {
+       "text": "Address of stack memory associated with local variable 'tmp' returned to caller"
+      }, 
+      "location": {
+       "function": {
+        "name": ""
+       }, 
+       "range_": null, 
+       "file": {
+        "abspath": null, 
+        "givenpath": "../../src/bogus.c", 
+        "hash_": null
+       }, 
+       "point": {
+        "column": 3, 
+        "line": 5
+       }
+      }
+     }
+    ]
+   }, 
+   "type": "Issue", 
+   "notes": null, 
+   "testid": null, 
+   "message": {
+    "text": "Address of stack memory associated with local variable 'tmp' returned to caller"
+   }, 
+   "cwe": null, 
+   "customfields": null, 
+   "location": {
+    "function": null, 
+    "range_": null, 
+    "file": {
+     "abspath": null, 
+     "givenpath": "../../src/bogus.c", 
+     "hash_": null
+    }, 
+    "point": {
+     "column": 3, 
+     "line": 5
+    }
+   }
+  }
+ ], 
+ "metadata": {
+  "stats": {
+   "wallclocktime": 0.22788214683532715
+  }, 
+  "sut": null, 
+  "file_": {
+   "abspath": "/home/david/coding-3/gcc-git-static-analysis/build/gcc/../../src/bogus.c", 
+   "givenpath": "../../src/bogus.c", 
+   "hash_": null
+  }, 
+  "generator": {
+   "version": null, 
+   "name": "clang-analyzer"
+  }
+ }
+}
\ No newline at end of file
diff --git a/gcc/testsuite/selftests/checker-output/test-cppcheck.json b/gcc/testsuite/selftests/checker-output/test-cppcheck.json
new file mode 100644
index 0000000..c9651ee
--- /dev/null
+++ b/gcc/testsuite/selftests/checker-output/test-cppcheck.json
@@ -0,0 +1,50 @@
+{
+ "customfields": {
+  "cppcheck-invocation": "cppcheck --xml --xml-version=2 ../../src/test-sources/conditional-leak.c", 
+  "returncode": 0, 
+  "stdout": "Checking ../../src/test-sources/conditional-leak.c...\n", 
+  "stderr": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<results version=\"2\">\n    <cppcheck version=\"1.63\"/>\n    <errors>\n        <error id=\"memleak\" severity=\"error\" msg=\"Memory leak: ptr_1\" verbose=\"Memory leak: ptr_1\">\n            <location file=\"../../src/test-sources/conditional-leak.c\" line=\"11\"/>\n        </error>\n    </errors>\n</results>\n"
+ }, 
+ "results": [
+  {
+   "severity": "error", 
+   "trace": null, 
+   "type": "Issue", 
+   "notes": null, 
+   "testid": "memleak", 
+   "message": {
+    "text": "Memory leak: ptr_1"
+   }, 
+   "cwe": null, 
+   "customfields": null, 
+   "location": {
+    "function": null, 
+    "range_": null, 
+    "file": {
+     "abspath": null, 
+     "givenpath": "../../src/test-sources/conditional-leak.c", 
+     "hash_": null
+    }, 
+    "point": {
+     "column": 0, 
+     "line": 11
+    }
+   }
+  }
+ ], 
+ "metadata": {
+  "stats": {
+   "wallclocktime": 0.006749868392944336
+  }, 
+  "sut": null, 
+  "file_": {
+   "abspath": "/home/david/coding-3/gcc-git-static-analysis/build/gcc/../../src/test-sources/conditional-leak.c", 
+   "givenpath": "../../src/test-sources/conditional-leak.c", 
+   "hash_": null
+  }, 
+  "generator": {
+   "version": "1.63", 
+   "name": "cppcheck"
+  }
+ }
+}
\ No newline at end of file
diff --git a/gcc/testsuite/selftests/checker-output/test-failure.json b/gcc/testsuite/selftests/checker-output/test-failure.json
new file mode 100644
index 0000000..fd07cab
--- /dev/null
+++ b/gcc/testsuite/selftests/checker-output/test-failure.json
@@ -0,0 +1,38 @@
+{
+ "customfields": {
+  "traceback": "Traceback (most recent call last):\n  File \"/home/david/coding-3/gcc-git-static-analysis/src/checkers/checker.py\", line 142, in checked_invoke\n    analysis = self.raw_invoke(gccinv, sourcefile)\n  File \"./checkers/always_fails.py\", line 40, in raw_invoke\n    return self._run_subprocess(sourcefile, args)\n  File \"/home/david/coding-3/gcc-git-static-analysis/src/checkers/checker.py\", line 213, in _run_subprocess\n    stdout=PIPE, stderr=PIPE, env=env)\n  File \"/usr/lib64/python2.7/site-packages/subprocess32.py\", line 812, in __init__\n    restore_signals, start_new_session)\n  File \"/usr/lib64/python2.7/site-packages/subprocess32.py\", line 1557, in _execute_child\n    raise child_exception_type(errno_num, err_msg)\nOSError: [Errno 2] No such file or directory: '/this/executable/does/not/exist'\n"
+ }, 
+ "results": [
+  {
+   "type": "Failure", 
+   "message": {
+    "text": "Exception running always-fails: [Errno 2] No such file or directory: '/this/executable/does/not/exist'"
+   }, 
+   "failureid": "exception", 
+   "location": {
+    "function": null, 
+    "range_": null, 
+    "file": {
+     "abspath": null, 
+     "givenpath": "checkers/test-sources/harmless.c", 
+     "hash_": null
+    }, 
+    "point": null
+   }, 
+   "customfields": null
+  }
+ ], 
+ "metadata": {
+  "stats": null, 
+  "sut": null, 
+  "file_": {
+   "abspath": "/home/david/coding-3/gcc-git-static-analysis/src/checkers/test-sources/harmless.c", 
+   "givenpath": "checkers/test-sources/harmless.c", 
+   "hash_": null
+  }, 
+  "generator": {
+   "version": null, 
+   "name": "always-fails"
+  }
+ }
+}
\ No newline at end of file
-- 
1.8.5.3

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH 11/22] Add checkers/test-sources
  2017-08-04 21:30 [PATCH 00/22] RFC: integrated 3rd-party static analysis support David Malcolm
                   ` (12 preceding siblings ...)
  2017-08-04 21:37 ` [PATCH 15/22] Add checkers/clang_analyzer.py David Malcolm
@ 2017-08-04 21:37 ` David Malcolm
  2017-08-04 21:38 ` [PATCH 14/22] Add checkers/always_fails.py David Malcolm
                   ` (11 subsequent siblings)
  25 siblings, 0 replies; 35+ messages in thread
From: David Malcolm @ 2017-08-04 21:37 UTC (permalink / raw)
  To: gcc-patches; +Cc: David Malcolm

This patch adds a collection of test sources for use by the testsuites
of the various checker harnesses.

checkers/ChangeLog:
	* test-sources/conditional-leak.c: New file.
	* test-sources/cpychecker-demo.c: New file.
	* test-sources/divide-by-zero.c: New file.
	* test-sources/harmless.c: New file.
	* test-sources/multiple-1.c: New file.
	* test-sources/multiple-2.c: New file.
	* test-sources/out-of-bounds.c: New file.
	* test-sources/read-through-null.c: New file.
	* test-sources/return-of-stack-address.c: New file.
	* test-sources/unconditional-file-leak.c: New file.
---
 checkers/test-sources/conditional-leak.c        |  17 ++++
 checkers/test-sources/cpychecker-demo.c         | 110 ++++++++++++++++++++++++
 checkers/test-sources/divide-by-zero.c          |   4 +
 checkers/test-sources/harmless.c                |   9 ++
 checkers/test-sources/multiple-1.c              |   6 ++
 checkers/test-sources/multiple-2.c              |   9 ++
 checkers/test-sources/out-of-bounds.c           |   6 ++
 checkers/test-sources/read-through-null.c       |   4 +
 checkers/test-sources/return-of-stack-address.c |   6 ++
 checkers/test-sources/unconditional-file-leak.c |  10 +++
 10 files changed, 181 insertions(+)
 create mode 100644 checkers/test-sources/conditional-leak.c
 create mode 100644 checkers/test-sources/cpychecker-demo.c
 create mode 100644 checkers/test-sources/divide-by-zero.c
 create mode 100644 checkers/test-sources/harmless.c
 create mode 100644 checkers/test-sources/multiple-1.c
 create mode 100644 checkers/test-sources/multiple-2.c
 create mode 100644 checkers/test-sources/out-of-bounds.c
 create mode 100644 checkers/test-sources/read-through-null.c
 create mode 100644 checkers/test-sources/return-of-stack-address.c
 create mode 100644 checkers/test-sources/unconditional-file-leak.c

diff --git a/checkers/test-sources/conditional-leak.c b/checkers/test-sources/conditional-leak.c
new file mode 100644
index 0000000..2ab46f5
--- /dev/null
+++ b/checkers/test-sources/conditional-leak.c
@@ -0,0 +1,17 @@
+#include <stdlib.h>
+
+void test ()
+{
+  void *ptr_1;
+  void *ptr_2;
+
+  ptr_1 = malloc (64);
+  if (!ptr_1)
+    return;
+  ptr_2 = malloc (64);
+  if (!ptr_2)
+    return;
+
+  free (ptr_2);
+  free (ptr_1);
+}
diff --git a/checkers/test-sources/cpychecker-demo.c b/checkers/test-sources/cpychecker-demo.c
new file mode 100644
index 0000000..b379729
--- /dev/null
+++ b/checkers/test-sources/cpychecker-demo.c
@@ -0,0 +1,110 @@
+/*
+   Copyright 2011 David Malcolm <dmalcolm@redhat.com>
+   Copyright 2011 Red Hat, Inc.
+
+   This is free software: you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see
+   <http://www.gnu.org/licenses/>.
+*/
+
+/* Examples of mistakes made using the Python API */
+#include <Python.h>
+
+extern uint16_t htons(uint16_t hostshort);
+
+PyObject *
+socket_htons(PyObject *self, PyObject *args)
+{
+    unsigned long x1, x2;
+
+    if (!PyArg_ParseTuple(args, "i:htons", &x1)) {
+        return NULL;
+    }
+    x2 = (int)htons((short)x1);
+    return PyInt_FromLong(x2);
+}
+
+PyObject *
+not_enough_varargs(PyObject *self, PyObject *args)
+{
+   if (!PyArg_ParseTuple(args, "i")) {
+       return NULL;
+   }
+   Py_RETURN_NONE;
+}
+
+PyObject *
+too_many_varargs(PyObject *self, PyObject *args)
+{
+    int i, j;
+    if (!PyArg_ParseTuple(args, "i", &i, &j)) {
+	 return NULL;
+    }
+    Py_RETURN_NONE;
+}
+
+PyObject *
+kwargs_example(PyObject *self, PyObject *args, PyObject *kwargs)
+{
+    double x, y;
+    char *keywords[] = {"x", "y"};
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "(ff):kwargs_example", keywords, &x, &y)) {
+	 return NULL;
+    }
+    Py_RETURN_NONE;
+}
+
+
+extern int convert_to_ssize(PyObject *, Py_ssize_t *);
+
+PyObject *
+buggy_converter(PyObject *self, PyObject *args)
+{
+    int i;
+
+    if (!PyArg_ParseTuple(args, "O&", convert_to_ssize, &i)) {
+        return NULL;
+    }
+
+    Py_RETURN_NONE;
+}
+
+PyObject *
+make_a_list_of_random_ints_badly(PyObject *self,
+                                 PyObject *args)
+{
+    PyObject *list, *item;
+    long count, i;
+
+    if (!PyArg_ParseTuple(args, "i", &count)) {
+         return NULL;
+    }
+
+    list = PyList_New(0);
+
+    for (i = 0; i < count; i++) {
+        item = PyLong_FromLong(random());
+        PyList_Append(list, item);
+    }
+
+    return list;
+}
+
+/*
+  PEP-7
+Local variables:
+c-basic-offset: 4
+indent-tabs-mode: nil
+End:
+*/
diff --git a/checkers/test-sources/divide-by-zero.c b/checkers/test-sources/divide-by-zero.c
new file mode 100644
index 0000000..f40692d
--- /dev/null
+++ b/checkers/test-sources/divide-by-zero.c
@@ -0,0 +1,4 @@
+int divide_by_zero (int i)
+{
+  return i / 0;
+}
diff --git a/checkers/test-sources/harmless.c b/checkers/test-sources/harmless.c
new file mode 100644
index 0000000..c29f0cc
--- /dev/null
+++ b/checkers/test-sources/harmless.c
@@ -0,0 +1,9 @@
+#include <stdio.h>
+
+int main (int argc, const char **argv)
+{
+  int i;
+  for (i = 0; i < argc; i++)
+    printf ("argv[%i]: %s\n", i, argv[i]);
+  return 0;
+}
diff --git a/checkers/test-sources/multiple-1.c b/checkers/test-sources/multiple-1.c
new file mode 100644
index 0000000..8d26c69
--- /dev/null
+++ b/checkers/test-sources/multiple-1.c
@@ -0,0 +1,6 @@
+extern int helper (int argc, const char **argv);
+
+int main (int argc, const char **argv)
+{
+  return helper (argc, argv);
+}
diff --git a/checkers/test-sources/multiple-2.c b/checkers/test-sources/multiple-2.c
new file mode 100644
index 0000000..a7a20ac
--- /dev/null
+++ b/checkers/test-sources/multiple-2.c
@@ -0,0 +1,9 @@
+#include <stdio.h>
+
+int helper (int argc, const char **argv)
+{
+  int i;
+  for (i = 0; i < argc; i++)
+    printf ("argv[%i]: %s\n", i, argv[i]);
+  return 0;
+}
diff --git a/checkers/test-sources/out-of-bounds.c b/checkers/test-sources/out-of-bounds.c
new file mode 100644
index 0000000..4137389
--- /dev/null
+++ b/checkers/test-sources/out-of-bounds.c
@@ -0,0 +1,6 @@
+int out_of_bounds (void)
+{
+  int arr[10];
+
+  return arr[15];
+}
diff --git a/checkers/test-sources/read-through-null.c b/checkers/test-sources/read-through-null.c
new file mode 100644
index 0000000..2f0450c
--- /dev/null
+++ b/checkers/test-sources/read-through-null.c
@@ -0,0 +1,4 @@
+int read_through_null (void)
+{
+  return *(int *)0;
+}
diff --git a/checkers/test-sources/return-of-stack-address.c b/checkers/test-sources/return-of-stack-address.c
new file mode 100644
index 0000000..66c8893
--- /dev/null
+++ b/checkers/test-sources/return-of-stack-address.c
@@ -0,0 +1,6 @@
+void *test (void)
+{
+  char tmp[16];
+
+  return tmp;
+}
diff --git a/checkers/test-sources/unconditional-file-leak.c b/checkers/test-sources/unconditional-file-leak.c
new file mode 100644
index 0000000..3c6655c
--- /dev/null
+++ b/checkers/test-sources/unconditional-file-leak.c
@@ -0,0 +1,10 @@
+#include <stdio.h>
+
+void test (const char *filename)
+{
+  int i;
+  FILE *f;
+  f = fopen (filename, "w");
+  for (i = 0; i < 10; i++)
+    fprintf (f, "%i: %i",  i,  i * i);
+}
-- 
1.8.5.3

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH 15/22] Add checkers/clang_analyzer.py
  2017-08-04 21:30 [PATCH 00/22] RFC: integrated 3rd-party static analysis support David Malcolm
                   ` (11 preceding siblings ...)
  2017-08-04 21:37 ` [PATCH 04/22] Add firehose.h/cc David Malcolm
@ 2017-08-04 21:37 ` David Malcolm
  2017-08-04 21:37 ` [PATCH 11/22] Add checkers/test-sources David Malcolm
                   ` (12 subsequent siblings)
  25 siblings, 0 replies; 35+ messages in thread
From: David Malcolm @ 2017-08-04 21:37 UTC (permalink / raw)
  To: gcc-patches; +Cc: David Malcolm

This patch adds a harness for invoking clang's static analyzer:
  https://clang-analyzer.llvm.org/
returning the results in JSON format.

It runs scan-build, then uses firehose.parsers.clanganalyzer.parse_plist
to parse the generated .plist file, turning it into firehose JSON.

checkers/ChangeLog:
	* clang_analyzer.py: New file.
---
 checkers/clang_analyzer.py | 145 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 145 insertions(+)
 create mode 100755 checkers/clang_analyzer.py

diff --git a/checkers/clang_analyzer.py b/checkers/clang_analyzer.py
new file mode 100755
index 0000000..ae41d93
--- /dev/null
+++ b/checkers/clang_analyzer.py
@@ -0,0 +1,145 @@
+#!/usr/bin/env python
+#   Copyright 2012, 2013, 2015, 2017 David Malcolm <dmalcolm@redhat.com>
+#   Copyright 2012, 2013, 2015, 2017 Red Hat, Inc.
+#
+#   This is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by
+#   the Free Software Foundation, either version 3 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#   General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program.  If not, see
+#   <http://www.gnu.org/licenses/>.
+
+import glob
+import os
+import sys
+import tempfile
+
+from gccinvocation import GccInvocation
+from firehose.model import Failure, Issue, Trace
+from firehose.parsers.clanganalyzer import parse_plist
+
+from checker import Checker, CheckerTests, make_file, make_stats, \
+    tool_main
+
+class InvokeClangAnalyzer(Checker):
+    """
+    Checker subclass that invokes the clang analyzer
+    """
+    name = 'clang-analyzer'
+
+    def raw_invoke(self, gccinv, sourcefile):
+        self.resultdir = tempfile.mkdtemp()
+        args = ['scan-build', '-v', '-plist',
+                '--use-analyzer', '/usr/bin/clang', # rhbz 923834
+                '-o', self.resultdir,
+                'gcc'] + gccinv.argv[1:]
+        return self._run_subprocess(sourcefile, args)
+
+    def handle_output(self, result):
+        if result.returncode:
+            analysis = self._make_failed_analysis(result.sourcefile, result.timer,
+                                                  msgtext='Bad exit code running %s' % self.name,
+                                                  failureid='bad-exit-code')
+            self.set_custom_fields(result, analysis)
+            return analysis
+
+        # Given e.g. resultdir='/tmp/tmpQW2l2B', the plist files
+        # are an extra level deep e.g.:
+        #  '/tmp/tmpQW2l2B/2013-01-22-1/report-MlwJri.plist'
+        self.log(self.resultdir)
+        for plistpath in glob.glob(os.path.join(self.resultdir,
+                                                '*/*.plist')):
+            analysis = parse_plist(plistpath,
+                                   file_=make_file(result.sourcefile),
+                                   stats=make_stats(result.timer))
+            self.set_custom_fields(result, analysis)
+            analysis.set_custom_field('plistpath', plistpath)
+            return analysis # could there be more than one?
+
+        # Not found?
+        analysis = self._make_failed_analysis(
+            result.sourcefile, result.timer,
+            msgtext='Unable to locate plist file',
+            failureid='plist-not-found')
+        self.set_custom_fields(result, analysis)
+        return analysis
+
+    def set_custom_fields(self, result, analysis):
+        analysis.set_custom_field('scan-build-invocation',
+                                  ' '.join(result.argv))
+        result.set_custom_fields(analysis)
+
+class ClangAnalyzerTests(CheckerTests):
+    def make_tool(self):
+        return self.make_tool_from_class(InvokeClangAnalyzer)
+
+    def verify_basic_metadata(self, analysis, sourcefile):
+        # Verify basic metadata:
+        self.assert_metadata(analysis, 'clang-analyzer', sourcefile)
+        self.assert_has_custom_field(analysis, 'scan-build-invocation')
+        self.assert_has_custom_field(analysis, 'stdout')
+        self.assert_has_custom_field(analysis, 'stderr')
+
+    def test_file_not_found(self):
+        analysis = self.invoke('does-not-exist.c')
+        #print(analysis)
+        self.assertEqual(len(analysis.results), 1)
+        self.assertIsInstance(analysis.results[0], Failure)
+        self.assertEqual(analysis.results[0].failureid, 'bad-exit-code')
+
+    def test_timeout(self):
+        sourcefile = 'test-sources/harmless.c'
+        tool = self.make_tool()
+        tool.timeout = 0
+        gccinv = GccInvocation(['gcc', sourcefile])
+        analysis = tool.checked_invoke(gccinv, sourcefile)
+        self.assert_metadata(analysis, 'clang-analyzer', sourcefile)
+        self.assertEqual(len(analysis.results), 1)
+        r0 = analysis.results[0]
+        self.assertIsInstance(r0, Failure)
+        self.assertEqual(r0.failureid, 'timeout')
+        self.assert_has_custom_field(analysis, 'timeout')
+        self.assert_has_custom_field(analysis, 'command-line')
+
+    def test_harmless_file(self):
+        analysis = self.invoke('test-sources/harmless.c')
+        self.assertEqual(len(analysis.results), 0)
+
+    def test_read_through_null(self):
+        analysis = self.invoke('test-sources/read-through-null.c')
+        self.assertEqual(len(analysis.results), 1)
+        r0 = analysis.results[0]
+        self.assertIsInstance(r0, Issue)
+        self.assertEqual(r0.testid, 'Dereference of null pointer')
+        self.assertEqual(r0.location.file.givenpath,
+                         'test-sources/read-through-null.c')
+        self.assertEqual(r0.location.point.line, 3)
+        self.assertEqual(r0.message.text,
+                         "Dereference of null pointer")
+        self.assertEqual(r0.severity, None)
+        self.assertIsInstance(r0.trace, Trace)
+
+    def test_out_of_bounds(self):
+        analysis = self.invoke('test-sources/out-of-bounds.c')
+        self.assertEqual(len(analysis.results), 1)
+
+        r0 = analysis.results[0]
+        self.assertIsInstance(r0, Issue)
+        self.assertEqual(r0.testid, 'Garbage return value')
+        self.assertEqual(r0.location.file.givenpath,
+                         'test-sources/out-of-bounds.c')
+        self.assertEqual(r0.location.point.line, 5)
+        self.assertEqual(r0.message.text,
+                         "Undefined or garbage value returned to caller")
+        self.assertEqual(r0.severity, None)
+        self.assertIsInstance(r0.trace, Trace)
+
+if __name__ == '__main__':
+    sys.exit(tool_main(sys.argv, InvokeClangAnalyzer))
-- 
1.8.5.3

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH 13/22] Add checkers/checker.py
  2017-08-04 21:30 [PATCH 00/22] RFC: integrated 3rd-party static analysis support David Malcolm
                   ` (16 preceding siblings ...)
  2017-08-04 21:38 ` [PATCH 06/22] Makefile.in: hack in -lpthread David Malcolm
@ 2017-08-04 21:38 ` David Malcolm
  2017-08-04 21:38 ` [PATCH 12/22] Add -Wrun-analyzers= to common.opt, toplev.c, and invoke.texi David Malcolm
                   ` (7 subsequent siblings)
  25 siblings, 0 replies; 35+ messages in thread
From: David Malcolm @ 2017-08-04 21:38 UTC (permalink / raw)
  To: gcc-patches; +Cc: David Malcolm

This patch:
* creates a new "checkers" top-level directory to hold
  harnesses for 3rd-party code-checking tools.
* adds a "checker.py" Python module for use when implementing
  such harnesses

3rd-party code-checking tools are expected to be passed
command-line arguments by the frontend, and to return a JSON
result on stdout; the job of each harness is to coerce the
output from the tool into the expected JSON output format.

The JSON format to be used is the "Firehose" serialization
format:
  http://firehose.readthedocs.io/en/latest/index.html

checkers/ChangeLog:
	* ChangeLog: New file.
	* checker.py: New file.
---
 checkers/ChangeLog  |   9 ++
 checkers/checker.py | 367 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 376 insertions(+)
 create mode 100644 checkers/ChangeLog
 create mode 100755 checkers/checker.py

diff --git a/checkers/ChangeLog b/checkers/ChangeLog
new file mode 100644
index 0000000..9189883
--- /dev/null
+++ b/checkers/ChangeLog
@@ -0,0 +1,9 @@
+2017-08-03  David Malcolm  <dmalcolm@redhat.com>
+
+	* ChangeLog: New ChangeLog file.
+
+Copyright (C) 2017 Free Software Foundation, Inc.
+
+Copying and distribution of this file, with or without modification,
+are permitted in any medium without royalty provided the copyright
+notice and this notice are preserved.
diff --git a/checkers/checker.py b/checkers/checker.py
new file mode 100755
index 0000000..262bd72
--- /dev/null
+++ b/checkers/checker.py
@@ -0,0 +1,367 @@
+#!/usr/bin/env python
+#   Copyright 2012, 2013, 2015, 2017 David Malcolm <dmalcolm@redhat.com>
+#   Copyright 2012, 2013, 2015, 2017 Red Hat, Inc.
+#
+#   This is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by
+#   the Free Software Foundation, either version 3 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#   General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program.  If not, see
+#   <http://www.gnu.org/licenses/>.
+
+"""
+A "checker" is an executable which takes GCC-style command-line
+arguments and writes a Firehose JSON file to stdout.
+"""
+
+import json
+import logging
+import os
+import re
+import sys
+import tempfile
+import time
+import traceback
+import unittest
+
+if sys.version_info[0] < 3:
+    # http://pypi.python.org/pypi/subprocess32
+    # so that we can use timeouts
+    from subprocess32 import Popen, PIPE, STDOUT, TimeoutExpired
+else:
+    from subprocess import Popen, PIPE, STDOUT, TimeoutExpired
+
+from firehose.model import Analysis, Generator, Metadata, Failure, \
+    Location, File, Message, Issue, Trace
+
+from gccinvocation import GccInvocation
+
+def make_file(givenpath):
+    from firehose.model import File
+    return File(givenpath=givenpath,
+                abspath=None,
+                hash_=None)
+
+def make_stats(timer):
+    from firehose.model import Stats
+    return Stats(wallclocktime=timer.get_elapsed_time())
+
+class Timer:
+    """
+    Simple measurement of wallclock time taken
+    """
+    def __init__(self):
+        self.starttime = time.time()
+
+    def get_elapsed_time(self):
+        """Get elapsed time in seconds as a float"""
+        curtime = time.time()
+        return curtime - self.starttime
+
+    def elapsed_time_as_str(self):
+        """Get elapsed time as a string (with units)"""
+        elapsed = self.get_elapsed_time()
+        result = '%0.3f seconds' % elapsed
+        if elapsed > 120:
+            result += ' (%i minutes)' % int(elapsed / 60)
+        return result
+
+class Context:
+    def __init__(self, name, enable_logging=False, capture_exceptions=True):
+        self.name = name
+        self.enable_logging = enable_logging
+        self.capture_exceptions = capture_exceptions
+        if self.enable_logging:
+            format_ = '%(asctime)s ' + name + ': %(message)s'
+            logging.basicConfig(format=format_,
+                                level=logging.INFO,
+                                stream=sys.stderr)
+            self.log('logging initialized')
+
+        self.stdout = sys.stdout
+        self.stderr = sys.stderr
+        self.returncode = None
+
+    def log(self, msg):
+        if self.enable_logging:
+            logging.info(msg)
+
+    def write_streams(self, toolname, out, err):
+        for line in out.splitlines():
+            self.log('stdout from %r: %s\n' % (toolname, line))
+        for line in err.splitlines():
+            self.log('stderr from %r: %s\n' % (toolname, line))
+
+class SubprocessResult:
+    """
+    A bundle of information relating to a subprocess invocation.
+    """
+    def __init__(self, sourcefile, argv, returncode, out, err, timer):
+        self.sourcefile = sourcefile
+        self.argv = argv
+        self.returncode = returncode
+        self.out = out
+        self.err = err
+        self.timer = timer
+
+    def set_custom_fields(self, analysis):
+        analysis.set_custom_field('returncode', self.returncode)
+        analysis.set_custom_field('stdout', self.out.decode('utf-8'))
+        analysis.set_custom_field('stderr', self.err.decode('utf-8'))
+
+    def __repr__(self):
+        return 'SubprocessResult(%r, %r, %r, %r, %r, %r)' \
+            % (self.sourcefile, self.argv, self.returncode,
+               self.out, self.err, self.timer)
+
+class Checker:
+    def __init__(self, ctxt):
+        self.name = ctxt.name
+        self.timeout = 60
+        self.ctxt = ctxt
+
+    def log(self, msg):
+        self.ctxt.log(msg)
+
+    def raw_invoke(self, gccinv, sourcefile):
+        """
+        Run the tool, with a timeout, returning an Analysis instance.
+        May well raise an exception if something major went wrong.
+        """
+        raise NotImplementedError
+
+    def checked_invoke(self, gccinv, sourcefile):
+        """
+        Call "invoke", handling exceptions.  Return an Analysis instance.
+        """
+        try:
+            self.log('about to invoke: %s with %r' % (self.name, gccinv))
+            analysis = self.raw_invoke(gccinv, sourcefile)
+        except TimeoutExpired:
+            analysis = self._make_failed_analysis(
+                sourcefile, None, msgtext='Timeout running %s' % self.name,
+                failureid='timeout')
+        except Exception as exc:
+            # Capture the exception as a Failure instance.
+            # Alternatively when debugging such failures, it can
+            # be easier to re-raise the exception:
+            if not self.ctxt.capture_exceptions:
+                raise
+            analysis = \
+                self._make_failed_analysis(
+                    sourcefile, None,
+                    msgtext=('Exception running %s: %s'
+                             % (self.name, exc)),
+                    failureid='exception')
+            tb_str = traceback.format_exc()
+            analysis.set_custom_field('traceback', tb_str)
+        if sourcefile:
+            if not analysis.metadata.file_:
+                analysis.metadata.file_ = make_file(givenpath=sourcefile)
+            analysis.metadata.file_.givenpath = sourcefile
+            analysis.metadata.file_.abspath = os.path.join(os.getcwd(),
+                                                           sourcefile)
+        return analysis
+
+    def handle_output(self, result):
+        """
+        Given a SubprocessResult, return an Analysis instance.
+        """
+        raise NotImplementedError
+
+    def _make_failed_analysis(self, sourcefile, t, msgtext, failureid):
+        """
+        Something went wrong; build a failure report.
+        """
+        generator = Generator(name=self.name,
+                              version=None)
+        if t:
+            stats = make_stats(t)
+        else:
+            stats = None
+
+        metadata = Metadata(generator=generator,
+                            sut=None,
+                            file_ = make_file(sourcefile),
+                            stats=stats)
+        file_ = File(givenpath=sourcefile,
+                     abspath=None,
+                     hash_=None)
+        location = Location(file=file_,
+                            function=None,
+                            point=None,
+                            range_=None)
+        message = Message(msgtext)
+        results = [Failure(failureid=failureid,
+                           location=location,
+                           message=message,
+                           customfields=None)]
+        analysis = Analysis(metadata, results)
+        return analysis
+
+    def _run_subprocess(self, sourcefile, argv, env=None):
+        """
+        Support for running the bulk of the side effect in a subprocess,
+        with timeout support.
+        """
+        self.log('%s: _run_subprocess(%r, %r)' % (self.name, sourcefile, argv))
+        if 0:
+            self.log('env: %s' % env)
+        p = Popen(argv,
+                  stdout=PIPE, stderr=PIPE, env=env)
+        try:
+            t = Timer()
+            out, err = p.communicate(timeout=self.timeout)
+            self.ctxt.write_streams(argv[0], out, err)
+            result = SubprocessResult(sourcefile, argv, p.returncode, out, err, t)
+            analysis = self.handle_output(result)
+            return analysis
+        except TimeoutExpired:
+            analysis = self._make_failed_analysis(sourcefile, t,
+                                                  msgtext='Timeout running %s' % self.name,
+                                                  failureid='timeout')
+            analysis.set_custom_field('timeout', self.timeout)
+            analysis.set_custom_field('command-line', ' '.join(argv))
+            return analysis
+
+    def run_subprocess(self, sourcefile, argv, env=None):
+        """
+        Support for running the bulk of the side effect in a subprocess,
+        with timeout support.
+        """
+        self.log('%s: run_subprocess(%r, %r)' % (self.name, sourcefile, argv))
+        if 0:
+            self.log('env: %s' % env)
+        p = Popen(argv, stdout=PIPE, stderr=PIPE, env=env)
+        t = Timer()
+        out, err = p.communicate(timeout=self.timeout)
+        self.ctxt.write_streams(argv[0], out, err)
+        result = SubprocessResult(sourcefile, argv, p.returncode, out, err, t)
+        return result
+
+############################################################################
+# Test suite
+############################################################################
+
+class CheckerTests(unittest.TestCase):
+    def make_ctxt(self, name, capture_exceptions=False):
+        return Context(name, enable_logging=0, capture_exceptions=capture_exceptions)
+
+    def make_tool_from_class(self, tool_class):
+        ctxt = self.make_ctxt(tool_class.name)
+        return tool_class(ctxt)
+
+    def make_tool(self):
+        """Hook for self.make_compiler()"""
+        raise NotImplementedError
+
+    def verify_basic_metadata(self, analysis, sourcefile):
+        """Hook for self.invoke()"""
+        raise NotImplementedError
+
+    def invoke(self, sourcefile, extraargs = None):
+        """Invoke a tool and sanity-check the result"""
+        tool = self.make_tool()
+        argv = ['gcc', '-c', sourcefile]
+        if extraargs:
+            argv += extraargs
+        gccinv = GccInvocation(argv)
+        analysis = tool.checked_invoke(gccinv, sourcefile)
+
+        if 0:
+            print(analysis)
+
+        # Call a subclass hook to check basic metadata:
+        self.verify_basic_metadata(analysis, sourcefile)
+
+        # Verify that we can serialize to XML:
+        xml_bytes = analysis.to_xml_bytes()
+        self.assertTrue(xml_bytes.startswith(b'<analysis>'))
+
+        # Verify it can roundtrip through JSON:
+        js_bytes = json.dumps(analysis.to_json(), indent=1)
+        other = Analysis.from_json(json.loads(js_bytes))
+        #self.assertEqual(analysis, other)
+
+        return analysis
+
+    def assert_metadata(self, analysis,
+                        expected_generator_name, expected_given_path):
+        self.assertEqual(analysis.metadata.generator.name,
+                         expected_generator_name)
+        self.assertEqual(analysis.metadata.file_.givenpath, expected_given_path)
+        self.assertIn(expected_given_path, analysis.metadata.file_.abspath)
+
+    def assert_has_custom_field(self, analysis, name):
+        self.assertTrue(analysis.customfields)
+        self.assertTrue(name in analysis.customfields)
+
+class BuggyCheckerTests(CheckerTests):
+    def make_tool(self):
+        """
+        Override base class impl, so that we can enable
+        exception-capture (and provide a custom tool)
+        """
+        class BuggyChecker(Checker):
+            def raw_invoke(self, gccinv, sourcefile):
+                raise ValueError('test of raising an exception')
+
+        ctxt = self.make_ctxt('buggy', capture_exceptions=True)
+        tool = BuggyChecker(ctxt)
+        return tool
+
+    def verify_basic_metadata(self, analysis, sourcefile):
+         self.assert_metadata(analysis, 'buggy', sourcefile)
+
+    def test_exception_handling(self):
+        analysis = self.invoke('test-sources/harmless.c')
+        #print(analysis)
+        self.assertEqual(len(analysis.results), 1)
+        r0 = analysis.results[0]
+        self.assertIsInstance(r0, Failure)
+        self.assertEqual(r0.failureid, 'exception')
+        self.assertEqual(r0.message.text,
+                         ('Exception running buggy:'
+                          ' test of raising an exception'))
+        self.assertTrue(analysis.customfields['traceback'].startswith(
+            'Traceback (most recent call last):\n'))
+
+############################################################################
+# Entrypoint
+############################################################################
+
+def tool_main(argv, tool_class):
+    """
+    Entrypoint for use by the various per-tool scripts
+    """
+    # If we're invoked with "unittest" as the first param,
+    # run the unit test suite:
+    if len(argv) >= 2:
+        if argv[1] == 'unittest':
+            sys.argv = [argv[0]] + argv[2:]
+            return unittest.main()
+
+    ctxt = Context(tool_class.name, enable_logging=False)
+    tool = tool_class(ctxt)
+
+    ctxt.log('argv: %r' % argv)
+
+    gccinv = GccInvocation(argv)
+    ctxt.log('  gccinv.sources: %r' % gccinv.sources)
+    assert len(gccinv.sources) == 1
+    sourcefile = gccinv.sources[0]
+    ctxt.log('    sourcefile: %r' % sourcefile)
+    analysis = tool.checked_invoke(gccinv, sourcefile)
+    json.dump(analysis.to_json(), sys.stdout, indent=1)
+
+# Run the unit tests
+
+if __name__ == '__main__':
+    sys.exit(unittest.main())
-- 
1.8.5.3

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH 21/22] Add checkers/Makefile
  2017-08-04 21:30 [PATCH 00/22] RFC: integrated 3rd-party static analysis support David Malcolm
                   ` (14 preceding siblings ...)
  2017-08-04 21:38 ` [PATCH 14/22] Add checkers/always_fails.py David Malcolm
@ 2017-08-04 21:38 ` David Malcolm
  2017-08-04 21:38 ` [PATCH 06/22] Makefile.in: hack in -lpthread David Malcolm
                   ` (9 subsequent siblings)
  25 siblings, 0 replies; 35+ messages in thread
From: David Malcolm @ 2017-08-04 21:38 UTC (permalink / raw)
  To: gcc-patches; +Cc: David Malcolm

This patch adds a rather simplistic Makefile to "checkers", purely for
exercising the test suites of the various harnesses.

checkers/ChangeLog:
	* Makefile: New file.
---
 checkers/Makefile | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 checkers/Makefile

diff --git a/checkers/Makefile b/checkers/Makefile
new file mode 100644
index 0000000..f69dccf
--- /dev/null
+++ b/checkers/Makefile
@@ -0,0 +1,23 @@
+all: check-all
+
+check-all: check-all-python2 check-all-python3
+
+check-all-python2:
+	python2 checker.py
+	python2 clang_analyzer.py unittest
+	python2 cppcheck.py unittest
+	python2 flawfinder.py unittest
+	python2 splint.py unittest
+	python2 always_fails.py unittest
+	python2 ianal.py unittest
+	python2 coverity.py unittest
+
+check-all-python3:
+	python3 checker.py
+	python3 clang_analyzer.py unittest
+	python3 cppcheck.py unittest
+	python3 flawfinder.py unittest
+	python3 splint.py unittest
+	python3 always_fails.py unittest
+	python3 ianal.py unittest
+	python3 coverity.py unittest
-- 
1.8.5.3

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH 12/22] Add -Wrun-analyzers= to common.opt, toplev.c, and invoke.texi
  2017-08-04 21:30 [PATCH 00/22] RFC: integrated 3rd-party static analysis support David Malcolm
                   ` (17 preceding siblings ...)
  2017-08-04 21:38 ` [PATCH 13/22] Add checkers/checker.py David Malcolm
@ 2017-08-04 21:38 ` David Malcolm
  2017-08-04 21:39 ` [PATCH 18/22] Add checkers/flawfinder.py David Malcolm
                   ` (6 subsequent siblings)
  25 siblings, 0 replies; 35+ messages in thread
From: David Malcolm @ 2017-08-04 21:38 UTC (permalink / raw)
  To: gcc-patches; +Cc: David Malcolm

This patch introduces -Wrun-analyzers= and wires it up to the checkers.cc
code, via toplev.c.

As noted earlier, I'm not a great fan of the monolithic option syntax
here; maybe there should be some kind of option that can be supplied
multiple times for adding individual checkers.

gcc/ChangeLog:
	* common.opt (Wrun-analyzers=): New option.
	* doc/invoke.texi (Warning Options): Add -Wrun-analyzers=.
	(-Wrun-analyzers=): New option.
	* toplev.c: Include "checkers.h".
	(compile_file): Call checkers_finish.
	(do_compile): Call checkers_start.
---
 gcc/common.opt      | 4 ++++
 gcc/doc/invoke.texi | 8 +++++++-
 gcc/toplev.c        | 9 +++++++++
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/gcc/common.opt b/gcc/common.opt
index 1cb1c83..5a7b47d 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -669,6 +669,10 @@ Wreturn-local-addr
 Common Var(warn_return_local_addr) Init(1) Warning
 Warn about returning a pointer/reference to a local or temporary variable.
 
+Wrun-analyzers=
+Common Joined Var(warn_run_analyzers) Warning
+Run 3rd-party analyzer tools based on the supplied JSON file.
+
 Wshadow
 Common Var(warn_shadow) Warning
 Warn when one variable shadows another.  Same as -Wshadow=global.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 5ae9dc4..6756c07 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -303,7 +303,7 @@ Objective-C and Objective-C++ Dialects}.
 -Wplacement-new  -Wplacement-new=@var{n} @gol
 -Wpointer-arith  -Wpointer-compare  -Wno-pointer-to-int-cast @gol
 -Wno-pragmas  -Wredundant-decls  -Wrestrict  -Wno-return-local-addr @gol
--Wreturn-type  -Wsequence-point  -Wshadow  -Wno-shadow-ivar @gol
+-Wreturn-type  -Wrun-analyzers=  -Wsequence-point  -Wshadow  -Wno-shadow-ivar @gol
 -Wshadow=global,  -Wshadow=local,  -Wshadow=compatible-local @gol
 -Wshift-overflow  -Wshift-overflow=@var{n} @gol
 -Wshift-count-negative  -Wshift-count-overflow  -Wshift-negative-value @gol
@@ -4638,6 +4638,12 @@ exceptions are @code{main} and functions defined in system headers.
 
 This warning is enabled by @option{-Wall}.
 
+@item -Wrun-analyzers=@var{path-to-json}
+@opindex Wrun-analyzers
+Run 3rd-party analyzer tools based on the supplied JSON file.
+
+FIXME: document the format here
+
 @item -Wshift-count-negative
 @opindex Wshift-count-negative
 @opindex Wno-shift-count-negative
diff --git a/gcc/toplev.c b/gcc/toplev.c
index b28f184..4f871e0 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -80,6 +80,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "hsa-common.h"
 #include "edit-context.h"
 #include "tree-pass.h"
+#include "checkers.h"
 
 #if defined(DBX_DEBUGGING_INFO) || defined(XCOFF_DEBUGGING_INFO)
 #include "dbxout.h"
@@ -501,6 +502,11 @@ compile_file (void)
   if (lang_hooks.decls.post_compilation_parsing_cleanups)
     lang_hooks.decls.post_compilation_parsing_cleanups ();
 
+  /* If any 3rd-party analyzers are being run, wait for them to
+     finish, and handle the results.  */
+  if (warn_run_analyzers)
+    checkers_finish ();
+
   if (seen_error ())
     return;
 
@@ -1983,6 +1989,9 @@ do_compile ()
     {
       int i;
 
+      if (warn_run_analyzers)
+	checkers_start (warn_run_analyzers);
+
       timevar_start (TV_PHASE_SETUP);
 
       /* This must be run always, because it is needed to compute the FP
-- 
1.8.5.3

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH 14/22] Add checkers/always_fails.py
  2017-08-04 21:30 [PATCH 00/22] RFC: integrated 3rd-party static analysis support David Malcolm
                   ` (13 preceding siblings ...)
  2017-08-04 21:37 ` [PATCH 11/22] Add checkers/test-sources David Malcolm
@ 2017-08-04 21:38 ` David Malcolm
  2017-08-04 21:38 ` [PATCH 21/22] Add checkers/Makefile David Malcolm
                   ` (10 subsequent siblings)
  25 siblings, 0 replies; 35+ messages in thread
From: David Malcolm @ 2017-08-04 21:38 UTC (permalink / raw)
  To: gcc-patches; +Cc: David Malcolm

This patch adds a "checker" that always fails, for ensuring that
we can handle failed runs of 3rd-party tools.

checkers/ChangeLog:
	* always_fails.py: New file.
---
 checkers/always_fails.py | 57 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100755 checkers/always_fails.py

diff --git a/checkers/always_fails.py b/checkers/always_fails.py
new file mode 100755
index 0000000..35fd4ac
--- /dev/null
+++ b/checkers/always_fails.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+#   Copyright 2012, 2013, 2015, 2017 David Malcolm <dmalcolm@redhat.com>
+#   Copyright 2012, 2013, 2015, 2017 Red Hat, Inc.
+#
+#   This is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by
+#   the Free Software Foundation, either version 3 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#   General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program.  If not, see
+#   <http://www.gnu.org/licenses/>.
+
+import sys
+import unittest
+
+from firehose.model import Failure
+
+from checker import Checker, CheckerTests, tool_main
+
+class AlwaysFails(Checker):
+    """
+    Checker subclass that always fails
+    """
+    name = 'always-fails'
+
+    def raw_invoke(self, gccinv, sourcefile):
+        args = ['/this/executable/does/not/exist', sourcefile]
+        return self._run_subprocess(sourcefile, args)
+
+class AlwaysFailsTests(CheckerTests):
+    def make_tool(self):
+        tool_class = AlwaysFails
+        ctxt = self.make_ctxt(tool_class.name, capture_exceptions=True)
+        return tool_class(ctxt)
+
+    def verify_basic_metadata(self, analysis, sourcefile):
+        # Verify basic metadata:
+        self.assert_metadata(analysis, 'always-fails', sourcefile)
+
+    def test_harmless_file(self):
+        analysis = self.invoke('test-sources/harmless.c')
+        self.assertEqual(len(analysis.results), 1)
+        r0 = analysis.results[0]
+        self.assertIsInstance(r0, Failure)
+        self.assertEqual(r0.failureid, 'exception')
+        self.assertEqual(r0.location.file.givenpath,
+                         'test-sources/harmless.c')
+        self.assertNotEqual(r0.message.text, None)
+
+if __name__ == '__main__':
+    sys.exit(tool_main(sys.argv, AlwaysFails))
-- 
1.8.5.3

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH 06/22] Makefile.in: hack in -lpthread
  2017-08-04 21:30 [PATCH 00/22] RFC: integrated 3rd-party static analysis support David Malcolm
                   ` (15 preceding siblings ...)
  2017-08-04 21:38 ` [PATCH 21/22] Add checkers/Makefile David Malcolm
@ 2017-08-04 21:38 ` David Malcolm
  2017-09-01 18:13   ` Jeff Law
  2017-08-04 21:38 ` [PATCH 13/22] Add checkers/checker.py David Malcolm
                   ` (8 subsequent siblings)
  25 siblings, 1 reply; 35+ messages in thread
From: David Malcolm @ 2017-08-04 21:38 UTC (permalink / raw)
  To: gcc-patches; +Cc: David Malcolm

The checkers.cc patch later in the kit can optionally make use of pthread
if available.

Doing it properly would involve some configure checks; this patch simply
hacks -lpthread into LIBS unconditionally for now.

gcc/ChangeLog:
	* Makefile.in (LIBS): Hack in -lpthread.
---
 gcc/Makefile.in | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 488f699..9ceb3f3 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1048,10 +1048,11 @@ LIBDEPS= libcommon.a $(CPPLIB) $(LIBIBERTY) $(LIBINTL_DEP) $(LIBICONV_DEP) \
 # even if we are cross-building GCC.
 BUILD_LIBDEPS= $(BUILD_LIBIBERTY)
 
+# FIXME: add some configury for pthread
 # How to link with both our special library facilities
 # and the system's installed libraries.
 LIBS = @LIBS@ libcommon.a $(CPPLIB) $(LIBINTL) $(LIBICONV) $(LIBBACKTRACE) \
-	$(LIBIBERTY) $(LIBDECNUMBER) $(HOST_LIBS)
+	$(LIBIBERTY) $(LIBDECNUMBER) $(HOST_LIBS) -lpthread
 BACKENDLIBS = $(ISLLIBS) $(GMPLIBS) $(PLUGINLIBS) $(HOST_LIBS) \
 	$(ZLIB)
 # Any system libraries needed just for GNAT.
-- 
1.8.5.3

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH 20/22] Add checkers/splint.py
  2017-08-04 21:30 [PATCH 00/22] RFC: integrated 3rd-party static analysis support David Malcolm
                   ` (20 preceding siblings ...)
  2017-08-04 21:39 ` [PATCH 05/22] diagnostic.c/h: add support for external tools David Malcolm
@ 2017-08-04 21:39 ` David Malcolm
  2017-08-05  1:00 ` [PATCH 00/22] RFC: integrated 3rd-party static analysis support Eric Gallager
                   ` (3 subsequent siblings)
  25 siblings, 0 replies; 35+ messages in thread
From: David Malcolm @ 2017-08-04 21:39 UTC (permalink / raw)
  To: gcc-patches; +Cc: David Malcolm

This patch adds a harness for invoking splint:
  http://splint.org/
returning the results in JSON format.

It runs "splint -csv TEMPFILE +csvoverwrite -strict", then uses
  firehose.parsers.splint.parse_splint_csv
and
  firehose.parsers.splint.parse_splint_stderr
to parse the csv and the stderr, turning them into firehose JSON
(stderr is used to get at version information).

checkers/ChangeLog:
	* splint.py: New file.
---
 checkers/splint.py | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100755 checkers/splint.py

diff --git a/checkers/splint.py b/checkers/splint.py
new file mode 100755
index 0000000..e8f79f2
--- /dev/null
+++ b/checkers/splint.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+#   Copyright 2017 David Malcolm <dmalcolm@redhat.com>
+#   Copyright 2017 Red Hat, Inc.
+#
+#   This is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by
+#   the Free Software Foundation, either version 3 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#   General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program.  If not, see
+#   <http://www.gnu.org/licenses/>.
+
+import sys
+import tempfile
+
+from firehose.model import File, Issue
+from firehose.parsers.splint import parse_splint_csv, parse_splint_stderr
+
+from checker import Checker, CheckerTests, tool_main
+
+class InvokeSplint(Checker):
+    """
+    Checker subclass that invokes "splint -strict"
+    """
+    name = 'splint'
+
+    def __init__(self, ctxt):
+        Checker.__init__(self, ctxt)
+        self.tempfile = None
+
+    def __del__(self):
+        del self.tempfile
+
+    def raw_invoke(self, gccinv, sourcefile):
+        self.tempfile = tempfile.NamedTemporaryFile()
+        args = ['splint', '-csv', self.tempfile.name, '+csvoverwrite', '-strict', sourcefile]
+        # FIXME: why is overwrite needed?
+        return self._run_subprocess(sourcefile, args)
+
+    def handle_output(self, result):
+        analysis = parse_splint_csv(self.tempfile.name)
+        analysis.metadata.file_ = File(result.sourcefile, None)
+        analysis.metadata.version = parse_splint_stderr(result.err)
+        self.set_custom_fields(result, analysis)
+        return analysis
+
+    def set_custom_fields(self, result, analysis):
+        analysis.set_custom_field('splint-invocation',
+                                  ' '.join(result.argv))
+        result.set_custom_fields(analysis)
+
+class SplintTests(CheckerTests):
+    def make_tool(self):
+        return self.make_tool_from_class(InvokeSplint)
+
+    def verify_basic_metadata(self, analysis, sourcefile):
+        # Verify basic metadata:
+        self.assert_metadata(analysis, 'splint', sourcefile)
+        self.assert_has_custom_field(analysis, 'splint-invocation')
+        self.assert_has_custom_field(analysis, 'stdout')
+        self.assert_has_custom_field(analysis, 'stderr')
+
+    def test_unconditional_leak(self):
+        analysis = self.invoke('test-sources/unconditional-file-leak.c')
+        self.assertEqual(len(analysis.results), 8)
+        r0 = analysis.results[0]
+        self.assertIsInstance(r0, Issue)
+        self.assertEqual(r0.testid, 'internalglobs')
+
+if __name__ == '__main__':
+    sys.exit(tool_main(sys.argv, InvokeSplint))
-- 
1.8.5.3

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH 05/22] diagnostic.c/h: add support for external tools
  2017-08-04 21:30 [PATCH 00/22] RFC: integrated 3rd-party static analysis support David Malcolm
                   ` (19 preceding siblings ...)
  2017-08-04 21:39 ` [PATCH 18/22] Add checkers/flawfinder.py David Malcolm
@ 2017-08-04 21:39 ` David Malcolm
  2017-09-01 18:18   ` Jeff Law
  2017-08-04 21:39 ` [PATCH 20/22] Add checkers/splint.py David Malcolm
                   ` (4 subsequent siblings)
  25 siblings, 1 reply; 35+ messages in thread
From: David Malcolm @ 2017-08-04 21:39 UTC (permalink / raw)
  To: gcc-patches; +Cc: David Malcolm

This patch adds fields "external_tool" and "external_test_id"
to diagnostic_info, allowing for diagnostics to be marked as
coming from a 3rd-party tool.

Instead of printing the pertinent warning flag e.g.:

  foo.c:10:1: something is wrong [-Wpointer-arith]

the tool "ID" and (optionally) the test ID are printed, e.g.:

  foo.c:10:1: something is wrong [cppcheck:memleak]

gcc/ChangeLog:
	* diagnostic-show-locus.c: Include "selftest-diagnostic.h".
	(class selftest::test_diagnostic_context): Move to
	selftest-diagnostic.h.
	* diagnostic.c: Include "selftest-diagnostic.h".
	(diagnostic_info::diagnostic_info): New ctor.
	(print_option_information): Handle external_tool and
	external_test_id fields of diagnostic_info.
	(diagnostic_report_diagnostic): Assert that diagnostic->kind is
	not DK_UNSPECIFIED.
	(selftest::dummy_option_name_cb): New function.
	(selftest::assert_option_information): New function.
	(selftest::test_print_option_information): New function.
	(selftest::diagnostic_c_tests): Call
	selftest::test_print_option_information.
	* diagnostic.h (struct diagnostic_info): Add default ctor,
	along with new fields "external_tool" and "external_test_id".
	* selftest-diagnostic.h: New file.
---
 gcc/diagnostic-show-locus.c | 29 +---------------
 gcc/diagnostic.c            | 85 ++++++++++++++++++++++++++++++++++++++++++---
 gcc/diagnostic.h            |  5 +++
 gcc/selftest-diagnostic.h   | 62 +++++++++++++++++++++++++++++++++
 4 files changed, 149 insertions(+), 32 deletions(-)
 create mode 100644 gcc/selftest-diagnostic.h

diff --git a/gcc/diagnostic-show-locus.c b/gcc/diagnostic-show-locus.c
index b0e72e7..08b2e56 100644
--- a/gcc/diagnostic-show-locus.c
+++ b/gcc/diagnostic-show-locus.c
@@ -29,6 +29,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "diagnostic-color.h"
 #include "gcc-rich-location.h"
 #include "selftest.h"
+#include "selftest-diagnostic.h"
 
 #ifdef HAVE_TERMIOS_H
 # include <termios.h>
@@ -1988,34 +1989,6 @@ namespace selftest {
 
 /* Selftests for diagnostic_show_locus.  */
 
-/* Convenience subclass of diagnostic_context for testing
-   diagnostic_show_locus.  */
-
-class test_diagnostic_context : public diagnostic_context
-{
- public:
-  test_diagnostic_context ()
-  {
-    diagnostic_initialize (this, 0);
-    show_caret = true;
-    show_column = true;
-    start_span = start_span_cb;
-  }
-  ~test_diagnostic_context ()
-  {
-    diagnostic_finish (this);
-  }
-
-  /* Implementation of diagnostic_start_span_fn, hiding the
-     real filename (to avoid printing the names of tempfiles).  */
-  static void
-  start_span_cb (diagnostic_context *context, expanded_location exploc)
-  {
-    exploc.file = "FILENAME";
-    default_diagnostic_start_span_fn (context, exploc);
-  }
-};
-
 /* Verify that diagnostic_show_locus works sanely on UNKNOWN_LOCATION.  */
 
 static void
diff --git a/gcc/diagnostic.c b/gcc/diagnostic.c
index bbf5f5c..570f8c2 100644
--- a/gcc/diagnostic.c
+++ b/gcc/diagnostic.c
@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "diagnostic-color.h"
 #include "edit-context.h"
 #include "selftest.h"
+#include "selftest-diagnostic.h"
 
 #ifdef HAVE_TERMIOS_H
 # include <termios.h>
@@ -67,7 +68,17 @@ const char *progname;
 /* A diagnostic_context surrogate for stderr.  */
 static diagnostic_context global_diagnostic_context;
 diagnostic_context *global_dc = &global_diagnostic_context;
+
 \f
+
+/* diagnostic_info's ctor.  */
+
+diagnostic_info::diagnostic_info ()
+: message (), richloc (NULL), x_data (NULL), kind (DK_UNSPECIFIED),
+  option_index (0), external_tool (NULL), external_test_id (NULL)
+{
+}
+
 /* Return a malloc'd string containing MSG formatted a la printf.  The
    caller is responsible for freeing the memory.  */
 char *
@@ -843,6 +854,28 @@ print_option_information (diagnostic_context *context,
 			  const diagnostic_info *diagnostic,
 			  diagnostic_t orig_diag_kind)
 {
+  pretty_printer *pp = context->printer;
+  const char *cs = colorize_start (pp_show_color (pp),
+				   diagnostic_kind_color[diagnostic->kind]);
+  const char *ce = colorize_stop (pp_show_color (pp));
+
+  if (diagnostic->external_tool)
+    {
+      pp_string (pp, " [");
+      pp_string (pp, cs);
+      pp_string (pp, diagnostic->external_tool);
+      pp_string (pp, ce);
+      if (diagnostic->external_test_id)
+	{
+	  pp_character (pp, ':');
+	  pp_string (pp, cs);
+	  pp_string (pp, diagnostic->external_test_id);
+	  pp_string (pp, ce);
+	}
+      pp_character (pp, ']');
+      return;
+    }
+
   char *option_text;
 
   option_text = context->option_name (context, diagnostic->option_index,
@@ -850,12 +883,10 @@ print_option_information (diagnostic_context *context,
 
   if (option_text)
     {
-      pretty_printer *pp = context->printer;
       pp_string (pp, " [");
-      pp_string (pp, colorize_start (pp_show_color (pp),
-				     diagnostic_kind_color[diagnostic->kind]));
+      pp_string (pp, cs);
       pp_string (pp, option_text);
-      pp_string (pp, colorize_stop (pp_show_color (pp)));
+      pp_string (pp, ce);
       pp_character (pp, ']');
       free (option_text);
     }
@@ -875,6 +906,8 @@ diagnostic_report_diagnostic (diagnostic_context *context,
   location_t location = diagnostic_location (diagnostic);
   diagnostic_t orig_diag_kind = diagnostic->kind;
 
+  gcc_assert (diagnostic->kind != DK_UNSPECIFIED);
+
   /* Give preference to being able to inhibit warnings, before they
      get reclassified to something else.  */
   if ((diagnostic->kind == DK_WARNING || diagnostic->kind == DK_PEDWARN)
@@ -1490,6 +1523,49 @@ real_abort (void)
 
 namespace selftest {
 
+/* A dummy diagnostic_context::option_name callback that always
+   returns a copy of "-Woption-name-goes-here".  */
+
+char *
+dummy_option_name_cb (diagnostic_context *, int, diagnostic_t, diagnostic_t)
+{
+  return xstrdup ("-Woption-name-goes-here");
+}
+
+/* Verify that print_option_information on DIAGNOSTIC prints
+   EXPECTED.  */
+
+static void
+assert_option_information (diagnostic_info *diagnostic,
+			   const char *expected)
+{
+  test_diagnostic_context dc;
+  dc.option_name = dummy_option_name_cb;
+  print_option_information (&dc, diagnostic, DK_WARNING);
+  ASSERT_STREQ (expected, pp_formatted_text (dc.printer));
+}
+
+/* Verify that print_option_information does the right thing.  */
+
+static void
+test_print_option_information ()
+{
+  diagnostic_info diagnostic;
+  diagnostic.kind = DK_WARNING;
+
+  /* A built-in warning.  */
+  assert_option_information (&diagnostic, " [-Woption-name-goes-here]");
+
+  /* An external tool, without a test ID.  */
+  diagnostic.external_tool = "external-tool-name";
+  assert_option_information (&diagnostic, " [external-tool-name]");
+
+  /* An external tool, with a test ID.  */
+  diagnostic.external_test_id = "external-test-id";
+  assert_option_information (&diagnostic,
+			     " [external-tool-name:external-test-id]");
+}
+
 /* Helper function for test_print_escaped_string.  */
 
 static void
@@ -1621,6 +1697,7 @@ test_print_parseable_fixits_replace ()
 void
 diagnostic_c_tests ()
 {
+  test_print_option_information ();
   test_print_escaped_string ();
   test_print_parseable_fixits_none ();
   test_print_parseable_fixits_insert ();
diff --git a/gcc/diagnostic.h b/gcc/diagnostic.h
index dbd1703..0127a6c 100644
--- a/gcc/diagnostic.h
+++ b/gcc/diagnostic.h
@@ -29,6 +29,8 @@ along with GCC; see the file COPYING3.  If not see
    list in diagnostic.def.  */
 struct diagnostic_info
 {
+  diagnostic_info ();
+
   /* Text to be formatted.  */
   text_info message;
 
@@ -41,6 +43,9 @@ struct diagnostic_info
   diagnostic_t kind;
   /* Which OPT_* directly controls this diagnostic.  */
   int option_index;
+
+  const char *external_tool;
+  const char *external_test_id;
 };
 
 /* Each time a diagnostic's classification is changed with a pragma,
diff --git a/gcc/selftest-diagnostic.h b/gcc/selftest-diagnostic.h
new file mode 100644
index 0000000..78bc5cf
--- /dev/null
+++ b/gcc/selftest-diagnostic.h
@@ -0,0 +1,62 @@
+/* Support for selftests of diagnostics.
+   Copyright (C) 2016-2017 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_SELFTEST_DIAGNOSTIC_H
+#define GCC_SELFTEST_DIAGNOSTIC_H
+
+/* The selftest code should entirely disappear in a production
+   configuration, hence we guard all of it with #if CHECKING_P.  */
+
+#if CHECKING_P
+
+namespace selftest {
+
+/* Convenience subclass of diagnostic_context for testing
+   diagnostic_show_locus etc.  */
+
+class test_diagnostic_context : public diagnostic_context
+{
+ public:
+  test_diagnostic_context ()
+  {
+    diagnostic_initialize (this, 0);
+    show_caret = true;
+    show_column = true;
+    start_span = start_span_cb;
+  }
+  ~test_diagnostic_context ()
+  {
+    diagnostic_finish (this);
+  }
+
+  /* Implementation of diagnostic_start_span_fn, hiding the
+     real filename (to avoid printing the names of tempfiles).  */
+  static void
+  start_span_cb (diagnostic_context *context, expanded_location exploc)
+  {
+    exploc.file = "FILENAME";
+    default_diagnostic_start_span_fn (context, exploc);
+  }
+};
+
+} /* end of namespace selftest.  */
+
+#endif /* #if CHECKING_P */
+
+#endif /* GCC_SELFTEST_DIAGNOSTIC_H */
-- 
1.8.5.3

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH 18/22] Add checkers/flawfinder.py
  2017-08-04 21:30 [PATCH 00/22] RFC: integrated 3rd-party static analysis support David Malcolm
                   ` (18 preceding siblings ...)
  2017-08-04 21:38 ` [PATCH 12/22] Add -Wrun-analyzers= to common.opt, toplev.c, and invoke.texi David Malcolm
@ 2017-08-04 21:39 ` David Malcolm
  2017-08-04 21:39 ` [PATCH 05/22] diagnostic.c/h: add support for external tools David Malcolm
                   ` (5 subsequent siblings)
  25 siblings, 0 replies; 35+ messages in thread
From: David Malcolm @ 2017-08-04 21:39 UTC (permalink / raw)
  To: gcc-patches; +Cc: David Malcolm

This patch adds a harness for invoking flawfinder:
  https://www.dwheeler.com/flawfinder/
returning the results in JSON format.

It runs "flawfinder", then uses firehose.parsers.flawfinder.parse_file
to parse the stdout, turning it into firehose JSON.

checkers/ChangeLog:
	* flawfinder.py: New file.
---
 checkers/flawfinder.py | 124 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 124 insertions(+)
 create mode 100755 checkers/flawfinder.py

diff --git a/checkers/flawfinder.py b/checkers/flawfinder.py
new file mode 100755
index 0000000..475a513
--- /dev/null
+++ b/checkers/flawfinder.py
@@ -0,0 +1,124 @@
+#!/usr/bin/env python
+#   Copyright 2012, 2013, 2015, 2017 David Malcolm <dmalcolm@redhat.com>
+#   Copyright 2012, 2013, 2015, 2017 Red Hat, Inc.
+#
+#   This is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by
+#   the Free Software Foundation, either version 3 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#   General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program.  If not, see
+#   <http://www.gnu.org/licenses/>.
+
+import sys
+import tempfile
+
+from gccinvocation import GccInvocation
+
+from checker import Checker, CheckerTests, make_file, make_stats, \
+    tool_main
+
+from firehose.model import Failure, Issue
+from firehose.parsers.flawfinder import parse_file
+
+class InvokeFlawfinder(Checker):
+    """
+    Checker subclass that invokes "flawfinder"
+    """
+    name = 'flawfinder'
+
+    def raw_invoke(self, gccinv, sourcefile):
+        args = ['flawfinder', sourcefile] # FIXME
+        return self._run_subprocess(sourcefile, args)
+
+    def handle_output(self, result):
+        if result.returncode:
+            analysis = self._make_failed_analysis(result.sourcefile, result.timer,
+                                                  msgtext='Bad exit code running %s' % self.name,
+                                                  failureid='bad-exit-code')
+            self.set_custom_fields(result, analysis)
+            return analysis
+
+        if 0:
+            print('result.err: %r' % result.err)
+            print('result.out: %r' % result.out)
+
+        # (there doesn't seem to be a way to have flawfinder directly
+        # save its output to a given location)
+
+        with tempfile.NamedTemporaryFile() as outfile:
+            outfile.write(result.out)
+            outfile.flush()
+
+            with open(outfile.name) as infile:
+                # Parse the captured stdout into a firehose analysis:
+                analysis = parse_file(infile)
+                analysis.metadata.file_ = make_file(result.sourcefile)
+                analysis.metadata.stats = make_stats(result.timer)
+                self.set_custom_fields(result, analysis)
+
+                return analysis
+
+    def set_custom_fields(self, result, analysis):
+        analysis.set_custom_field('flawfinder-invocation',
+                                  ' '.join(result.argv))
+        result.set_custom_fields(analysis)
+
+class FlawfinderTests(CheckerTests):
+    def make_tool(self):
+        return self.make_tool_from_class(InvokeFlawfinder)
+
+    def verify_basic_metadata(self, analysis, sourcefile):
+        # Verify basic metadata:
+        self.assert_metadata(analysis, 'flawfinder', sourcefile)
+        self.assert_has_custom_field(analysis, 'flawfinder-invocation')
+        self.assert_has_custom_field(analysis, 'stdout')
+        self.assert_has_custom_field(analysis, 'stderr')
+
+    def test_file_not_found(self):
+        analysis = self.invoke('does-not-exist.c')
+        #print(analysis)
+        self.assertEqual(len(analysis.results), 0)
+
+    def test_timeout(self):
+        sourcefile = 'test-sources/harmless.c'
+        tool = self.make_tool()
+        tool.timeout = 0
+        gccinv = GccInvocation(['gcc', sourcefile])
+        analysis = tool.checked_invoke(gccinv, sourcefile)
+        self.assert_metadata(analysis, 'flawfinder', sourcefile)
+        self.assertEqual(len(analysis.results), 1)
+        r0 = analysis.results[0]
+        self.assertIsInstance(r0, Failure)
+        self.assertEqual(r0.failureid, 'timeout')
+        self.assert_has_custom_field(analysis, 'timeout')
+        self.assert_has_custom_field(analysis, 'command-line')
+
+    def test_harmless_file(self):
+        analysis = self.invoke('test-sources/harmless.c')
+        self.assertEqual(len(analysis.results), 0)
+
+    def test_use_of_random(self):
+        analysis = self.invoke('test-sources/cpychecker-demo.c')
+        self.assertEqual(len(analysis.results), 1)
+        r0 = analysis.results[0]
+        self.assertIsInstance(r0, Issue)
+        self.assertEqual(r0.testid, 'random')
+        self.assertEqual(r0.location.file.givenpath,
+                         'test-sources/cpychecker-demo.c')
+        self.assertEqual(r0.location.point.line, 97)
+        self.assertEqual(r0.message.text,
+                         "This function is not sufficiently random for"
+                         " security-related functions such as key and nonce"
+                         " creation. use a more secure technique for"
+                         " acquiring random values.")
+        self.assertEqual(r0.severity, '3')
+
+if __name__ == '__main__':
+    sys.exit(tool_main(sys.argv, InvokeFlawfinder))
-- 
1.8.5.3

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 00/22] RFC: integrated 3rd-party static analysis support
  2017-08-04 21:30 [PATCH 00/22] RFC: integrated 3rd-party static analysis support David Malcolm
                   ` (21 preceding siblings ...)
  2017-08-04 21:39 ` [PATCH 20/22] Add checkers/splint.py David Malcolm
@ 2017-08-05  1:00 ` Eric Gallager
  2017-08-08  0:23   ` David Malcolm
  2017-08-06 21:21 ` Martin Sebor
                   ` (2 subsequent siblings)
  25 siblings, 1 reply; 35+ messages in thread
From: Eric Gallager @ 2017-08-05  1:00 UTC (permalink / raw)
  To: David Malcolm; +Cc: gcc-patches

On 8/4/17, David Malcolm <dmalcolm@redhat.com> wrote:
> This patch kit clearly isn't ready yet as-is (see e.g. the
> "known unknowns" below), but I'm posting it now in the hope of
> getting early feedback.
>
> Summary
> =======
>
> This patch kit provides an easy way to integrate 3rd-party static
> analysis tools into gcc, and have them:
> (a) report through gcc's diagnostic subsystem, and
> (b) "watermark" the generated binaries with queryable data on what checkers
>     were run, and what the results were.
>
> Here's an example showing gcc running a bank of 3rd-party checkers on this
> source file:
>
>   #include <stdlib.h>
>
>   void test ()
>   {
>     void *ptr_1;
>     void *ptr_2;
>
>     ptr_1 = malloc (64);
>     if (!ptr_1)
>       return;
>     ptr_2 = malloc (64);
>     if (!ptr_2)
>       return;
>
>     free (ptr_2);
>     free (ptr_1);
>   }
>
> via a simple command-line:
>
>   $ ./xgcc -B. -c conditional-leak.c -Wrun-analyzers=policy.json
>   conditional-leak.c:13:5: error: Potential leak of memory pointed to by
> 'ptr_1' [clang-analyzer:Memory leak]
>        return;
>        ^
>   conditional-leak.c:8:11: note: state 1 of 4: Memory is allocated
>      ptr_1 = malloc (64);
>              ^
>   conditional-leak.c:9:7: note: state 2 of 4: Assuming 'ptr_1' is non-null
>      if (!ptr_1)
>          ^
>   conditional-leak.c:12:7: note: state 3 of 4: Assuming 'ptr_2' is null
>      if (!ptr_2)
>          ^
>   conditional-leak.c:13:5: note: state 4 of 4: Potential leak of memory
> pointed to by 'ptr_1'
>        return;
>        ^
>   conditional-leak.c:13:0: error: Memory leak: ptr_1 [cppcheck:memleak]
>        return;
>
> Of the checkers, clang's static analyzer and cppcheck both identify the
> memory leak; the former also identifies the control flow (the other
> checkers didn't report anything).
>
> The idea is to provide a mechanism to make it easy for developers and
> projects to impose policy on what checkers should be run, and to gate
> the build if certain tests fail.
>
> In this case, the results are treated as hard errors and block the build,
> but policy could allow them to be warnings.
>
> Extensive metadata is captured about what checkers were run, and what
> they emitted, using the "Firehose" interchange format:
>
>   http://firehose.readthedocs.io/en/latest/index.html
>
> In the case where this doesn't block the build, this can be queried via a
>   contrib/get-static-analysis.py
> script, so e.g. you can verify that a setuid binary was indeed compiled
> using all the checkers that you expect it to be.
>
> This can also be used to embed data about the code into the watermark.
> For example, checkers/ianal.py embeds information about "Copyright"
> lines in the source code into the generated binaries, from where it
> can be queried (this example is intended as a proof-of-concept rather
> than as a real license-tracking solution...)
>
>
> Statement of the problem
> ========================
>
> Static analysis is IMHO done too late, if at all: static analysis tools are
> run
> as an optional extra, "on the side", rather than in developers' normal
> workflow, with some kind of "override the compiler and do extra work" hook,
> which may preclude running more than one analyzer at once.  Analysis
> results
> are reviewed (if at all) in some kind of on-the-side tool, rather than when
> the
> code is being edited, or patches being prepared.
>
> It would be better to have an easy way for developers to run analyzer(s)
> as they're doing development, as part of their edit-compile-test cycle
> - analysis problems are reported immediately, and can be acted on
> immediately (e.g. by treating some checker tests as being hard errors).
>
> It would also be good to have a way to run analyzer(s) when packages are
> built, with a variety of precanned policies for analyzers.  For example,
> setuid binaries and network-facing daemons could each be built with a
> higher strictness of checking.
>
> It would also be good to tag binaries with information on what analyzers
> were run, what options they were invoked with, etc.
> Potentially have "dump_file" information from optimization passes stored
> in the metadata also.   Have a tool to query all of this.
>
> This way a distribution can perform a query like:
>
>   "show me all setuid binaries that contain code that wasn't checked
>    with $CHECKER with $TEST set to be a hard error"
>
> Can/should we break the build if there are issues?
> Yes: but have a way to opt-in easily: if the tool is well-integrated with
> the
>     compiler: e.g.
>
> -Wrun-analyzers=/usr/share/analyzers/userspace/network-facing-service
> then upstream developers and packagers can turn on the setting, and see
> what
> breaks, and fix it naturally within a compile-edit-test cycle.
>
> This gives a relatively painless way to opt-in to increasing levels of
> strictness (e.g. by an upstream project, or by an individual developer).
>
> Does this slow the build down?
> Yes: but you can choose which analyzers run, and can choose to turn them
> off.
> It ought to parallelize well.  I believe users will prefer to turn them on,
> and have builders burn up the extra CPU cycles.
> This may make much more sense for binary distributions (e.g. Fedora,
> Debian)
> than it does for things like Gentoo.
>
> Example policy files/options might be:
>   -Wrun-analyzers=/usr/share/analyzers/userspace/network-facing-service
>   -Wrun-analyzers=/usr/share/analyzers/userspace/application
>   -Wrun-analyzers=/usr/share/analyzers/userspace/setuid-binary
>   -Wrun-analyzers=/usr/share/analyzers/userspace/default
>   -Wrun-analyzers=/usr/share/analyzers/kernel
>
> or whatnot.
>
> Idea is to provide mechanism, and for the distribution to decide on some
> standard policies.
>
> This may also allow us to sandbox a gcc plugin by running the plugin inside
> another cc1, for plugins that add warnings - if the plugin ICEs, then the
> main
> cc1 isn't affected (useful for doing mass rebuilds of code using an
> experimental plugin).
>
>
> Known unknowns
> ==============
>
> How does one suppress a specific false-positive site?
> Do we need a pragma for it?  (though pragmas ought to already affect some
> of
> the underlying checkers...)
>
> Do we really want .json for the policy format?
> If we're expecting users to edit this, we need great error messages,
> and probably support for comments.  Would YAML or somesuch be better?
> Or have them as individual command-line flags, and the policy files are
> "@" files for gcc.
>
> How to mark which checkers are appropriate for which languages?
>
> (etc; see also all the FIXMEs in the code...)
>
>
> Dependencies
> ============
>
> The "checkers" subdirectory uses Python 2 or 3, and has a few Python
> dependencies, including "firehose" and "gccinvocation".
>
>
> How it works
> ============
>
> If enabled, toplev.c starts each of the various checkers from separate
> threads from near the start of toplev.c, so that the checkers run in
> parallel with each other, and with the bulk of cc1.  Near the end of
> toplev.c it waits for each thread to finish, and reads the stdout,
> which is expected to be in Firehose JSON format.  This is then sent
> through the diagnostic subsystem.
>
> Each "checker" is a harness script, which "knows" how to invoke
> the particular 3rd-party tool, and coerce the output from the tool
> into the common JSON format.
>
> Some notes on the data model can be seen here:
>   http://firehose.readthedocs.io/en/latest/data-model.html
> (though that's expressed as Python objects and XML, rather than
> the JSON format).
>
>
> Successfully bootstrapped&regrtested the combination of the patches
> on x86_64-pc-linux-gnu (though the only testcases are selftest based
> unit-tests, rather than DejaGnu tests).
>
>

General questions:

1. When bootstrapping, did you try adding the new -Wrun-analyzers to
the build system to see what it reports for GCC's own source code?
It'd be worthwhile to do some dogfooding to determine what kind of
results it produces.
2. Since -Wrun-analyzers is a warning option, can I turn messages from
it into errors with -Werror=run-analyzers? Will it work with #pragma
GCC diagnostic push/pop?
3. Do we care about duplicated warnings between the analyzers and GCC
at all? I'm just thinking, if I put in a bug report requesting a new
warning for GCC after -Wrun-analyzers is added, will people just close
it saying "Oh you can already get that with -Wrun-analyzers" or
something? That would be disappointing.
4. Along those lines, how responsible exactly will GCC be for issues
with the analyzers it runs? For example, I know splint (from [20/22])
at least is pretty buggy, it crashed 5 times on me when running it
manually over my fork of gdb. Does GCC really want to encourage the
use of potentially buggy external tools that might lead to issues?

...I thought I had a 5th one but I forget... Anyways, I like the idea
overall! Keep up the good work!

> Thoughts?
> Dave
>
>
> David Malcolm (22):
>   Expose assert_loceq outside of input.c; add ASSERT_LOCEQ
>   libcpp: add linemap_position_for_file_line_and_column
>   Add JSON implementation
>   Add firehose.h/cc
>   diagnostic.c/h: add support for external tools
>   Makefile.in: hack in -lpthread
>   Add minimal version of Nick Clifton's annobin code
>   Add GNU_BUILD_ATTRIBUTE_STATIC_ANALYSIS to annobin.h
>   Add selftest::read_file (..., FILE *, ...)
>   Add checkers.h/cc
>   Add checkers/test-sources
>   Add -Wrun-analyzers= to common.opt, toplev.c, and invoke.texi
>   Add checkers/checker.py
>   Add checkers/always_fails.py
>   Add checkers/clang_analyzer.py
>   Add checkers/coverity.py
>   Add checkers/cppcheck.py
>   Add checkers/flawfinder.py
>   Add checkers/ianal.py
>   Add checkers/splint.py
>   Add checkers/Makefile
>   Add contrib/get-static-analysis.py
>
>  checkers/ChangeLog                                 |    9 +
>  checkers/Makefile                                  |   23 +
>  checkers/always_fails.py                           |   57 +
>  checkers/checker.py                                |  367 ++++
>  checkers/clang_analyzer.py                         |  145 ++
>  checkers/coverity.py                               |  141 ++
>  checkers/cppcheck.py                               |  138 ++
>  checkers/flawfinder.py                             |  124 ++
>  checkers/ianal.py                                  |   79 +
>  checkers/splint.py                                 |   77 +
>  checkers/test-sources/conditional-leak.c           |   17 +
>  checkers/test-sources/cpychecker-demo.c            |  110 ++
>  checkers/test-sources/divide-by-zero.c             |    4 +
>  checkers/test-sources/harmless.c                   |    9 +
>  checkers/test-sources/multiple-1.c                 |    6 +
>  checkers/test-sources/multiple-2.c                 |    9 +
>  checkers/test-sources/out-of-bounds.c              |    6 +
>  checkers/test-sources/read-through-null.c          |    4 +
>  checkers/test-sources/return-of-stack-address.c    |    6 +
>  checkers/test-sources/unconditional-file-leak.c    |   10 +
>  contrib/get-static-analysis.py                     |   47 +
>  gcc/Makefile.in                                    |    7 +-
>  gcc/annobin.cc                                     |  185 ++
>  gcc/annobin.h                                      |   45 +
>  gcc/checkers.cc                                    |  736 ++++++++
>  gcc/checkers.h                                     |   26 +
>  gcc/common.opt                                     |    4 +
>  gcc/diagnostic-show-locus.c                        |   29 +-
>  gcc/diagnostic.c                                   |   85 +-
>  gcc/diagnostic.h                                   |    5 +
>  gcc/doc/invoke.texi                                |    8 +-
>  gcc/firehose.cc                                    |  709 ++++++++
>  gcc/firehose.h                                     |  199 ++
>  gcc/input.c                                        |   71 +-
>  gcc/json.cc                                        | 1914
> ++++++++++++++++++++
>  gcc/json.h                                         |  214 +++
>  gcc/selftest-diagnostic.h                          |   62 +
>  gcc/selftest-input.h                               |   54 +
>  gcc/selftest-run-tests.c                           |    3 +
>  gcc/selftest.c                                     |   16 +-
>  gcc/selftest.h                                     |   10 +
>  .../checker-output/test-clang-analyzer.json        |  122 ++
>  .../selftests/checker-output/test-cppcheck.json    |   50 +
>  .../selftests/checker-output/test-failure.json     |   38 +
>  .../selftests/checker-policy/test-policy.json      |    7 +
>  gcc/toplev.c                                       |    9 +
>  libcpp/include/line-map.h                          |    9 +
>  libcpp/line-map.c                                  |   51 +
>  48 files changed, 6001 insertions(+), 55 deletions(-)
>  create mode 100644 checkers/ChangeLog
>  create mode 100644 checkers/Makefile
>  create mode 100755 checkers/always_fails.py
>  create mode 100755 checkers/checker.py
>  create mode 100755 checkers/clang_analyzer.py
>  create mode 100644 checkers/coverity.py
>  create mode 100755 checkers/cppcheck.py
>  create mode 100755 checkers/flawfinder.py
>  create mode 100755 checkers/ianal.py
>  create mode 100755 checkers/splint.py
>  create mode 100644 checkers/test-sources/conditional-leak.c
>  create mode 100644 checkers/test-sources/cpychecker-demo.c
>  create mode 100644 checkers/test-sources/divide-by-zero.c
>  create mode 100644 checkers/test-sources/harmless.c
>  create mode 100644 checkers/test-sources/multiple-1.c
>  create mode 100644 checkers/test-sources/multiple-2.c
>  create mode 100644 checkers/test-sources/out-of-bounds.c
>  create mode 100644 checkers/test-sources/read-through-null.c
>  create mode 100644 checkers/test-sources/return-of-stack-address.c
>  create mode 100644 checkers/test-sources/unconditional-file-leak.c
>  create mode 100644 contrib/get-static-analysis.py
>  create mode 100644 gcc/annobin.cc
>  create mode 100644 gcc/annobin.h
>  create mode 100644 gcc/checkers.cc
>  create mode 100644 gcc/checkers.h
>  create mode 100644 gcc/firehose.cc
>  create mode 100644 gcc/firehose.h
>  create mode 100644 gcc/json.cc
>  create mode 100644 gcc/json.h
>  create mode 100644 gcc/selftest-diagnostic.h
>  create mode 100644 gcc/selftest-input.h
>  create mode 100644
> gcc/testsuite/selftests/checker-output/test-clang-analyzer.json
>  create mode 100644
> gcc/testsuite/selftests/checker-output/test-cppcheck.json
>  create mode 100644
> gcc/testsuite/selftests/checker-output/test-failure.json
>  create mode 100644 gcc/testsuite/selftests/checker-policy/test-policy.json
>
> --
> 1.8.5.3
>
>

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 00/22] RFC: integrated 3rd-party static analysis support
  2017-08-04 21:30 [PATCH 00/22] RFC: integrated 3rd-party static analysis support David Malcolm
                   ` (22 preceding siblings ...)
  2017-08-05  1:00 ` [PATCH 00/22] RFC: integrated 3rd-party static analysis support Eric Gallager
@ 2017-08-06 21:21 ` Martin Sebor
  2017-08-08 17:57 ` Richard Sandiford
  2017-09-01 17:46 ` Jeff Law
  25 siblings, 0 replies; 35+ messages in thread
From: Martin Sebor @ 2017-08-06 21:21 UTC (permalink / raw)
  To: David Malcolm, gcc-patches

On 08/04/2017 04:04 PM, David Malcolm wrote:
> This patch kit clearly isn't ready yet as-is (see e.g. the
> "known unknowns" below), but I'm posting it now in the hope of
> getting early feedback.
>
> Summary
> =======
>
> This patch kit provides an easy way to integrate 3rd-party static
> analysis tools into gcc, and have them:
> (a) report through gcc's diagnostic subsystem, and
> (b) "watermark" the generated binaries with queryable data on what checkers
>     were run, and what the results were.

I couldn't agree more that static analysis is often done too late,
and that running it before code is checked in is ideal.  At my last
job, running static analysis was required as part of the commit
criteria for every change.  The tooling made sure that each change
was analyzed and defect-free before it was committed and would
reject changes that failed this test.  (A full run on the whole
product would happen on a much less frequent schedule.)

As for this particular project, I'm somewhat divided.  On the one
hand, implementing these kinds of enhancements is an opportunity
to clean up and generalize the existing (internal) design, and
expose latent bugs in the process.  On the other, adding all this
machinery complicates the already complex code base.  Unless it's
fully exercised as part of everyday GCC development it also runs
the risk of bit rotting and further increasing GCC maintenance.

That said, GCC having its own static analyzer (or some other such
tool) that were used as the default implementation with this
feature to fully exercise it during bootstrap would largely
obviate this concern.  It would still require maintenance but
at least it would be fully tested.

As for adding our own implementation of JSON (and other components),
to minimize the maintenance costs, I would far prefer to introduce
a dependency on a well-tested and actively maintained third party
library.

Martin

>
> Here's an example showing gcc running a bank of 3rd-party checkers on this
> source file:
>
>   #include <stdlib.h>
>
>   void test ()
>   {
>     void *ptr_1;
>     void *ptr_2;
>
>     ptr_1 = malloc (64);
>     if (!ptr_1)
>       return;
>     ptr_2 = malloc (64);
>     if (!ptr_2)
>       return;
>
>     free (ptr_2);
>     free (ptr_1);
>   }
>
> via a simple command-line:
>
>   $ ./xgcc -B. -c conditional-leak.c -Wrun-analyzers=policy.json
>   conditional-leak.c:13:5: error: Potential leak of memory pointed to by 'ptr_1' [clang-analyzer:Memory leak]
>        return;
>        ^
>   conditional-leak.c:8:11: note: state 1 of 4: Memory is allocated
>      ptr_1 = malloc (64);
>              ^
>   conditional-leak.c:9:7: note: state 2 of 4: Assuming 'ptr_1' is non-null
>      if (!ptr_1)
>          ^
>   conditional-leak.c:12:7: note: state 3 of 4: Assuming 'ptr_2' is null
>      if (!ptr_2)
>          ^
>   conditional-leak.c:13:5: note: state 4 of 4: Potential leak of memory pointed to by 'ptr_1'
>        return;
>        ^
>   conditional-leak.c:13:0: error: Memory leak: ptr_1 [cppcheck:memleak]
>        return;
>
> Of the checkers, clang's static analyzer and cppcheck both identify the
> memory leak; the former also identifies the control flow (the other
> checkers didn't report anything).
>
> The idea is to provide a mechanism to make it easy for developers and
> projects to impose policy on what checkers should be run, and to gate
> the build if certain tests fail.
>
> In this case, the results are treated as hard errors and block the build,
> but policy could allow them to be warnings.
>
> Extensive metadata is captured about what checkers were run, and what
> they emitted, using the "Firehose" interchange format:
>
>   http://firehose.readthedocs.io/en/latest/index.html
>
> In the case where this doesn't block the build, this can be queried via a
>   contrib/get-static-analysis.py
> script, so e.g. you can verify that a setuid binary was indeed compiled
> using all the checkers that you expect it to be.
>
> This can also be used to embed data about the code into the watermark.
> For example, checkers/ianal.py embeds information about "Copyright"
> lines in the source code into the generated binaries, from where it
> can be queried (this example is intended as a proof-of-concept rather
> than as a real license-tracking solution...)
>
>
> Statement of the problem
> ========================
>
> Static analysis is IMHO done too late, if at all: static analysis tools are run
> as an optional extra, "on the side", rather than in developers' normal
> workflow, with some kind of "override the compiler and do extra work" hook,
> which may preclude running more than one analyzer at once.  Analysis results
> are reviewed (if at all) in some kind of on-the-side tool, rather than when the
> code is being edited, or patches being prepared.
>
> It would be better to have an easy way for developers to run analyzer(s)
> as they're doing development, as part of their edit-compile-test cycle
> - analysis problems are reported immediately, and can be acted on
> immediately (e.g. by treating some checker tests as being hard errors).
>
> It would also be good to have a way to run analyzer(s) when packages are
> built, with a variety of precanned policies for analyzers.  For example,
> setuid binaries and network-facing daemons could each be built with a
> higher strictness of checking.
>
> It would also be good to tag binaries with information on what analyzers
> were run, what options they were invoked with, etc.
> Potentially have "dump_file" information from optimization passes stored
> in the metadata also.   Have a tool to query all of this.
>
> This way a distribution can perform a query like:
>
>   "show me all setuid binaries that contain code that wasn't checked
>    with $CHECKER with $TEST set to be a hard error"
>
> Can/should we break the build if there are issues?
> Yes: but have a way to opt-in easily: if the tool is well-integrated with the
>     compiler: e.g.
>         -Wrun-analyzers=/usr/share/analyzers/userspace/network-facing-service
> then upstream developers and packagers can turn on the setting, and see what
> breaks, and fix it naturally within a compile-edit-test cycle
>
> This gives a relatively painless way to opt-in to increasing levels of
> strictness (e.g. by an upstream project, or by an individual developer).
>
> Does this slow the build down?
> Yes: but you can choose which analyzers run, and can choose to turn them off.
> It ought to parallelize well.  I believe users will prefer to turn them on,
> and have builders burn up the extra CPU cycles.
> This may make much more sense for binary distributions (e.g. Fedora, Debian)
> than it does for things like Gentoo.
>
> Example policy files/options might be:
>   -Wrun-analyzers=/usr/share/analyzers/userspace/network-facing-service
>   -Wrun-analyzers=/usr/share/analyzers/userspace/application
>   -Wrun-analyzers=/usr/share/analyzers/userspace/setuid-binary
>   -Wrun-analyzers=/usr/share/analyzers/userspace/default
>   -Wrun-analyzers=/usr/share/analyzers/kernel
>
> or whatnot.
>
> Idea is to provide mechanism, and for the distribution to decide on some
> standard policies.
>
> This may also allow us to sandbox a gcc plugin by running the plugin inside
> another cc1, for plugins that add warnings - if the plugin ICEs, then the main
> cc1 isn't affected (useful for doing mass rebuilds of code using an
> experimental plugin).
>
>
> Known unknowns
> ==============
>
> How does one suppress a specific false-positive site?
> Do we need a pragma for it?  (though pragmas ought to already affect some of
> the underlying checkers...)
>
> Do we really want .json for the policy format?
> If we're expecting users to edit this, we need great error messages,
> and probably support for comments.  Would YAML or somesuch be better?
> Or have them as individual command-line flags, and the policy files are
> "@" files for gcc.
>
> How to mark which checkers are appropriate for which languages?
>
> (etc; see also all the FIXMEs in the code...)
>
>
> Dependencies
> ============
>
> The "checkers" subdirectory uses Python 2 or 3, and has a few Python
> dependencies, including "firehose" and "gccinvocation".
>
>
> How it works
> ============
>
> If enabled, toplev.c starts each of the various checkers from separate
> threads from near the start of toplev.c, so that the checkers run in
> parallel with each other, and with the bulk of cc1.  Near the end of
> toplev.c it waits for each thread to finish, and reads the stdout,
> which is expected to be in Firehose JSON format.  This is then sent
> through the diagnostic subsystem.
>
> Each "checker" is a harness script, which "knows" how to invoke
> the particular 3rd-party tool, and coerce the output from the tool
> into the common JSON format.
>
> Some notes on the data model can be seen here:
>   http://firehose.readthedocs.io/en/latest/data-model.html
> (though that's expressed as Python objects and XML, rather than
> the JSON format).
>
>
> Successfully bootstrapped&regrtested the combination of the patches
> on x86_64-pc-linux-gnu (though the only testcases are selftest based
> unit-tests, rather than DejaGnu tests).
>
>
> Thoughts?
> Dave
>
>
> David Malcolm (22):
>   Expose assert_loceq outside of input.c; add ASSERT_LOCEQ
>   libcpp: add linemap_position_for_file_line_and_column
>   Add JSON implementation
>   Add firehose.h/cc
>   diagnostic.c/h: add support for external tools
>   Makefile.in: hack in -lpthread
>   Add minimal version of Nick Clifton's annobin code
>   Add GNU_BUILD_ATTRIBUTE_STATIC_ANALYSIS to annobin.h
>   Add selftest::read_file (..., FILE *, ...)
>   Add checkers.h/cc
>   Add checkers/test-sources
>   Add -Wrun-analyzers= to common.opt, toplev.c, and invoke.texi
>   Add checkers/checker.py
>   Add checkers/always_fails.py
>   Add checkers/clang_analyzer.py
>   Add checkers/coverity.py
>   Add checkers/cppcheck.py
>   Add checkers/flawfinder.py
>   Add checkers/ianal.py
>   Add checkers/splint.py
>   Add checkers/Makefile
>   Add contrib/get-static-analysis.py
>
>  checkers/ChangeLog                                 |    9 +
>  checkers/Makefile                                  |   23 +
>  checkers/always_fails.py                           |   57 +
>  checkers/checker.py                                |  367 ++++
>  checkers/clang_analyzer.py                         |  145 ++
>  checkers/coverity.py                               |  141 ++
>  checkers/cppcheck.py                               |  138 ++
>  checkers/flawfinder.py                             |  124 ++
>  checkers/ianal.py                                  |   79 +
>  checkers/splint.py                                 |   77 +
>  checkers/test-sources/conditional-leak.c           |   17 +
>  checkers/test-sources/cpychecker-demo.c            |  110 ++
>  checkers/test-sources/divide-by-zero.c             |    4 +
>  checkers/test-sources/harmless.c                   |    9 +
>  checkers/test-sources/multiple-1.c                 |    6 +
>  checkers/test-sources/multiple-2.c                 |    9 +
>  checkers/test-sources/out-of-bounds.c              |    6 +
>  checkers/test-sources/read-through-null.c          |    4 +
>  checkers/test-sources/return-of-stack-address.c    |    6 +
>  checkers/test-sources/unconditional-file-leak.c    |   10 +
>  contrib/get-static-analysis.py                     |   47 +
>  gcc/Makefile.in                                    |    7 +-
>  gcc/annobin.cc                                     |  185 ++
>  gcc/annobin.h                                      |   45 +
>  gcc/checkers.cc                                    |  736 ++++++++
>  gcc/checkers.h                                     |   26 +
>  gcc/common.opt                                     |    4 +
>  gcc/diagnostic-show-locus.c                        |   29 +-
>  gcc/diagnostic.c                                   |   85 +-
>  gcc/diagnostic.h                                   |    5 +
>  gcc/doc/invoke.texi                                |    8 +-
>  gcc/firehose.cc                                    |  709 ++++++++
>  gcc/firehose.h                                     |  199 ++
>  gcc/input.c                                        |   71 +-
>  gcc/json.cc                                        | 1914 ++++++++++++++++++++
>  gcc/json.h                                         |  214 +++
>  gcc/selftest-diagnostic.h                          |   62 +
>  gcc/selftest-input.h                               |   54 +
>  gcc/selftest-run-tests.c                           |    3 +
>  gcc/selftest.c                                     |   16 +-
>  gcc/selftest.h                                     |   10 +
>  .../checker-output/test-clang-analyzer.json        |  122 ++
>  .../selftests/checker-output/test-cppcheck.json    |   50 +
>  .../selftests/checker-output/test-failure.json     |   38 +
>  .../selftests/checker-policy/test-policy.json      |    7 +
>  gcc/toplev.c                                       |    9 +
>  libcpp/include/line-map.h                          |    9 +
>  libcpp/line-map.c                                  |   51 +
>  48 files changed, 6001 insertions(+), 55 deletions(-)
>  create mode 100644 checkers/ChangeLog
>  create mode 100644 checkers/Makefile
>  create mode 100755 checkers/always_fails.py
>  create mode 100755 checkers/checker.py
>  create mode 100755 checkers/clang_analyzer.py
>  create mode 100644 checkers/coverity.py
>  create mode 100755 checkers/cppcheck.py
>  create mode 100755 checkers/flawfinder.py
>  create mode 100755 checkers/ianal.py
>  create mode 100755 checkers/splint.py
>  create mode 100644 checkers/test-sources/conditional-leak.c
>  create mode 100644 checkers/test-sources/cpychecker-demo.c
>  create mode 100644 checkers/test-sources/divide-by-zero.c
>  create mode 100644 checkers/test-sources/harmless.c
>  create mode 100644 checkers/test-sources/multiple-1.c
>  create mode 100644 checkers/test-sources/multiple-2.c
>  create mode 100644 checkers/test-sources/out-of-bounds.c
>  create mode 100644 checkers/test-sources/read-through-null.c
>  create mode 100644 checkers/test-sources/return-of-stack-address.c
>  create mode 100644 checkers/test-sources/unconditional-file-leak.c
>  create mode 100644 contrib/get-static-analysis.py
>  create mode 100644 gcc/annobin.cc
>  create mode 100644 gcc/annobin.h
>  create mode 100644 gcc/checkers.cc
>  create mode 100644 gcc/checkers.h
>  create mode 100644 gcc/firehose.cc
>  create mode 100644 gcc/firehose.h
>  create mode 100644 gcc/json.cc
>  create mode 100644 gcc/json.h
>  create mode 100644 gcc/selftest-diagnostic.h
>  create mode 100644 gcc/selftest-input.h
>  create mode 100644 gcc/testsuite/selftests/checker-output/test-clang-analyzer.json
>  create mode 100644 gcc/testsuite/selftests/checker-output/test-cppcheck.json
>  create mode 100644 gcc/testsuite/selftests/checker-output/test-failure.json
>  create mode 100644 gcc/testsuite/selftests/checker-policy/test-policy.json
>

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 00/22] RFC: integrated 3rd-party static analysis support
  2017-08-05  1:00 ` [PATCH 00/22] RFC: integrated 3rd-party static analysis support Eric Gallager
@ 2017-08-08  0:23   ` David Malcolm
  0 siblings, 0 replies; 35+ messages in thread
From: David Malcolm @ 2017-08-08  0:23 UTC (permalink / raw)
  To: Eric Gallager; +Cc: gcc-patches

On Fri, 2017-08-04 at 21:00 -0400, Eric Gallager wrote:
> On 8/4/17, David Malcolm <dmalcolm@redhat.com> wrote:
> > This patch kit clearly isn't ready yet as-is (see e.g. the
> > "known unknowns" below), but I'm posting it now in the hope of
> > getting early feedback.
> > 
> > Summary
> > =======
> > 
> > This patch kit provides an easy way to integrate 3rd-party
> > static
> > analysis tools into gcc, and have them:
> > (a) report through gcc's diagnostic subsystem, and
> > (b) "watermark" the generated binaries with queryable data on what
> > checkers
> >     were run, and what the results were.
> > 
> > Here's an example showing gcc running a bank of 3rd-party checkers
> > on this
> > source file:
> > 
> >   #include <stdlib.h>
> > 
> >   void test ()
> >   {
> >     void *ptr_1;
> >     void *ptr_2;
> > 
> >     ptr_1 = malloc (64);
> >     if (!ptr_1)
> >       return;
> >     ptr_2 = malloc (64);
> >     if (!ptr_2)
> >       return;
> > 
> >     free (ptr_2);
> >     free (ptr_1);
> >   }
> > 
> > via a simple command-line:
> > 
> >   $ ./xgcc -B. -c conditional-leak.c -Wrun-analyzers=policy.json
> >   conditional-leak.c:13:5: error: Potential leak of memory pointed
> > to by
> > 'ptr_1' [clang-analyzer:Memory leak]
> >        return;
> >        ^
> >   conditional-leak.c:8:11: note: state 1 of 4: Memory is allocated
> >      ptr_1 = malloc (64);
> >              ^
> >   conditional-leak.c:9:7: note: state 2 of 4: Assuming 'ptr_1' is
> > non-null
> >      if (!ptr_1)
> >          ^
> >   conditional-leak.c:12:7: note: state 3 of 4: Assuming 'ptr_2' is
> > null
> >      if (!ptr_2)
> >          ^
> >   conditional-leak.c:13:5: note: state 4 of 4: Potential leak of
> > memory
> > pointed to by 'ptr_1'
> >        return;
> >        ^
> >   conditional-leak.c:13:0: error: Memory leak: ptr_1
> > [cppcheck:memleak]
> >        return;
> > 
> > Of the checkers, clang's static analyzer and cppcheck both identify
> > the
> > memory leak; the former also identifies the control flow (the other
> > checkers didn't report anything).
> > 
> > The idea is to provide a mechanism to make it easy for developers
> > and
> > projects to impose policy on what checkers should be run, and to
> > gate
> > the build if certain tests fail.
> > 
> > In this case, the results are treated as hard errors and block the
> > build,
> > but policy could allow them to be warnings.
> > 
> > Extensive metadata is captured about what checkers were run, and
> > what
> > they emitted, using the "Firehose" interchange format:
> > 
> >   http://firehose.readthedocs.io/en/latest/index.html
> > 
> > In the case where this doesn't block the build, this can be queried
> > via a
> >   contrib/get-static-analysis.py
> > script, so e.g. you can verify that a setuid binary was indeed
> > compiled
> > using all the checkers that you expect it to be.
> > 
> > This can also be used to embed data about the code into the
> > watermark.
> > For example, checkers/ianal.py embeds information about "Copyright"
> > lines in the source code into the generated binaries, from where it
> > can be queried (this example is intended as a proof-of-concept
> > rather
> > than as a real license-tracking solution...)
> > 
> > 
> > Statement of the problem
> > ========================
> > 
> > Static analysis is IMHO done too late, if at all: static analysis
> > tools are
> > run
> > as an optional extra, "on the side", rather than in developers'
> > normal
> > workflow, with some kind of "override the compiler and do extra
> > work" hook,
> > which may preclude running more than one analyzer at
> > once.  Analysis
> > results
> > are reviewed (if at all) in some kind of on-the-side tool, rather
> > than when
> > the
> > code is being edited, or patches being prepared.
> > 
> > It would be better to have an easy way for developers to run
> > analyzer(s)
> > as they're doing development, as part of their edit-compile-test
> > cycle
> > - analysis problems are reported immediately, and can be acted on
> > immediately (e.g. by treating some checker tests as being hard
> > errors).
> > 
> > It would also be good to have a way to run analyzer(s) when
> > packages are
> > built, with a variety of precanned policies for analyzers.  For
> > example,
> > setuid binaries and network-facing daemons could each be built with
> > a
> > higher strictness of checking.
> > 
> > It would also be good to tag binaries with information on what
> > analyzers
> > were run, what options they were invoked with, etc.
> > Potentially have "dump_file" information from optimization passes
> > stored
> > in the metadata also.   Have a tool to query all of this.
> > 
> > This way a distribution can perform a query like:
> > 
> >   "show me all setuid binaries that contain code that wasn't
> > checked
> >    with $CHECKER with $TEST set to be a hard error"
> > 
> > Can/should we break the build if there are issues?
> > Yes: but have a way to opt-in easily: if the tool is well-
> > integrated with
> > the
> >     compiler: e.g.
> > 
> > -Wrun-analyzers=/usr/share/analyzers/userspace/network-facing-
> > service
> > then upstream developers and packagers can turn on the setting, and
> > see
> > what
> > breaks, and fix it naturally within a compile-edit-test cycle
> > 
> > This gives a relatively painless way to opt-in to increasing levels
> > of
> > strictness (e.g. by an upstream project, or by an individual
> > developer).
> > 
> > Does this slow the build down?
> > Yes: but you can choose which analyzers run, and can choose to turn
> > them
> > off.
> > It ought to parallelize well.  I believe users will prefer to turn
> > them on,
> > and have builders burn up the extra CPU cycles.
> > This may make much more sense for binary distributions (e.g.
> > Fedora,
> > Debian)
> > than it does for things like Gentoo.
> > 
> > Example policy files/options might be:
> >   -Wrun-analyzers=/usr/share/analyzers/userspace/network-facing-
> > service
> >   -Wrun-analyzers=/usr/share/analyzers/userspace/application
> >   -Wrun-analyzers=/usr/share/analyzers/userspace/setuid-binary
> >   -Wrun-analyzers=/usr/share/analyzers/userspace/default
> >   -Wrun-analyzers=/usr/share/analyzers/kernel
> > 
> > or whatnot.
> > 
> > Idea is to provide mechanism, and for the distribution to decide on
> > some
> > standard policies.
> > 
> > This may also allow us to sandbox a gcc plugin by running the
> > plugin inside
> > another cc1, for plugins that add warnings - if the plugin ICEs,
> > then the
> > main
> > cc1 isn't affected (useful for doing mass rebuilds of code using an
> > experimental plugin).
> > 
> > 
> > Known unknowns
> > ==============
> > 
> > How does one suppress a specific false-positive site?
> > Do we need a pragma for it?  (though pragmas ought to already
> > affect some
> > of
> > the underlying checkers...)
> > 
> > Do we really want .json for the policy format?
> > If we're expecting users to edit this, we need great error
> > messages,
> > and probably support for comments.  Would YAML or somesuch be
> > better?
> > Or have them as individual command-line flags, and the policy files
> > are
> > "@" files for gcc.
> > 
> > How to mark which checkers are appropriate for which languages?
> > 
> > (etc; see also all the FIXMEs in the code...)
> > 
> > 
> > Dependencies
> > ============
> > 
> > The "checkers" subdirectory uses Python 2 or 3, and has a few
> > Python
> > dependencies, including "firehose" and "gccinvocation".
> > 
> > 
> > How it works
> > ============
> > 
> > If enabled, toplev.c starts each of the various checkers from
> > separate
> > threads from near the start of toplev.c, so that the checkers run
> > in
> > parallel with each other, and with the bulk of cc1.  Near the end
> > of
> > toplev.c it waits for each thread to finish, and reads the stdout,
> > which is expected to be in Firehose JSON format.  This is then sent
> > through the diagnostic subsystem.
> > 
> > Each "checker" is a harness script, which "knows" how to invoke
> > the particular 3rd-party tool, and coerce the output from the tool
> > into the common JSON format.
> > 
> > Some notes on the data model can be seen here:
> >   http://firehose.readthedocs.io/en/latest/data-model.html
> > (though that's expressed as Python objects and XML, rather than
> > the JSON format).
> > 
> > 
> > Successfully bootstrapped&regrtested the combination of the patches
> > on x86_64-pc-linux-gnu (though the only testcases are selftest
> > based
> > unit-tests, rather than DejaGnu tests).
> > 
> > 
> 
> General questions:
> 
> 1. When bootstrapping, did you try adding the new -Wrun-analyzers to
> the build system to see what it reports for GCC's own source code?
> It'd be worthwhile to do some dogfooding to determine what kind of
> results it produces

I haven't done that yet; the bootstrapping purely exercised the
selftests (and ensured a lack of warnings on the new code).

> 2. Since -Wrun-analyzers is a warning option, can I turn messages
> from
> it into errors with -Werror=run-analyzers? Will it work with #pragma
> GCC diagnostic push/pop?

Neither of those will work as-is.

The option was actually -frun-analyzers until the day before I posted
the kit; changing it to be a "-W" option was a relatively last-minute
thing, and I think I now prefer it to be -f.

I don't like the patch kit's "monolithic" nature of loading a .json
policy file.  I think my preferred approach now is to pass in a -f
option locating a checker script, and to have extra options controlling
it, and for these to be in @ files.  So something like:

$ gcc -c foo.c @some-policy.opt

[etc]

# Policy file can be built up from policies for individual checkers:

$ cat some-policy.opt

@clang-analyzer.opt
@cppcheck.opt

# An "@"-file for one checker:

$ cat clang-analyzer.opt

-frun-checker=/path/to/checker/clang-analyzer.py
  -fchecker-clang-analyzer=error
  -fchecker-clang-analyzer:"something else"=log

or somesuch.  I'd prefer things to default to errors.

I agree that we'd need some kind of interaction with pragmas, for more
fine-grained control over test ids.

Maybe have e.g.:

-frun-checker=/path/to/checker/some-checker.py
  -Wchecker-clang-analyzer=error
  -Wchecker-clang-analyzer:"some specific test"=log
  -Wchecker-clang-analyzer:"some specific test"=ignore

so the option to add a checker is "-f", but then the tests within the
checker are controlled by "-W".

(I'm thinking aloud here).

> 3. Do we care about duplicated warnings between the analyzers and GCC
> at all? I'm just thinking, if I put in a bug report requesting a new
> warning for GCC after -Wrun-analyzers is added, will people just
> close
> it saying "Oh you can already get that with -Wrun-analyzers" or
> something? That would be disappointing.

There's plenty of duplication between e.g. clang-analyzer and cppcheck;
the more tests the better IMHO.

> 4. Along those lines, how responsible exactly will GCC be for issues
> with the analyzers it runs? For example, I know splint (from [20/22])
> at least is pretty buggy, it crashed 5 times on me when running it
> manually over my fork of gdb. Does GCC really want to encourage the
> use of potentially buggy external tools that might lead to issues?

Although the patch kit has 7 checkers, I found that the only Free ones
that seemed useful in my (limited) testing were clang-analyzer and
cppcheck.

My thinking was to provide the mechanism to allow people to run 3rd-
party analysis tools; whether those tools are any good is for the users
to decide.  But yeah, I agree that there's a potential issue with us
shipping "support" for tools that might not be up-to-scratch.

> ...I thought I had a 5th one but I forget... Anyways, I like the idea
> overall! Keep up the good work!

Thanks
Dave

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 00/22] RFC: integrated 3rd-party static analysis support
  2017-08-04 21:30 [PATCH 00/22] RFC: integrated 3rd-party static analysis support David Malcolm
                   ` (23 preceding siblings ...)
  2017-08-06 21:21 ` Martin Sebor
@ 2017-08-08 17:57 ` Richard Sandiford
  2017-09-01 17:46 ` Jeff Law
  25 siblings, 0 replies; 35+ messages in thread
From: Richard Sandiford @ 2017-08-08 17:57 UTC (permalink / raw)
  To: David Malcolm; +Cc: gcc-patches

David Malcolm <dmalcolm@redhat.com> writes:
> This patch kit clearly isn't ready yet as-is (see e.g. the
> "known unknowns" below), but I'm posting it now in the hope of
> getting early feedback.

Looks really useful.  One thing I wasn't sure of from a quick scan was:
what would happen if the input had something like a syntax error?  Would
it suppress the output from the analysers and just print GCC's output,
or would it still print both?

Very minor question in the grand scheme of things, sorry.

Richard

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 00/22] RFC: integrated 3rd-party static analysis support
  2017-08-04 21:30 [PATCH 00/22] RFC: integrated 3rd-party static analysis support David Malcolm
                   ` (24 preceding siblings ...)
  2017-08-08 17:57 ` Richard Sandiford
@ 2017-09-01 17:46 ` Jeff Law
  2017-09-02  2:46   ` Trevor Saunders
  25 siblings, 1 reply; 35+ messages in thread
From: Jeff Law @ 2017-09-01 17:46 UTC (permalink / raw)
  To: David Malcolm, gcc-patches

On 08/04/2017 04:04 PM, David Malcolm wrote:
> This patch kit clearly isn't ready yet as-is (see e.g. the
> "known unknowns" below), but I'm posting it now in the hope of
> getting early feedback.
[ ... ]

> 
> 
> Statement of the problem
> ========================
> 
> Static analysis is IMHO done too late, if at all: static analysis tools are run
> as an optional extra, "on the side", rather than in developers' normal
> workflow, with some kind of "override the compiler and do extra work" hook,
> which may preclude running more than one analyzer at once.  Analysis results
> are reviewed (if at all) in some kind of on-the-side tool, rather than when the
> code is being edited, or patches being prepared.
I'm sure you know my opinions on this stuff.  But for the benefit of the
rest of our readers, I agree, 100% totally on all of this.

For checkers to really be effective, they have to be part of the
standard workflow that we use every day.  Anything else is ultimately a
losing battle.  That's in large part why I continue to support improving
GCC's ability to emit high quality useful warnings about likely
programming errors.

So this raises one very high level question.  By providing this
capability do we undermine further development of GCC's own analysis
capabilities or does it merely allow that development to move to its
most natural place (gcc, llvm/clang, smatch, cppcheck, whatever)
allowing each tool to focus on what it does best?


> 
> It would also be good to tag binaries with information on what analyzers
> were run, what options they were invoked with, etc.
> Potentially have "dump_file" information from optimization passes stored
> in the metadata also.   Have a tool to query all of this.
So as you know this is a real area of interest for Red Hat.  Nick has
been playing in this space with his binary annotation project.  How are
these likely to interact with each other?

[ ... ]

> 
> 
> Known unknowns
> ==============
> 
> How does one suppress a specific false-positive site?
> Do we need a pragma for it?  (though pragmas ought to already affect some of
> the underlying checkers...)
I'm always conflicted on this kind of suppression/marking.  You can
easily end up with a boatload of unmaintainable markers.  But without
them you've got a firehose of useless information.  Sigh.


> 
> 
> Dependencies
> ============
> 
> The "checkers" subdirectory uses Python 2 or 3, and has a few Python
> dependencies, including "firehose" and "gccinvocation".
I'm not sure if there's general buy-in on firehose.  Not sure about
gccinvocation.  So these may need revisiting.

But we certainly need a way to suck in and present information to the
developers.  I'd prefer to re-use existing concepts and code, so JSON
may be the way to go for the interchange format.




JEff

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 01/22] Expose assert_loceq outside of input.c; add ASSERT_LOCEQ
  2017-08-04 21:30 ` [PATCH 01/22] Expose assert_loceq outside of input.c; add ASSERT_LOCEQ David Malcolm
@ 2017-09-01 17:49   ` Jeff Law
  0 siblings, 0 replies; 35+ messages in thread
From: Jeff Law @ 2017-09-01 17:49 UTC (permalink / raw)
  To: David Malcolm, gcc-patches

On 08/04/2017 04:04 PM, David Malcolm wrote:
> gcc/ChangeLog:
> 	* input.c: Include "selftest-input.h".
> 	(selftest::assert_loceq): Remove "static".  Add "report_loc" param
> 	and update assertions to use it.
> 	(selftest::test_accessing_ordinary_linemaps): Use ASSERT_LOCEQ
> 	rather than assert_loceq.
> 	(selftest::test_builtins): Likewise.
> 	* selftest-input.h: New file.
No concerns here.  IMHO this is all probably within an area that I think
you could argue for self-approval.

jeff

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 02/22] libcpp: add linemap_position_for_file_line_and_column
  2017-08-04 21:30 ` [PATCH 02/22] libcpp: add linemap_position_for_file_line_and_column David Malcolm
@ 2017-09-01 17:50   ` Jeff Law
  0 siblings, 0 replies; 35+ messages in thread
From: Jeff Law @ 2017-09-01 17:50 UTC (permalink / raw)
  To: David Malcolm, gcc-patches

On 08/04/2017 04:04 PM, David Malcolm wrote:
> gcc/ChangeLog:
> 	* input.c (selftest::test_making_arbitrary_locations): New function.
> 	(selftest::input_c_tests): Call it.
> 
> libcpp/ChangeLog:
> 	* include/line-map.h (linemap_position_for_file_line_and_column):
> 	New decl.
> 	* line-map.c (linemap_position_for_file_line_and_column): New
> 	function.
Similarly.  No concerns.  Go with it when it makes sense.

jeff

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 03/22] Add JSON implementation
  2017-08-04 21:30 ` [PATCH 03/22] Add JSON implementation David Malcolm
@ 2017-09-01 17:56   ` Jeff Law
  0 siblings, 0 replies; 35+ messages in thread
From: Jeff Law @ 2017-09-01 17:56 UTC (permalink / raw)
  To: David Malcolm, gcc-patches

On 08/04/2017 04:04 PM, David Malcolm wrote:
> This patch adds support to gcc for reading and writing JSON,
> based on DOM-like trees of json::value instances.
> 
> gcc/ChangeLog:
> 	* Makefile.in (OBJS): Add json.o.
> 	* json.cc: New file.
> 	* json.h: New file.
> 	* selftest-run-tests.c (selftest::run_tests): Call json_cc_tests.
> 	* selftest.h (selftest::json_cc_tests): New decl.
Any chance we can re-use an implementation?  I don't see a lot of value
in providing our own JSON bits.  Is JSON inherently easier/better in
some way over XML or other formats we could be using for the interchange
of data?


jeff

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 06/22] Makefile.in: hack in -lpthread
  2017-08-04 21:38 ` [PATCH 06/22] Makefile.in: hack in -lpthread David Malcolm
@ 2017-09-01 18:13   ` Jeff Law
  0 siblings, 0 replies; 35+ messages in thread
From: Jeff Law @ 2017-09-01 18:13 UTC (permalink / raw)
  To: David Malcolm, gcc-patches

On 08/04/2017 04:04 PM, David Malcolm wrote:
> The checker.cc patch later in the kit can optionally make use of pthread
> if available.
> 
> Doing it properly would involve some configure checks; this patch simply
> hacks in -lpthread into LIB unconditionally for now.
> 
> gcc/ChangeLog:
> 	* Makefile.in (LIB): Hack in -lpthread.
Obviously this would need to be improved before it could move forward.

What's the advantage of firing off a thread vs the usual fork/exec
model?  fork/exec is something we know how to deal with across all the
hosts GCC supports.

jeff

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 07/22] Add minimal version of Nick Clifton's annobin code
  2017-08-04 21:36 ` [PATCH 07/22] Add minimal version of Nick Clifton's annobin code David Malcolm
@ 2017-09-01 18:17   ` Jeff Law
  0 siblings, 0 replies; 35+ messages in thread
From: Jeff Law @ 2017-09-01 18:17 UTC (permalink / raw)
  To: David Malcolm, gcc-patches

On 08/04/2017 04:04 PM, David Malcolm wrote:
> This patch provides a way to "watermark" binaries with
> metadata.  It's used later in the patch kit to watermark
> binaries with static analysis results and metadata.
> 
> See:
>   https://fedoraproject.org/wiki/Toolchain/Watermark
> 
> Note: this is a version of Nick Clifton's "annobin" gcc plugin:
>   https://nickc.fedorapeople.org/
> heavily hacked up by me:
> * removed everything (including plugin support) not needed by
>   later patches in the kit
> * rewritten as an API, rather than as a plugin
> * removed annobin_inform (..., "ICE: ...") calls in favor of
>   gcc_assert.
> * line-wrapped
> * added an annobin_ensure_init to initialize annobin_is_64bit.
> * added #ifndef guard to annobin.h
> 
> It includes the commits:
> * Remove size limit on string passed to annobin_output_string_note
> * Version 2 of spec: Add a GA prefix to all names
So we'd really like to have just one annobin that handles Nick's needs
as well as yours.  Having two just seems silly.

The idea behind using a plugin was to allow annobin to continue
development/releases independent of the trunk of GCC.  Ideally we'd be
able to re-use new versions of annobin in older versions of GCC that
have a fairly long lifecycle.

I think you and Nick need to coordinate in this space.

jeff

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 05/22] diagnostic.c/h: add support for external tools
  2017-08-04 21:39 ` [PATCH 05/22] diagnostic.c/h: add support for external tools David Malcolm
@ 2017-09-01 18:18   ` Jeff Law
  0 siblings, 0 replies; 35+ messages in thread
From: Jeff Law @ 2017-09-01 18:18 UTC (permalink / raw)
  To: David Malcolm, gcc-patches

On 08/04/2017 04:04 PM, David Malcolm wrote:
> This patch adds fields "external_tool" and "external_test_id"
> to diagnostic_info, allowing for diagnostics to be marked as
> coming from a 3rd-party tool.
> 
> Instead of printing the pertinent warning flag e.g.:
> 
>   foo.c:10:1: something is wrong [-Wpointer-arith]
> 
> the tool "ID" and (optionally) test ID is printed e.g.:
> 
>   foo.c:10:1: something is wrong [cppcheck:memleak]
> 
> gcc/ChangeLog:
> 	* diagnostic-show-locus.c: Include "selftest-diagnostic.h".
> 	(class selftest::test_diagnostic_context): Move to
> 	selftest-diagnostic.h.
> 	* diagnostic.c: Include "selftest-diagnostic.h".
> 	(diagnostic_info::diagnostic_info): New ctor.
> 	(print_option_information): Handle external_tool and
> 	external_test_id fields of diagnostic_info.
> 	(diagnostic_report_diagnostic): Assert that diagnostic->kind is
> 	not DK_UNSPECIFIED.
> 	(selftest::dummy_option_name_cb): New function.
> 	(selftest::assert_option_information): New function.
> 	(selftest::test_print_option_information): New function.
> 	(selftest::diagnostic_c_tests): Call
> 	selftest::test_print_option_information.
> 	* diagnostic.h (struct diagnostic_info): Add default ctor,
> 	along with new fields "external_tool" and "external_test_id".
> 	* selftest-diagnostic.h: New file.
Seems fairly reasonable and probably in the realm of self-approvable.

jeff

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 00/22] RFC: integrated 3rd-party static analysis support
  2017-09-01 17:46 ` Jeff Law
@ 2017-09-02  2:46   ` Trevor Saunders
  0 siblings, 0 replies; 35+ messages in thread
From: Trevor Saunders @ 2017-09-02  2:46 UTC (permalink / raw)
  To: Jeff Law; +Cc: David Malcolm, gcc-patches

On Fri, Sep 01, 2017 at 11:46:41AM -0600, Jeff Law wrote:
> On 08/04/2017 04:04 PM, David Malcolm wrote:
> > This patch kit clearly isn't ready yet as-is (see e.g. the
> > "known unknowns" below), but I'm posting it now in the hope of
> > getting early feedback.
> [ ... ]
> 
> > 
> > 
> > Statement of the problem
> > ========================
> > 
> > Static analysis is IMHO done too late, if at all: static analysis tools are run
> > as an optional extra, "on the side", rather than in developers' normal
> > workflow, with some kind of "override the compiler and do extra work" hook,
> > which may preclude running more than one analyzer at once.  Analysis results
> > are reviewed (if at all) in some kind of on-the-side tool, rather than when the
> > code is being edited, or patches being prepared.
> I'm sure you know my opinions on this stuff.  But for the benefit of the
> rest of our readers, I agree, 100% totally on all of this.

I more or less agree, though I think arrangements where it's run by build
bots and bustage is treated like breaking the build can be reasonable.

> For checkers to really be effective, they have to be part of the
> standard workflow that we use every day.  Anything else is ultimately a
> losing battle.  That's in large part why I continue to support improving
> GCC's ability to emit high quality useful warnings about likely
> programming errors.

I find myself also wondering how much can be done with generic checkers
as opposed to ones specific to a particular project and its style /
idioms.  If nongeneric checkers are important then it should be
important to make it easy for people to write their own checkers.

> So this raises one very high level question.  By providing this
> capability do we undermine further development of GCC's own analysis
> capabilities or does it merely allow that development to move to its
> most natural place (gcc, llvm/clang, smatch, cppcheck, whatever)
> allowing each tool to focus on what it does best?

Given there is already a lot of work going on around llvm/clang checkers
it seems reasonable to expect there would be less incentive to improve
gcc in that area if you could just use the clang based checkers from
gcc.  Of course whether that is a good or bad thing is another question.

Most of the static checkers support being invoked on a file or as if
they were a compiler. So it should be easy to write a wrapper script
that runs the checker and then the compiler if you wish, or have a
build target that does all the static checking.  So it's not clear to me how
much users really get out of this work.  In part I wonder if this is
more a social problem to solve than a technical one.

> > Known unknowns
> > ==============
> > 
> > How does one suppress a specific false-positive site?
> > Do we need a pragma for it?  (though pragmas ought to already affect some of
> > the underlying checkers...)
> I'm always conflicted on this kind of suppression/marking.  You can
> easily end up with a boatload of unmaintainable markers.  But without
> them you've got a firehose of useless information.  Sigh.

The other question would be if multiple checkers share a false positive,
how do you nicely disable all of them.  Having 3 different comments /
pragmas to disable the same thing sounds unpleasant.

thanks

Trev

^ permalink raw reply	[flat|nested] 35+ messages in thread

end of thread, other threads:[~2017-09-02  2:46 UTC | newest]

Thread overview: 35+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-04 21:30 [PATCH 00/22] RFC: integrated 3rd-party static analysis support David Malcolm
2017-08-04 21:30 ` [PATCH 02/22] libcpp: add linemap_position_for_file_line_and_column David Malcolm
2017-09-01 17:50   ` Jeff Law
2017-08-04 21:30 ` [PATCH 01/22] Expose assert_loceq outside of input.c; add ASSERT_LOCEQ David Malcolm
2017-09-01 17:49   ` Jeff Law
2017-08-04 21:30 ` [PATCH 03/22] Add JSON implementation David Malcolm
2017-09-01 17:56   ` Jeff Law
2017-08-04 21:36 ` [PATCH 17/22] Add checkers/cppcheck.py David Malcolm
2017-08-04 21:36 ` [PATCH 07/22] Add minimal version of Nick Clifton's annobin code David Malcolm
2017-09-01 18:17   ` Jeff Law
2017-08-04 21:36 ` [PATCH 19/22] Add checkers/ianal.py David Malcolm
2017-08-04 21:36 ` [PATCH 16/22] Add checkers/coverity.py David Malcolm
2017-08-04 21:36 ` [PATCH 10/22] Add checkers.h/cc David Malcolm
2017-08-04 21:36 ` [PATCH 08/22] Add GNU_BUILD_ATTRIBUTE_STATIC_ANALYSIS to annobin.h David Malcolm
2017-08-04 21:36 ` [PATCH 09/22] Add selftest::read_file (..., FILE *, ...) David Malcolm
2017-08-04 21:36 ` [PATCH 22/22] Add contrib/get-static-analysis.py David Malcolm
2017-08-04 21:37 ` [PATCH 04/22] Add firehose.h/cc David Malcolm
2017-08-04 21:37 ` [PATCH 15/22] Add checkers/clang_analyzer.py David Malcolm
2017-08-04 21:37 ` [PATCH 11/22] Add checkers/test-sources David Malcolm
2017-08-04 21:38 ` [PATCH 14/22] Add checkers/always_fails.py David Malcolm
2017-08-04 21:38 ` [PATCH 21/22] Add checkers/Makefile David Malcolm
2017-08-04 21:38 ` [PATCH 06/22] Makefile.in: hack in -lpthread David Malcolm
2017-09-01 18:13   ` Jeff Law
2017-08-04 21:38 ` [PATCH 13/22] Add checkers/checker.py David Malcolm
2017-08-04 21:38 ` [PATCH 12/22] Add -Wrun-analyzers= to common.opt, toplev.c, and invoke.texi David Malcolm
2017-08-04 21:39 ` [PATCH 18/22] Add checkers/flawfinder.py David Malcolm
2017-08-04 21:39 ` [PATCH 05/22] diagnostic.c/h: add support for external tools David Malcolm
2017-09-01 18:18   ` Jeff Law
2017-08-04 21:39 ` [PATCH 20/22] Add checkers/splint.py David Malcolm
2017-08-05  1:00 ` [PATCH 00/22] RFC: integrated 3rd-party static analysis support Eric Gallager
2017-08-08  0:23   ` David Malcolm
2017-08-06 21:21 ` Martin Sebor
2017-08-08 17:57 ` Richard Sandiford
2017-09-01 17:46 ` Jeff Law
2017-09-02  2:46   ` Trevor Saunders

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).