public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] analyzer: enabling Modula-2 Storage/ALLOCATE/DEALLOCATE
@ 2021-04-13  9:52 Gaius Mulley
  2021-04-13 15:12 ` David Malcolm
  0 siblings, 1 reply; 3+ messages in thread
From: Gaius Mulley @ 2021-04-13  9:52 UTC (permalink / raw)
  To: gcc-patches, dmalcolm



Hello David and fellow GCC developers,

[the proposed patches for GCC trunk are at the end of the email]

I've been having way too much fun with your -fanalyzer code and
here are four patches which give the analyzer the ability to
understand the Modula-2 Storage API.

  http://www.nongnu.org/gm2/gm2-libs-isostorage.html
  http://www.nongnu.org/gm2/gm2-libsstorage.html
  http://www.nongnu.org/gm2/gm2-libssysstorage.html

Here it is in action - these short tests have all been added to the
gm2 regression testsuite.  Many are from your C examples - rewritten
in Modula-2.  Ignoring a few poor token position subexpressions from
the front end, which I'm currently improving, it appears to work!

I've rebuilt GCC trunk 10/04/2021 with these patches and no new
regressions are introduced.  I ran a build of: c,c++,go,d,fortran,lto
and checked the before and after regression tests.  The new code is by
default disabled - hence the langhook patches.

While it is true that this is the cart before the horse (the gm2 front
end is still to arrive in the gcc tree).  I thought it might be useful
to post the patches - just in case others might benefit from the code
or point out bugs/flaws in the code!

Anyway thanks for the analyzer - it is great fun reading the code and
addictive trying to improve the accuracy of the error messages.  The
four patches follow after the examples below.

Examples of it in use:

$ cat badlistfree.mod
MODULE badlistfree ;

FROM Storage IMPORT ALLOCATE, DEALLOCATE ;

TYPE
   list = POINTER TO RECORD
                        value: CARDINAL ;
                        next : list ;
                     END ;
VAR
   head: list ;

PROCEDURE badfree (l: list) ;
BEGIN
   DISPOSE (l) ;
   WHILE l^.next # NIL DO
      l := l^.next ;
      DISPOSE (l)
   END
END badfree ;


BEGIN
   NEW (head) ;
   badfree (head) ;
END badlistfree.
$ gm2 -g -c -fanalyzer badlistfree.mod
badlistfree.mod: In function ‘badfree’:
badlistfree.mod:16:24: warning: use after ‘DISPOSE’ of ‘l’ [CWE-416] [-Wanalyzer-use-after-free]
   16 |    WHILE l^.next # NIL DO
      |                        ^~
  ‘badfree’: events 1-2
    |
    |   15 |    DISPOSE (l) ;
    |      |    ^~~~~~~~~~
    |      |    |
    |      |    (1) deallocated here
    |   16 |    WHILE l^.next # NIL DO
    |      |                        ~~
    |      |                        |
    |      |                        (2) use after ‘DISPOSE’ of ‘l’; deallocated at (1)
    |
$ cat disposenoalloc.mod
MODULE disposenoalloc ;

FROM Storage IMPORT ALLOCATE, DEALLOCATE ;
FROM SYSTEM IMPORT ADR ;

TYPE
   list = POINTER TO RECORD
                        value: CARDINAL ;
                        next : list ;
                     END ;
VAR
   head: list ;

BEGIN
   head := ADR (head) ;
   DISPOSE (head)
END disposenoalloc.
$ gm2 -g -c -fanalyzer disposenoalloc.mod
disposenoalloc.mod: In function ‘_M2_disposenoalloc_init’:
disposenoalloc.mod:16:4: warning: ‘DISPOSE’ of ‘head’ which points to memory not on the heap [CWE-590] [-Wanalyzer-free-of-non-heap]
   16 |    DISPOSE (head)
      |    ^~~~~~~~~~~~~
  ‘_M2_disposenoalloc_init’: events 1-3
    |
    |   15 |    head := ADR (head) ;
    |      |                       ^
    |      |                       |
    |      |                       (1) pointer is from here
    |   16 |    DISPOSE (head)
    |      |    ~~~~~~~~~~~~~       
    |      |    |        |
    |      |    |        (2) pointer is from here
    |      |    (3) call to ‘DISPOSE’ here
    |
$ cat testdoubledispose.mod
MODULE testdoubledispose ;

FROM Storage IMPORT ALLOCATE, DEALLOCATE ;

TYPE
   list = POINTER TO RECORD
                        value: CARDINAL ;
                        next : list ;
                     END ;
VAR
   head: list ;
BEGIN
   NEW (head) ;
   DISPOSE (head) ;
   DISPOSE (head) ;
END testdoubledispose.
$ gm2 -g -c -fanalyzer testdoubledispose.mod
testdoubledispose.mod: In function ‘_M2_testdoubledispose_init’:
testdoubledispose.mod:15:4: warning: double-‘DISPOSE’ of ‘head’ [CWE-415] [-Wanalyzer-double-free]
   15 |    DISPOSE (head) ;
      |    ^~~~~~~~~~~~~
  ‘_M2_testdoubledispose_init’: events 1-3
    |
    |   13 |    NEW (head) ;
    |      |    ^~~~~~~~~
    |      |    |
    |      |    (1) allocated here
    |   14 |    DISPOSE (head) ;
    |      |    ~~~~~~~~~~~~~
    |      |    |
    |      |    (2) first ‘DISPOSE’ here
    |   15 |    DISPOSE (head) ;
    |      |    ~~~~~~~~~~~~~
    |      |    |
    |      |    (3) second ‘DISPOSE’ here; first ‘DISPOSE’ was at (2)
    |
$ cat testdoublefree.mod
MODULE testdoublefree ;

FROM libc IMPORT malloc, free ;
FROM SYSTEM IMPORT ADDRESS ;

VAR
   a: ADDRESS ;
BEGIN
   a := malloc (100) ;
   free (a) ;
   free (a)
END testdoublefree.
$ gm2 -g -c -fanalyzer testdoublefree.mod
testdoublefree.mod: In function ‘_M2_testdoublefree_init’:
testdoublefree.mod:11:4: warning: double-‘free’ of ‘a’ [CWE-415] [-Wanalyzer-double-free]
   11 |    free (a)
      |    ^~~~
  ‘_M2_testdoublefree_init’: events 1-3
    |
    |    9 |    a := malloc (100) ;
    |      |         ^~~~~~
    |      |         |
    |      |         (1) allocated here
    |   10 |    free (a) ;
    |      |    ~~~~  
    |      |    |
    |      |    (2) first ‘free’ here
    |   11 |    free (a)
    |      |    ~~~~  
    |      |    |
    |      |    (3) second ‘free’ here; first ‘free’ was at (2)
    |
$ cat useafterdeallocate.mod
MODULE useafterdeallocate ;

FROM Storage IMPORT ALLOCATE, DEALLOCATE ;

TYPE
   ptrType = POINTER TO RECORD
                           foo: CARDINAL ;
                        END ;

VAR
   head: ptrType ;
BEGIN
   NEW (head) ;
   IF head # NIL
   THEN
      head^.foo := 1 ;
      DISPOSE (head) ;
      head^.foo := 2
   END
END useafterdeallocate.
$ gm2 -g -c -fanalyzer useafterdeallocate.mod
useafterdeallocate.mod: In function ‘_M2_useafterdeallocate_init’:
useafterdeallocate.mod:18:17: warning: use after ‘DISPOSE’ of ‘head’ [CWE-416] [-Wanalyzer-use-after-free]
   18 |       head^.foo := 2
      |                 ^~
  ‘_M2_useafterdeallocate_init’: events 1-6
    |
    |   13 |    NEW (head) ;
    |      |    ^~~~~~~~~
    |      |    |
    |      |    (1) allocated here
    |   14 |    IF head # NIL
    |   15 |    THEN
    |      |    ~~~~
    |      |    |
    |      |    (2) assuming ‘head’ is non-NULL
    |      |    (3) following ‘false’ branch...
    |   16 |       head^.foo := 1 ;
    |      |            ~
    |      |            |
    |      |            (4) ...to here
    |   17 |       DISPOSE (head) ;
    |      |       ~~~~~~~~~~~~~
    |      |       |
    |      |       (5) deallocated here
    |   18 |       head^.foo := 2
    |      |                 ~~
    |      |                 |
    |      |                 (6) use after ‘DISPOSE’ of ‘_T30’; deallocated at (5)
    |

and the four file patches:

ChangeLog entries:
==================

* gcc/analyzer/sm-malloc.cc: include langhooks.
(m_storage_deallocate) new field.
(malloc_state_machine::on_deallocator_call) pass_by_reference
parameter added.  Dereference parameter if necessary.
(skip_reference) New function.
(malloc_state_machine::on_allocator_call_pass_by_ref) New method.
(malloc_state_machine::on_stmt) test
lhd_new_dispose_storage_substitution to see if heap allocators
Storage_ALLOCATE or SysStorage_ALLOCATE should be analyzed.  Also
setup heap deallocators Storage_DEALLOCATE and SysStorage_DEALLOCATE.
* langhooks-def.h: (lhd_new_dispose_storage_substitution) New
prototype.  (LANG_HOOKS_NEW_DISPOSE_STORAGE_SUBSTITUTION) New
define which is added to the lang_hooks initializer.
* langhooks.h: (new_dispose_storage_substitution) New
function indirect field for struct lang_hooks.
* gcc/langhooks.c: (lhd_new_dispose_storage_substitution) New default
stub disabling NEW/DISPOSE Storage analysis.

Patches
=======


diff --git a/gcc/analyzer/sm-malloc.cc b/gcc/analyzer/sm-malloc.cc
index 1d5b8601b1f..9f0f544e32c 100644
--- a/gcc/analyzer/sm-malloc.cc
+++ b/gcc/analyzer/sm-malloc.cc
@@ -22,6 +22,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "system.h"
 #include "coretypes.h"
 #include "tree.h"
+#include "langhooks-def.h"
+#include "langhooks.h"
 #include "function.h"
 #include "basic-block.h"
 #include "gimple.h"
@@ -44,6 +46,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "analyzer/region-model.h"
 #include "stringpool.h"
 #include "attribs.h"
+#include "print-tree.h"

 #if ENABLE_ANALYZER

@@ -387,6 +390,7 @@ public:
   standard_deallocator_set m_free;
   standard_deallocator_set m_scalar_delete;
   standard_deallocator_set m_vector_delete;
+  standard_deallocator_set m_storage_deallocate;

   standard_deallocator m_realloc;

@@ -419,13 +423,18 @@ private:
 			    const supernode *node,
 			    const gcall *call,
 			    const deallocator *d,
-			    unsigned argno) const;
+			     unsigned argno,
+			     bool pass_by_reference = false) const;
   void on_realloc_call (sm_context *sm_ctxt,
 			const supernode *node,
 			const gcall *call) const;
   void on_zero_assignment (sm_context *sm_ctxt,
 			   const gimple *stmt,
 			   tree lhs) const;
+  void on_allocator_call_pass_by_ref (sm_context *sm_ctxt,
+				       const gcall *call,
+				       const deallocator_set *deallocators,
+				       bool returns_nonnull = false) const;

   /* A map for consolidating deallocators so that they are
      unique per deallocator FUNCTION_DECL.  */
@@ -1370,6 +1379,7 @@ malloc_state_machine::malloc_state_machine (logger *logger)
   m_free (this, "free", WORDING_FREED),
   m_scalar_delete (this, "delete", WORDING_DELETED),
   m_vector_delete (this, "delete[]", WORDING_DELETED),
+  m_storage_deallocate (this, "DISPOSE", WORDING_DEALLOCATED),
   m_realloc (this, "realloc", WORDING_REALLOCATED)
 {
   gcc_assert (m_start->get_id () == 0);
@@ -1415,7 +1425,7 @@ get_or_create_custom_deallocator_set (tree allocator_fndecl)
     return NULL;

   /* Otherwise, call maybe_create_custom_deallocator_set,
-     memoizing the result.  */
+     memorizing the result.  */
   if (custom_deallocator_set **slot
       = m_custom_deallocator_set_cache.get (allocator_fndecl))
     return *slot;
@@ -1526,6 +1536,15 @@ malloc_state_machine::on_stmt (sm_context *sm_ctxt,
 	    return true;
 	  }

+	 if (lang_hooks.new_dispose_storage_substitution)
+	   /* m2 allocation.  */
+	   if (is_named_call_p (callee_fndecl, "Storage_ALLOCATE", call, 2)
+	       || is_named_call_p (callee_fndecl, "SysStorage_ALLOCATE", call, 2))
+	     {
+	       on_allocator_call_pass_by_ref (sm_ctxt, call, &m_storage_deallocate, false);
+	       return true;
+	     }
+
 	if (is_named_call_p (callee_fndecl, "operator new", call, 1))
 	  on_allocator_call (sm_ctxt, call, &m_scalar_delete);
 	else if (is_named_call_p (callee_fndecl, "operator new []", call, 1))
@@ -1562,6 +1581,16 @@ malloc_state_machine::on_stmt (sm_context *sm_ctxt,
 	    return true;
 	  }

+	 if (lang_hooks.new_dispose_storage_substitution)
+	   /* m2 deallocation.	*/
+	   if (is_named_call_p (callee_fndecl, "Storage_DEALLOCATE", call, 2)
+	       || is_named_call_p (callee_fndecl, "SysStorage_DEALLOCATE", call, 2))
+	     {
+	       on_deallocator_call (sm_ctxt, node, call,
+				    &m_storage_deallocate.m_deallocator, 0, true);
+	       return true;
+	     }
+
 	if (is_named_call_p (callee_fndecl, "realloc", call, 2)
 	    || is_named_call_p (callee_fndecl, "__builtin_realloc", call, 2))
 	  {
@@ -1731,16 +1760,72 @@ malloc_state_machine::on_allocator_call (sm_context *sm_ctxt,
     }
 }

+/* Skips an ADDR_EXPR if seen.	 */
+
+static
+tree
+skip_reference (tree arg)
+{
+  if (TREE_CODE (arg) == ADDR_EXPR)
+      return TREE_OPERAND (arg, 0);
+  return arg;
+}
+
+
+/* Handle allocators which return the value through a pass by reference parameter.  */
+
+void
+malloc_state_machine::on_allocator_call_pass_by_ref (sm_context *sm_ctxt,
+						      const gcall *call,
+						      const deallocator_set *deallocators,
+						      bool returns_nonnull) const
+{
+  if (gimple_call_num_args (call) == 0)
+    return;
+  tree arg = gimple_call_arg (call, 0);
+  if (arg)
+    {
+      /* in Modula-2 the heap allocator API is: ALLOCATE (VAR ADDRESS;
+	  CARDINAL).  So we need to skip the reference or pointer in
+	  the first parameter.	*/
+      tree diag_arg_lvalue = sm_ctxt->get_diagnostic_tree (arg);
+      tree diag_arg_rvalue = skip_reference (diag_arg_lvalue);
+      if (sm_ctxt->get_state (call, diag_arg_rvalue) == m_start)
+	{
+	  sm_ctxt->set_next_state (call, diag_arg_rvalue,
+				   (returns_nonnull
+				    ? deallocators->m_nonnull
+				    : deallocators->m_unchecked));
+	}
+    }
+}
+
 void
 malloc_state_machine::on_deallocator_call (sm_context *sm_ctxt,
 					   const supernode *node,
 					   const gcall *call,
 					   const deallocator *d,
-					   unsigned argno) const
+					    unsigned argno,
+					    bool pass_by_reference) const
 {
   if (argno >= gimple_call_num_args (call))
     return;
   tree arg = gimple_call_arg (call, argno);
+  if (pass_by_reference)
+    {
+      tree diag_arg = sm_ctxt->get_diagnostic_tree (arg);
+      /* in Modula-2 the API is: DEALLOCATE (VAR a: ADDRESS; size:
+	  CARDINAL).  So we need to skip the pointer or reference in
+	  the first parameter.	We also know the pointer is assigned to
+	  NULL.	 In C this could be described as:
+
+	  DEALLOCATE (void **address, unsigned int size)
+	  {
+	     free (*address);
+	     *address = NULL;
+	  }.  */
+      arg = skip_reference (diag_arg);
+    }

   state_t state = sm_ctxt->get_state (call, arg);

@@ -1783,6 +1868,9 @@ malloc_state_machine::on_deallocator_call (sm_context *sm_ctxt,
 					   d->m_name));
       sm_ctxt->set_next_state (call, arg, m_stop);
     }
+  /* in Modula-2 the DEALLOCATE assigns the pointer to NULL.  However
+     we don't do this in the analyzer as it ignores NULL pointers
+     during deallocation.  */
 }

 /* Implementation of realloc(3):
diff --git a/gcc/langhooks-def.h b/gcc/langhooks-def.h
index ae3991c770a..952513c071b 100644
--- a/gcc/langhooks-def.h
+++ b/gcc/langhooks-def.h
@@ -95,6 +95,7 @@ extern const char *lhd_get_substring_location (const substring_loc &,
 extern int lhd_decl_dwarf_attribute (const_tree, int);
 extern int lhd_type_dwarf_attribute (const_tree, int);
 extern void lhd_finalize_early_debug (void);
+extern bool lhd_new_dispose_storage_substitution (void);

 #define LANG_HOOKS_NAME			"GNU unknown"
 #define LANG_HOOKS_IDENTIFIER_SIZE	sizeof (struct lang_identifier)
@@ -147,6 +148,7 @@ extern void lhd_finalize_early_debug (void);
 #define LANG_HOOKS_RUN_LANG_SELFTESTS   lhd_do_nothing
 #define LANG_HOOKS_GET_SUBSTRING_LOCATION lhd_get_substring_location
 #define LANG_HOOKS_FINALIZE_EARLY_DEBUG lhd_finalize_early_debug
+#define LANG_HOOKS_NEW_DISPOSE_STORAGE_SUBSTITUTION lhd_new_dispose_storage_substitution

 /* Attribute hooks.  */
 #define LANG_HOOKS_ATTRIBUTE_TABLE		NULL
@@ -381,7 +383,8 @@ extern void lhd_end_section (void);
   LANG_HOOKS_EMITS_BEGIN_STMT, \
   LANG_HOOKS_RUN_LANG_SELFTESTS, \
   LANG_HOOKS_GET_SUBSTRING_LOCATION, \
-  LANG_HOOKS_FINALIZE_EARLY_DEBUG \
+  LANG_HOOKS_FINALIZE_EARLY_DEBUG, \
+  LANG_HOOKS_NEW_DISPOSE_STORAGE_SUBSTITUTION \
 }

 #endif /* GCC_LANG_HOOKS_DEF_H */
diff --git a/gcc/langhooks.c b/gcc/langhooks.c
index 2354386f7b4..6486c484895 100644
--- a/gcc/langhooks.c
+++ b/gcc/langhooks.c
@@ -896,6 +896,13 @@ lhd_finalize_early_debug (void)
     (*debug_hooks->early_global_decl) (cnode->decl);
 }

+/* Should the analyzer check for NEW/DISPOSE Storage_ALLOCATE/Storage_DEALLOCATE?  */
+
+bool lhd_new_dispose_storage_substitution (void)
+{
+  return false;
+}
+
 /* Returns true if the current lang_hooks represents the GNU C frontend.  */

 bool
diff --git a/gcc/langhooks.h b/gcc/langhooks.h
index 842e605c439..16b368bfbe1 100644
--- a/gcc/langhooks.h
+++ b/gcc/langhooks.h
@@ -611,6 +611,9 @@ struct lang_hooks
   /* Invoked before the early_finish debug hook is invoked.  */
   void (*finalize_early_debug) (void);

+  /* Does the language substitute NEW into ALLOCATE and DISPOSE into DEALLOCATE?  */
+  bool (*new_dispose_storage_substitution) (void);
+
   /* Whenever you add entries here, make sure you adjust langhooks-def.h
      and langhooks.c accordingly.  */
 };

regards,
Gaius

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-04-19 13:13 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-13  9:52 [PATCH] analyzer: enabling Modula-2 Storage/ALLOCATE/DEALLOCATE Gaius Mulley
2021-04-13 15:12 ` David Malcolm
2021-04-19 13:13   ` Gaius Mulley

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).