From: Eric Feng <ef2648@columbia.edu>
To: dmalcolm@redhat.com
Cc: gcc@gcc.gnu.org, gcc-patches@gcc.gnu.org,
Eric Feng <ef2648@columbia.edu>
Subject: [PATCH] analyzer: implement symbolic value support for CPython plugin's refcnt checker [PR107646]
Date: Mon, 4 Sep 2023 22:13:34 -0400 [thread overview]
Message-ID: <20230905021334.39124-1-ef2648@columbia.edu> (raw)
In-Reply-To: <aa56c3d9c65032bf6c406dcdd4853afc4e721cf9.camel@redhat.com>
Hi Dave,
Recently I've been working on symbolic value support for the reference
count checker. I've attached a patch for it below; let me know it looks
OK for trunk. Thanks!
Best,
Eric
---
This patch enhances the reference count checker in the CPython plugin by
adding support for symbolic values. Whereas previously we were only able
to check the reference count of PyObject* objects created in the scope
of the function; we are now able to emit diagnostics on reference count
mismatch of objects that were, for example, passed in as a function
parameter.
rc6.c:6:10: warning: expected ‘obj’ to have reference count: N + ‘1’ but ob_refcnt field is N + ‘2’
6 | return obj;
| ^~~
‘create_py_object2’: event 1
|
| 6 | return obj;
| | ^~~
| | |
| | (1) here
|
gcc/testsuite/ChangeLog:
PR analyzer/107646
* gcc.dg/plugin/analyzer_cpython_plugin.c: Support reference count checking
of symbolic values.
* gcc.dg/plugin/cpython-plugin-test-PyList_Append.c: New test.
* gcc.dg/plugin/plugin.exp: New test.
* gcc.dg/plugin/cpython-plugin-test-refcnt.c: New test.
Signed-off-by: Eric Feng <ef2648@columbia.edu>
---
.../gcc.dg/plugin/analyzer_cpython_plugin.c | 133 +++++++++++-------
.../cpython-plugin-test-PyList_Append.c | 21 ++-
.../plugin/cpython-plugin-test-refcnt.c | 18 +++
gcc/testsuite/gcc.dg/plugin/plugin.exp | 1 +
4 files changed, 118 insertions(+), 55 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c
diff --git a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
index bf1982e79c3..d7ecd7fce09 100644
--- a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
+++ b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
@@ -314,17 +314,20 @@ public:
{
diagnostic_metadata m;
bool warned;
- // just assuming constants for now
- auto actual_refcnt
- = m_actual_refcnt->dyn_cast_constant_svalue ()->get_constant ();
- auto ob_refcnt = m_ob_refcnt->dyn_cast_constant_svalue ()->get_constant ();
- warned = warning_meta (rich_loc, m, get_controlling_option (),
- "expected %qE to have "
- "reference count: %qE but ob_refcnt field is: %qE",
- m_reg_tree, actual_refcnt, ob_refcnt);
-
- // location_t loc = rich_loc->get_loc ();
- // foo (loc);
+
+ const auto *actual_refcnt_constant
+ = m_actual_refcnt->dyn_cast_constant_svalue ();
+ const auto *ob_refcnt_constant = m_ob_refcnt->dyn_cast_constant_svalue ();
+ if (!actual_refcnt_constant || !ob_refcnt_constant)
+ return false;
+
+ auto actual_refcnt = actual_refcnt_constant->get_constant ();
+ auto ob_refcnt = ob_refcnt_constant->get_constant ();
+ warned = warning_meta (
+ rich_loc, m, get_controlling_option (),
+ "expected %qE to have "
+ "reference count: N + %qE but ob_refcnt field is N + %qE",
+ m_reg_tree, actual_refcnt, ob_refcnt);
return warned;
}
@@ -336,10 +339,6 @@ public:
private:
- void foo(location_t loc) const
- {
- inform(loc, "something is up right here");
- }
const region *m_base_region;
const svalue *m_ob_refcnt;
const svalue *m_actual_refcnt;
@@ -369,6 +368,19 @@ increment_region_refcnt (hash_map<const region *, int> &map, const region *key)
refcnt = existed ? refcnt + 1 : 1;
}
+static const region *
+get_region_from_svalue (const svalue *sval, region_model_manager *mgr)
+{
+ const auto *region_sval = sval->dyn_cast_region_svalue ();
+ if (region_sval)
+ return region_sval->get_pointee ();
+
+ const auto *initial_sval = sval->dyn_cast_initial_svalue ();
+ if (initial_sval)
+ return mgr->get_symbolic_region (initial_sval);
+
+ return nullptr;
+}
/* Recursively fills in region_to_refcnt with the references owned by
pyobj_ptr_sval. */
@@ -381,20 +393,9 @@ count_pyobj_references (const region_model *model,
if (!pyobj_ptr_sval)
return;
- const auto *pyobj_region_sval = pyobj_ptr_sval->dyn_cast_region_svalue ();
- const auto *pyobj_initial_sval = pyobj_ptr_sval->dyn_cast_initial_svalue ();
- if (!pyobj_region_sval && !pyobj_initial_sval)
- return;
-
- // todo: support initial sval (e.g passed in as parameter)
- if (pyobj_initial_sval)
- {
- // increment_region_refcnt (region_to_refcnt,
- // pyobj_initial_sval->get_region ());
- return;
- }
+ region_model_manager *mgr = model->get_manager ();
- const region *pyobj_region = pyobj_region_sval->get_pointee ();
+ const region *pyobj_region = get_region_from_svalue (pyobj_ptr_sval, mgr);
if (!pyobj_region || seen.contains (pyobj_region))
return;
@@ -409,49 +410,75 @@ count_pyobj_references (const region_model *model,
return;
const auto &retval_binding_map = retval_cluster->get_map ();
-
for (const auto &binding : retval_binding_map)
{
- const svalue *binding_sval = binding.second;
- const svalue *unwrapped_sval = binding_sval->unwrap_any_unmergeable ();
- const region *pointee = unwrapped_sval->maybe_get_region ();
-
- if (pointee && pointee->get_kind () == RK_HEAP_ALLOCATED)
+ const svalue *binding_sval = binding.second->unwrap_any_unmergeable ();
+ if (get_region_from_svalue (binding_sval, mgr))
count_pyobj_references (model, region_to_refcnt, binding_sval, seen);
}
}
+static void
+unwrap_any_ob_refcnt_sval (const svalue *&ob_refcnt_sval)
+{
+ if (ob_refcnt_sval->get_kind () != SK_CONSTANT)
+ {
+ auto unwrap_cast = ob_refcnt_sval->maybe_undo_cast ();
+ if (!unwrap_cast)
+ unwrap_cast = ob_refcnt_sval;
+
+ if (unwrap_cast->get_kind () == SK_BINOP)
+ ob_refcnt_sval = unwrap_cast->dyn_cast_binop_svalue ()->get_arg1 ();
+ }
+}
+
+static void
+handle_refcnt_mismatch (const region_model *old_model,
+ const ana::region *curr_region,
+ const svalue *ob_refcnt_sval,
+ const svalue *actual_refcnt_sval,
+ region_model_context *ctxt)
+{
+ region_model_manager *mgr = old_model->get_manager ();
+ const svalue *curr_reg_sval
+ = mgr->get_ptr_svalue (pyobj_ptr_tree, curr_region);
+ tree reg_tree = old_model->get_representative_tree (curr_reg_sval);
+
+ if (!reg_tree)
+ return;
+
+ const auto &eg = ctxt->get_eg ();
+ refcnt_stmt_finder finder (*eg, reg_tree);
+ auto pd = make_unique<refcnt_mismatch> (curr_region, ob_refcnt_sval,
+ actual_refcnt_sval, reg_tree);
+ if (pd && eg)
+ ctxt->warn (std::move (pd), &finder);
+}
+
/* Compare ob_refcnt field vs the actual reference count of a region */
static void
check_refcnt (const region_model *model,
const region_model *old_model,
region_model_context *ctxt,
const hash_map<const ana::region *,
- int>::iterator::reference_pair region_refcnt)
+ int>::iterator::reference_pair ®ion_refcnt)
{
region_model_manager *mgr = model->get_manager ();
const auto &curr_region = region_refcnt.first;
const auto &actual_refcnt = region_refcnt.second;
+
const svalue *ob_refcnt_sval
= retrieve_ob_refcnt_sval (curr_region, model, ctxt);
+ if (!ob_refcnt_sval)
+ return;
+
+ unwrap_any_ob_refcnt_sval (ob_refcnt_sval);
+
const svalue *actual_refcnt_sval = mgr->get_or_create_int_cst (
ob_refcnt_sval->get_type (), actual_refcnt);
-
if (ob_refcnt_sval != actual_refcnt_sval)
- {
- const svalue *curr_reg_sval
- = mgr->get_ptr_svalue (pyobj_ptr_tree, curr_region);
- tree reg_tree = old_model->get_representative_tree (curr_reg_sval);
- if (!reg_tree)
- return;
-
- const auto &eg = ctxt->get_eg ();
- refcnt_stmt_finder finder (*eg, reg_tree);
- auto pd = make_unique<refcnt_mismatch> (curr_region, ob_refcnt_sval,
- actual_refcnt_sval, reg_tree);
- if (pd && eg)
- ctxt->warn (std::move (pd), &finder);
- }
+ handle_refcnt_mismatch (old_model, curr_region, ob_refcnt_sval,
+ actual_refcnt_sval, ctxt);
}
static void
@@ -493,8 +520,6 @@ count_all_references (const region_model *model,
for (const auto &cluster : *model->get_store ())
{
auto curr_region = cluster.first;
- if (curr_region->get_kind () != RK_HEAP_ALLOCATED)
- continue;
increment_region_refcnt (region_to_refcnt, curr_region);
@@ -505,8 +530,8 @@ count_all_references (const region_model *model,
const svalue *unwrapped_sval
= binding_sval->unwrap_any_unmergeable ();
- // if (unwrapped_sval->get_type () != pyobj_ptr_tree)
- // continue;
+ if (unwrapped_sval->get_type () != pyobj_ptr_tree)
+ continue;
const region *pointee = unwrapped_sval->maybe_get_region ();
if (!pointee || pointee->get_kind () != RK_HEAP_ALLOCATED)
diff --git a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c
index e1efd9efda5..46daf2f8975 100644
--- a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c
+++ b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c
@@ -75,4 +75,23 @@ test_PyListAppend_6 ()
PyObject *list = NULL;
PyList_Append(list, item);
return list;
-}
\ No newline at end of file
+}
+
+PyObject *
+test_PyListAppend_7 (PyObject *item)
+{
+ PyObject *list = PyList_New (0);
+ Py_INCREF(item);
+ PyList_Append(list, item);
+ return list;
+ /* { dg-warning "expected 'item' to have reference count" "" { target *-*-* } .-1 } */
+}
+
+PyObject *
+test_PyListAppend_8 (PyObject *item, PyObject *list)
+{
+ Py_INCREF(item);
+ Py_INCREF(item);
+ PyList_Append(list, item);
+ return list;
+}
diff --git a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c
new file mode 100644
index 00000000000..a7f39509d6d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target analyzer } */
+/* { dg-options "-fanalyzer" } */
+/* { dg-require-python-h "" } */
+
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include "../analyzer/analyzer-decls.h"
+
+PyObject *
+test_refcnt_1 (PyObject *obj)
+{
+ Py_INCREF(obj);
+ Py_INCREF(obj);
+ return obj;
+ /* { dg-warning "expected 'obj' to have reference count" "" { target *-*-* } .-1 } */
+}
diff --git a/gcc/testsuite/gcc.dg/plugin/plugin.exp b/gcc/testsuite/gcc.dg/plugin/plugin.exp
index ed72912309c..87862b4ca00 100644
--- a/gcc/testsuite/gcc.dg/plugin/plugin.exp
+++ b/gcc/testsuite/gcc.dg/plugin/plugin.exp
@@ -162,6 +162,7 @@ set plugin_test_list [list \
taint-antipatterns-1.c } \
{ analyzer_cpython_plugin.c \
cpython-plugin-test-no-Python-h.c \
+ cpython-plugin-test-refcnt.c \
cpython-plugin-test-PyList_Append.c \
cpython-plugin-test-PyList_New.c \
cpython-plugin-test-PyLong_FromLong.c } \
--
2.30.2
next parent reply other threads:[~2023-09-05 2:13 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <aa56c3d9c65032bf6c406dcdd4853afc4e721cf9.camel@redhat.com>
2023-09-05 2:13 ` Eric Feng [this message]
2023-09-07 17:28 ` David Malcolm
2023-09-11 2:12 ` Eric Feng
2023-09-11 19:00 ` David Malcolm
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230905021334.39124-1-ef2648@columbia.edu \
--to=ef2648@columbia.edu \
--cc=dmalcolm@redhat.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=gcc@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).