* [PATCH] analyzer: implement symbolic value support for CPython plugin's refcnt checker [PR107646]
[not found] <aa56c3d9c65032bf6c406dcdd4853afc4e721cf9.camel@redhat.com>
@ 2023-09-05 2:13 ` Eric Feng
2023-09-07 17:28 ` David Malcolm
0 siblings, 1 reply; 4+ messages in thread
From: Eric Feng @ 2023-09-05 2:13 UTC (permalink / raw)
To: dmalcolm; +Cc: gcc, gcc-patches, Eric Feng
Hi Dave,
Recently I've been working on symbolic value support for the reference
count checker. I've attached a patch for it below; let me know it looks
OK for trunk. Thanks!
Best,
Eric
---
This patch enhances the reference count checker in the CPython plugin by
adding support for symbolic values. Whereas previously we were only able
to check the reference count of PyObject* objects created in the scope
of the function; we are now able to emit diagnostics on reference count
mismatch of objects that were, for example, passed in as a function
parameter.
rc6.c:6:10: warning: expected ‘obj’ to have reference count: N + ‘1’ but ob_refcnt field is N + ‘2’
6 | return obj;
| ^~~
‘create_py_object2’: event 1
|
| 6 | return obj;
| | ^~~
| | |
| | (1) here
|
gcc/testsuite/ChangeLog:
PR analyzer/107646
* gcc.dg/plugin/analyzer_cpython_plugin.c: Support reference count checking
of symbolic values.
* gcc.dg/plugin/cpython-plugin-test-PyList_Append.c: New test.
* gcc.dg/plugin/plugin.exp: New test.
* gcc.dg/plugin/cpython-plugin-test-refcnt.c: New test.
Signed-off-by: Eric Feng <ef2648@columbia.edu>
---
.../gcc.dg/plugin/analyzer_cpython_plugin.c | 133 +++++++++++-------
.../cpython-plugin-test-PyList_Append.c | 21 ++-
.../plugin/cpython-plugin-test-refcnt.c | 18 +++
gcc/testsuite/gcc.dg/plugin/plugin.exp | 1 +
4 files changed, 118 insertions(+), 55 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c
diff --git a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
index bf1982e79c3..d7ecd7fce09 100644
--- a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
+++ b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
@@ -314,17 +314,20 @@ public:
{
diagnostic_metadata m;
bool warned;
- // just assuming constants for now
- auto actual_refcnt
- = m_actual_refcnt->dyn_cast_constant_svalue ()->get_constant ();
- auto ob_refcnt = m_ob_refcnt->dyn_cast_constant_svalue ()->get_constant ();
- warned = warning_meta (rich_loc, m, get_controlling_option (),
- "expected %qE to have "
- "reference count: %qE but ob_refcnt field is: %qE",
- m_reg_tree, actual_refcnt, ob_refcnt);
-
- // location_t loc = rich_loc->get_loc ();
- // foo (loc);
+
+ const auto *actual_refcnt_constant
+ = m_actual_refcnt->dyn_cast_constant_svalue ();
+ const auto *ob_refcnt_constant = m_ob_refcnt->dyn_cast_constant_svalue ();
+ if (!actual_refcnt_constant || !ob_refcnt_constant)
+ return false;
+
+ auto actual_refcnt = actual_refcnt_constant->get_constant ();
+ auto ob_refcnt = ob_refcnt_constant->get_constant ();
+ warned = warning_meta (
+ rich_loc, m, get_controlling_option (),
+ "expected %qE to have "
+ "reference count: N + %qE but ob_refcnt field is N + %qE",
+ m_reg_tree, actual_refcnt, ob_refcnt);
return warned;
}
@@ -336,10 +339,6 @@ public:
private:
- void foo(location_t loc) const
- {
- inform(loc, "something is up right here");
- }
const region *m_base_region;
const svalue *m_ob_refcnt;
const svalue *m_actual_refcnt;
@@ -369,6 +368,19 @@ increment_region_refcnt (hash_map<const region *, int> &map, const region *key)
refcnt = existed ? refcnt + 1 : 1;
}
+static const region *
+get_region_from_svalue (const svalue *sval, region_model_manager *mgr)
+{
+ const auto *region_sval = sval->dyn_cast_region_svalue ();
+ if (region_sval)
+ return region_sval->get_pointee ();
+
+ const auto *initial_sval = sval->dyn_cast_initial_svalue ();
+ if (initial_sval)
+ return mgr->get_symbolic_region (initial_sval);
+
+ return nullptr;
+}
/* Recursively fills in region_to_refcnt with the references owned by
pyobj_ptr_sval. */
@@ -381,20 +393,9 @@ count_pyobj_references (const region_model *model,
if (!pyobj_ptr_sval)
return;
- const auto *pyobj_region_sval = pyobj_ptr_sval->dyn_cast_region_svalue ();
- const auto *pyobj_initial_sval = pyobj_ptr_sval->dyn_cast_initial_svalue ();
- if (!pyobj_region_sval && !pyobj_initial_sval)
- return;
-
- // todo: support initial sval (e.g passed in as parameter)
- if (pyobj_initial_sval)
- {
- // increment_region_refcnt (region_to_refcnt,
- // pyobj_initial_sval->get_region ());
- return;
- }
+ region_model_manager *mgr = model->get_manager ();
- const region *pyobj_region = pyobj_region_sval->get_pointee ();
+ const region *pyobj_region = get_region_from_svalue (pyobj_ptr_sval, mgr);
if (!pyobj_region || seen.contains (pyobj_region))
return;
@@ -409,49 +410,75 @@ count_pyobj_references (const region_model *model,
return;
const auto &retval_binding_map = retval_cluster->get_map ();
-
for (const auto &binding : retval_binding_map)
{
- const svalue *binding_sval = binding.second;
- const svalue *unwrapped_sval = binding_sval->unwrap_any_unmergeable ();
- const region *pointee = unwrapped_sval->maybe_get_region ();
-
- if (pointee && pointee->get_kind () == RK_HEAP_ALLOCATED)
+ const svalue *binding_sval = binding.second->unwrap_any_unmergeable ();
+ if (get_region_from_svalue (binding_sval, mgr))
count_pyobj_references (model, region_to_refcnt, binding_sval, seen);
}
}
+static void
+unwrap_any_ob_refcnt_sval (const svalue *&ob_refcnt_sval)
+{
+ if (ob_refcnt_sval->get_kind () != SK_CONSTANT)
+ {
+ auto unwrap_cast = ob_refcnt_sval->maybe_undo_cast ();
+ if (!unwrap_cast)
+ unwrap_cast = ob_refcnt_sval;
+
+ if (unwrap_cast->get_kind () == SK_BINOP)
+ ob_refcnt_sval = unwrap_cast->dyn_cast_binop_svalue ()->get_arg1 ();
+ }
+}
+
+static void
+handle_refcnt_mismatch (const region_model *old_model,
+ const ana::region *curr_region,
+ const svalue *ob_refcnt_sval,
+ const svalue *actual_refcnt_sval,
+ region_model_context *ctxt)
+{
+ region_model_manager *mgr = old_model->get_manager ();
+ const svalue *curr_reg_sval
+ = mgr->get_ptr_svalue (pyobj_ptr_tree, curr_region);
+ tree reg_tree = old_model->get_representative_tree (curr_reg_sval);
+
+ if (!reg_tree)
+ return;
+
+ const auto &eg = ctxt->get_eg ();
+ refcnt_stmt_finder finder (*eg, reg_tree);
+ auto pd = make_unique<refcnt_mismatch> (curr_region, ob_refcnt_sval,
+ actual_refcnt_sval, reg_tree);
+ if (pd && eg)
+ ctxt->warn (std::move (pd), &finder);
+}
+
/* Compare ob_refcnt field vs the actual reference count of a region */
static void
check_refcnt (const region_model *model,
const region_model *old_model,
region_model_context *ctxt,
const hash_map<const ana::region *,
- int>::iterator::reference_pair region_refcnt)
+ int>::iterator::reference_pair ®ion_refcnt)
{
region_model_manager *mgr = model->get_manager ();
const auto &curr_region = region_refcnt.first;
const auto &actual_refcnt = region_refcnt.second;
+
const svalue *ob_refcnt_sval
= retrieve_ob_refcnt_sval (curr_region, model, ctxt);
+ if (!ob_refcnt_sval)
+ return;
+
+ unwrap_any_ob_refcnt_sval (ob_refcnt_sval);
+
const svalue *actual_refcnt_sval = mgr->get_or_create_int_cst (
ob_refcnt_sval->get_type (), actual_refcnt);
-
if (ob_refcnt_sval != actual_refcnt_sval)
- {
- const svalue *curr_reg_sval
- = mgr->get_ptr_svalue (pyobj_ptr_tree, curr_region);
- tree reg_tree = old_model->get_representative_tree (curr_reg_sval);
- if (!reg_tree)
- return;
-
- const auto &eg = ctxt->get_eg ();
- refcnt_stmt_finder finder (*eg, reg_tree);
- auto pd = make_unique<refcnt_mismatch> (curr_region, ob_refcnt_sval,
- actual_refcnt_sval, reg_tree);
- if (pd && eg)
- ctxt->warn (std::move (pd), &finder);
- }
+ handle_refcnt_mismatch (old_model, curr_region, ob_refcnt_sval,
+ actual_refcnt_sval, ctxt);
}
static void
@@ -493,8 +520,6 @@ count_all_references (const region_model *model,
for (const auto &cluster : *model->get_store ())
{
auto curr_region = cluster.first;
- if (curr_region->get_kind () != RK_HEAP_ALLOCATED)
- continue;
increment_region_refcnt (region_to_refcnt, curr_region);
@@ -505,8 +530,8 @@ count_all_references (const region_model *model,
const svalue *unwrapped_sval
= binding_sval->unwrap_any_unmergeable ();
- // if (unwrapped_sval->get_type () != pyobj_ptr_tree)
- // continue;
+ if (unwrapped_sval->get_type () != pyobj_ptr_tree)
+ continue;
const region *pointee = unwrapped_sval->maybe_get_region ();
if (!pointee || pointee->get_kind () != RK_HEAP_ALLOCATED)
diff --git a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c
index e1efd9efda5..46daf2f8975 100644
--- a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c
+++ b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c
@@ -75,4 +75,23 @@ test_PyListAppend_6 ()
PyObject *list = NULL;
PyList_Append(list, item);
return list;
-}
\ No newline at end of file
+}
+
+PyObject *
+test_PyListAppend_7 (PyObject *item)
+{
+ PyObject *list = PyList_New (0);
+ Py_INCREF(item);
+ PyList_Append(list, item);
+ return list;
+ /* { dg-warning "expected 'item' to have reference count" "" { target *-*-* } .-1 } */
+}
+
+PyObject *
+test_PyListAppend_8 (PyObject *item, PyObject *list)
+{
+ Py_INCREF(item);
+ Py_INCREF(item);
+ PyList_Append(list, item);
+ return list;
+}
diff --git a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c
new file mode 100644
index 00000000000..a7f39509d6d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target analyzer } */
+/* { dg-options "-fanalyzer" } */
+/* { dg-require-python-h "" } */
+
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include "../analyzer/analyzer-decls.h"
+
+PyObject *
+test_refcnt_1 (PyObject *obj)
+{
+ Py_INCREF(obj);
+ Py_INCREF(obj);
+ return obj;
+ /* { dg-warning "expected 'obj' to have reference count" "" { target *-*-* } .-1 } */
+}
diff --git a/gcc/testsuite/gcc.dg/plugin/plugin.exp b/gcc/testsuite/gcc.dg/plugin/plugin.exp
index ed72912309c..87862b4ca00 100644
--- a/gcc/testsuite/gcc.dg/plugin/plugin.exp
+++ b/gcc/testsuite/gcc.dg/plugin/plugin.exp
@@ -162,6 +162,7 @@ set plugin_test_list [list \
taint-antipatterns-1.c } \
{ analyzer_cpython_plugin.c \
cpython-plugin-test-no-Python-h.c \
+ cpython-plugin-test-refcnt.c \
cpython-plugin-test-PyList_Append.c \
cpython-plugin-test-PyList_New.c \
cpython-plugin-test-PyLong_FromLong.c } \
--
2.30.2
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] analyzer: implement symbolic value support for CPython plugin's refcnt checker [PR107646]
2023-09-05 2:13 ` [PATCH] analyzer: implement symbolic value support for CPython plugin's refcnt checker [PR107646] Eric Feng
@ 2023-09-07 17:28 ` David Malcolm
2023-09-11 2:12 ` Eric Feng
0 siblings, 1 reply; 4+ messages in thread
From: David Malcolm @ 2023-09-07 17:28 UTC (permalink / raw)
To: Eric Feng; +Cc: gcc, gcc-patches
On Mon, 2023-09-04 at 22:13 -0400, Eric Feng wrote:
> Hi Dave,
Hi Eric, thanks for the patch.
>
> Recently I've been working on symbolic value support for the reference
> count checker. I've attached a patch for it below; let me know it looks
> OK for trunk. Thanks!
>
> Best,
> Eric
>
> ---
>
> This patch enhances the reference count checker in the CPython plugin by
> adding support for symbolic values. Whereas previously we were only able
> to check the reference count of PyObject* objects created in the scope
> of the function; we are now able to emit diagnostics on reference count
> mismatch of objects that were, for example, passed in as a function
> parameter.
>
> rc6.c:6:10: warning: expected ‘obj’ to have reference count: N + ‘1’ but ob_refcnt field is N + ‘2’
> 6 | return obj;
> | ^~~
[...snip...]
> create mode 100644 gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c
>
> diff --git a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> index bf1982e79c3..d7ecd7fce09 100644
> --- a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> +++ b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> @@ -314,17 +314,20 @@ public:
> {
> diagnostic_metadata m;
> bool warned;
> - // just assuming constants for now
> - auto actual_refcnt
> - = m_actual_refcnt->dyn_cast_constant_svalue ()->get_constant ();
> - auto ob_refcnt = m_ob_refcnt->dyn_cast_constant_svalue ()->get_constant ();
> - warned = warning_meta (rich_loc, m, get_controlling_option (),
> - "expected %qE to have "
> - "reference count: %qE but ob_refcnt field is: %qE",
> - m_reg_tree, actual_refcnt, ob_refcnt);
> -
> - // location_t loc = rich_loc->get_loc ();
> - // foo (loc);
> +
> + const auto *actual_refcnt_constant
> + = m_actual_refcnt->dyn_cast_constant_svalue ();
> + const auto *ob_refcnt_constant = m_ob_refcnt->dyn_cast_constant_svalue ();
> + if (!actual_refcnt_constant || !ob_refcnt_constant)
> + return false;
> +
> + auto actual_refcnt = actual_refcnt_constant->get_constant ();
> + auto ob_refcnt = ob_refcnt_constant->get_constant ();
> + warned = warning_meta (
> + rich_loc, m, get_controlling_option (),
> + "expected %qE to have "
> + "reference count: N + %qE but ob_refcnt field is N + %qE",
> + m_reg_tree, actual_refcnt, ob_refcnt);
> return warned;
I know you're emulating the old behavior I implemented way back in
cpychecker, but I don't like that behavior :(
Specifically, although the patch improves the behavior for symbolic
values, it regresses the precision of wording for the concrete values
case. If we have e.g. a concrete ob_refcnt of 2, whereas we only have
1 pointer, then it's more readable to say:
warning: expected ‘obj’ to have reference count: ‘1’ but ob_refcnt
field is ‘2’
than:
warning: expected ‘obj’ to have reference count: N + ‘1’ but ob_refcnt field is N + ‘2’
...and we shouldn't quote concrete numbers, the message should be:
warning: expected ‘obj’ to have reference count of 1 but ob_refcnt field is 2
or better:
warning: ‘*obj’ is pointed to by 1 pointer but 'ob_refcnt' field is 2
Can you move the unwrapping of the svalue from the tests below into the
emit vfunc? That way the m_actual_refcnt doesn't have to be a
constant_svalue; you could have logic in the emit vfunc to print
readable messages based on what kind of svalue it is.
Rather than 'N', it might be better to say 'initial'; how about:
warning: ‘*obj’ is pointed to by 0 additional pointers but 'ob_refcnt' field has increased by 1
warning: ‘*obj’ is pointed to by 1 additional pointer but 'ob_refcnt' field has increased by 2
warning: ‘*obj’ is pointed to by 1 additional pointer but 'ob_refcnt' field is unchanged
warning: ‘*obj’ is pointed to by 2 additional pointers but 'ob_refcnt' field has decreased by 1
warning: ‘*obj’ is pointed to by 1 fewer pointers but 'ob_refcnt' field is unchanged
and similar?
Maybe have a flag that tracks whether we're talking about a concrete
value that's absolute versus a concrete value that's relative to the
initial value?
[...snip...]
> @@ -369,6 +368,19 @@ increment_region_refcnt (hash_map<const region *, int> &map, const region *key)
> refcnt = existed ? refcnt + 1 : 1;
> }
>
> +static const region *
> +get_region_from_svalue (const svalue *sval, region_model_manager *mgr)
> +{
> + const auto *region_sval = sval->dyn_cast_region_svalue ();
> + if (region_sval)
> + return region_sval->get_pointee ();
> +
> + const auto *initial_sval = sval->dyn_cast_initial_svalue ();
> + if (initial_sval)
> + return mgr->get_symbolic_region (initial_sval);
> +
> + return nullptr;
> +}
This is dereferencing a pointer, right?
Can the caller use region_model::deref_rvalue instead?
[...snip...]
> +static void
> +unwrap_any_ob_refcnt_sval (const svalue *&ob_refcnt_sval)
> +{
> + if (ob_refcnt_sval->get_kind () != SK_CONSTANT)
> + {
> + auto unwrap_cast = ob_refcnt_sval->maybe_undo_cast ();
> + if (!unwrap_cast)
> + unwrap_cast = ob_refcnt_sval;
> +
> + if (unwrap_cast->get_kind () == SK_BINOP)
> + ob_refcnt_sval = unwrap_cast->dyn_cast_binop_svalue ()->get_arg1 ();
This would be better spelled as:
if (const binop_svalue *binop_sval = unwrap_cast->dyn_cast_binop_svalue ())
ob_refcnt_sval = binop_sval->get_arg1 ();
[...snip...]
> /* Compare ob_refcnt field vs the actual reference count of a region */
> static void
> check_refcnt (const region_model *model,
> const region_model *old_model,
> region_model_context *ctxt,
> const hash_map<const ana::region *,
> - int>::iterator::reference_pair region_refcnt)
> + int>::iterator::reference_pair ®ion_refcnt)
Could really use a typedef for
const hash_map<const ana::region *, int>
to simplify this code.
> {
> region_model_manager *mgr = model->get_manager ();
> const auto &curr_region = region_refcnt.first;
> const auto &actual_refcnt = region_refcnt.second;
> +
> const svalue *ob_refcnt_sval
> = retrieve_ob_refcnt_sval (curr_region, model, ctxt);
> + if (!ob_refcnt_sval)
> + return;
> +
> + unwrap_any_ob_refcnt_sval (ob_refcnt_sval);
As noted above, can the diagnostic store the pre-unwrapped
ob_refcnt_sval? Might mean you have to do the unwrapping both here,
and later when displaying the diagnostic. Or (probably best) track
both the original and unwrapped ob_refcnt_sval, and store both in the
pending_diagnostic.
> +
> const svalue *actual_refcnt_sval = mgr->get_or_create_int_cst (
> ob_refcnt_sval->get_type (), actual_refcnt);
> -
> if (ob_refcnt_sval != actual_refcnt_sval)
> - {
> - const svalue *curr_reg_sval
> - = mgr->get_ptr_svalue (pyobj_ptr_tree, curr_region);
> - tree reg_tree = old_model->get_representative_tree (curr_reg_sval);
> - if (!reg_tree)
> - return;
> -
> - const auto &eg = ctxt->get_eg ();
> - refcnt_stmt_finder finder (*eg, reg_tree);
> - auto pd = make_unique<refcnt_mismatch> (curr_region, ob_refcnt_sval,
> - actual_refcnt_sval, reg_tree);
> - if (pd && eg)
> - ctxt->warn (std::move (pd), &finder);
> - }
> + handle_refcnt_mismatch (old_model, curr_region, ob_refcnt_sval,
> + actual_refcnt_sval, ctxt);
> }
>
> static void
> @@ -493,8 +520,6 @@ count_all_references (const region_model *model,
> for (const auto &cluster : *model->get_store ())
> {
> auto curr_region = cluster.first;
> - if (curr_region->get_kind () != RK_HEAP_ALLOCATED)
> - continue;
>
> increment_region_refcnt (region_to_refcnt, curr_region);
>
> @@ -505,8 +530,8 @@ count_all_references (const region_model *model,
>
> const svalue *unwrapped_sval
> = binding_sval->unwrap_any_unmergeable ();
> - // if (unwrapped_sval->get_type () != pyobj_ptr_tree)
> - // continue;
> + if (unwrapped_sval->get_type () != pyobj_ptr_tree)
> + continue;
We'll probably want a smarter test for this, that the type "inherits"
C-style from PyObject (e.g. PyLongObject).
>
> const region *pointee = unwrapped_sval->maybe_get_region ();
> if (!pointee || pointee->get_kind () != RK_HEAP_ALLOCATED)
> diff --git a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c
> index e1efd9efda5..46daf2f8975 100644
> --- a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c
> +++ b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c
> @@ -75,4 +75,23 @@ test_PyListAppend_6 ()
> PyObject *list = NULL;
> PyList_Append(list, item);
> return list;
> -}
> \ No newline at end of file
> +}
> +
> +PyObject *
> +test_PyListAppend_7 (PyObject *item)
> +{
> + PyObject *list = PyList_New (0);
> + Py_INCREF(item);
> + PyList_Append(list, item);
> + return list;
> + /* { dg-warning "expected 'item' to have reference count" "" { target *-*-* } .-1 } */
It would be good if these dg-warning directives regexp contained the
actual and expected counts; I find I can't easily tell what the
intended output is meant to be.
> +}
> +
> +PyObject *
> +test_PyListAppend_8 (PyObject *item, PyObject *list)
> +{
> + Py_INCREF(item);
> + Py_INCREF(item);
> + PyList_Append(list, item);
> + return list;
> +}
Should we complain here about item->ob_refcnt being too high?
> diff --git a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c
> new file mode 100644
> index 00000000000..a7f39509d6d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target analyzer } */
> +/* { dg-options "-fanalyzer" } */
> +/* { dg-require-python-h "" } */
> +
> +
> +#define PY_SSIZE_T_CLEAN
> +#include <Python.h>
> +#include "../analyzer/analyzer-decls.h"
> +
> +PyObject *
> +test_refcnt_1 (PyObject *obj)
> +{
> + Py_INCREF(obj);
> + Py_INCREF(obj);
> + return obj;
> + /* { dg-warning "expected 'obj' to have reference count" "" { target *-*-* } .-1 } */
Likewise, it would be better for the dg-warning directive's expressed
the expected "actual vs expected" values.
[...snip...]
Thanks again for the patch, hope this is constructive
Dave
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] analyzer: implement symbolic value support for CPython plugin's refcnt checker [PR107646]
2023-09-07 17:28 ` David Malcolm
@ 2023-09-11 2:12 ` Eric Feng
2023-09-11 19:00 ` David Malcolm
0 siblings, 1 reply; 4+ messages in thread
From: Eric Feng @ 2023-09-11 2:12 UTC (permalink / raw)
To: David Malcolm; +Cc: gcc, gcc-patches
[-- Attachment #1: Type: text/plain, Size: 12148 bytes --]
On Thu, Sep 7, 2023 at 1:28 PM David Malcolm <dmalcolm@redhat.com> wrote:
> On Mon, 2023-09-04 at 22:13 -0400, Eric Feng wrote:
>
> > Hi Dave,
>
> Hi Eric, thanks for the patch.
>
> >
> > Recently I've been working on symbolic value support for the reference
> > count checker. I've attached a patch for it below; let me know it looks
> > OK for trunk. Thanks!
> >
> > Best,
> > Eric
> >
> > ---
> >
> > This patch enhances the reference count checker in the CPython plugin by
> > adding support for symbolic values. Whereas previously we were only able
> > to check the reference count of PyObject* objects created in the scope
> > of the function; we are now able to emit diagnostics on reference count
> > mismatch of objects that were, for example, passed in as a function
> > parameter.
> >
> > rc6.c:6:10: warning: expected ‘obj’ to have reference count: N + ‘1’ but
> ob_refcnt field is N + ‘2’
> > 6 | return obj;
> > | ^~~
>
> [...snip...]
>
> > create mode 100644
> gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c
> >
> > diff --git a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> > index bf1982e79c3..d7ecd7fce09 100644
> > --- a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> > +++ b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> > @@ -314,17 +314,20 @@ public:
> > {
> > diagnostic_metadata m;
> > bool warned;
> > - // just assuming constants for now
> > - auto actual_refcnt
> > - = m_actual_refcnt->dyn_cast_constant_svalue ()->get_constant ();
> > - auto ob_refcnt = m_ob_refcnt->dyn_cast_constant_svalue
> ()->get_constant ();
> > - warned = warning_meta (rich_loc, m, get_controlling_option (),
> > - "expected %qE to have "
> > - "reference count: %qE but ob_refcnt field is:
> %qE",
> > - m_reg_tree, actual_refcnt, ob_refcnt);
> > -
> > - // location_t loc = rich_loc->get_loc ();
> > - // foo (loc);
> > +
> > + const auto *actual_refcnt_constant
> > + = m_actual_refcnt->dyn_cast_constant_svalue ();
> > + const auto *ob_refcnt_constant =
> m_ob_refcnt->dyn_cast_constant_svalue ();
> > + if (!actual_refcnt_constant || !ob_refcnt_constant)
> > + return false;
> > +
> > + auto actual_refcnt = actual_refcnt_constant->get_constant ();
> > + auto ob_refcnt = ob_refcnt_constant->get_constant ();
> > + warned = warning_meta (
> > + rich_loc, m, get_controlling_option (),
> > + "expected %qE to have "
> > + "reference count: N + %qE but ob_refcnt field is N + %qE",
> > + m_reg_tree, actual_refcnt, ob_refcnt);
> > return warned;
>
> I know you're emulating the old behavior I implemented way back in
> cpychecker, but I don't like that behavior :(
>
> Specifically, although the patch improves the behavior for symbolic
> values, it regresses the precision of wording for the concrete values
> case. If we have e.g. a concrete ob_refcnt of 2, whereas we only have
> 1 pointer, then it's more readable to say:
>
> warning: expected ‘obj’ to have reference count: ‘1’ but ob_refcnt
> field is ‘2’
>
> than:
>
> warning: expected ‘obj’ to have reference count: N + ‘1’ but ob_refcnt
> field is N + ‘2’
>
> ...and we shouldn't quote concrete numbers, the message should be:
>
> warning: expected ‘obj’ to have reference count of 1 but ob_refcnt field
> is 2
> or better:
>
> warning: ‘*obj’ is pointed to by 1 pointer but 'ob_refcnt' field is 2
>
>
> Can you move the unwrapping of the svalue from the tests below into the
> emit vfunc? That way the m_actual_refcnt doesn't have to be a
> constant_svalue; you could have logic in the emit vfunc to print
> readable messages based on what kind of svalue it is.
>
> Rather than 'N', it might be better to say 'initial'; how about:
>
> warning: ‘*obj’ is pointed to by 0 additional pointers but 'ob_refcnt'
> field has increased by 1
> warning: ‘*obj’ is pointed to by 1 additional pointer but 'ob_refcnt'
> field has increased by 2
> warning: ‘*obj’ is pointed to by 1 additional pointer but 'ob_refcnt'
> field is unchanged
> warning: ‘*obj’ is pointed to by 2 additional pointers but 'ob_refcnt'
> field has decreased by 1
> warning: ‘*obj’ is pointed to by 1 fewer pointers but 'ob_refcnt' field
> is unchanged
>
> and similar?
>
That makes sense to me as well (indeed I was just emulating the old
behavior)! Will experiment and keep you posted on a revised patch with this
in mind. This is somewhat of a minor detail but can we emit ‘*obj’ as
bolded text in the diagnostic message? Currently, I can emit this
(including the asterisk) like so: '*%E'. But unlike using %qE, it doesn't
bold the body of the single quotations. Is this possible?
>
> Maybe have a flag that tracks whether we're talking about a concrete
> value that's absolute versus a concrete value that's relative to the
> initial value?
>
>
> [...snip...]
>
>
> > @@ -369,6 +368,19 @@ increment_region_refcnt (hash_map<const region *,
> int> &map, const region *key)
> > refcnt = existed ? refcnt + 1 : 1;
> > }
> >
> > +static const region *
> > +get_region_from_svalue (const svalue *sval, region_model_manager *mgr)
> > +{
> > + const auto *region_sval = sval->dyn_cast_region_svalue ();
> > + if (region_sval)
> > + return region_sval->get_pointee ();
> > +
> > + const auto *initial_sval = sval->dyn_cast_initial_svalue ();
> > + if (initial_sval)
> > + return mgr->get_symbolic_region (initial_sval);
> > +
> > + return nullptr;
> > +}
>
> This is dereferencing a pointer, right?
>
> Can the caller use region_model::deref_rvalue instead?
>
>
> [...snip...]
>
> > +static void
> > +unwrap_any_ob_refcnt_sval (const svalue *&ob_refcnt_sval)
> > +{
> > + if (ob_refcnt_sval->get_kind () != SK_CONSTANT)
> > + {
> > + auto unwrap_cast = ob_refcnt_sval->maybe_undo_cast ();
> > + if (!unwrap_cast)
> > + unwrap_cast = ob_refcnt_sval;
> > +
> > + if (unwrap_cast->get_kind () == SK_BINOP)
> > + ob_refcnt_sval = unwrap_cast->dyn_cast_binop_svalue ()->get_arg1
> ();
>
> This would be better spelled as:
>
> if (const binop_svalue *binop_sval =
> unwrap_cast->dyn_cast_binop_svalue ())
> ob_refcnt_sval = binop_sval->get_arg1 ();
>
> [...snip...]
>
> > /* Compare ob_refcnt field vs the actual reference count of a region */
> > static void
> > check_refcnt (const region_model *model,
> > const region_model *old_model,
> > region_model_context *ctxt,
> > const hash_map<const ana::region *,
> > - int>::iterator::reference_pair region_refcnt)
> > + int>::iterator::reference_pair ®ion_refcnt)
>
> Could really use a typedef for
> const hash_map<const ana::region *, int>
> to simplify this code.
>
> > {
> > region_model_manager *mgr = model->get_manager ();
> > const auto &curr_region = region_refcnt.first;
> > const auto &actual_refcnt = region_refcnt.second;
> > +
> > const svalue *ob_refcnt_sval
> > = retrieve_ob_refcnt_sval (curr_region, model, ctxt);
> > + if (!ob_refcnt_sval)
> > + return;
> > +
> > + unwrap_any_ob_refcnt_sval (ob_refcnt_sval);
>
> As noted above, can the diagnostic store the pre-unwrapped
> ob_refcnt_sval? Might mean you have to do the unwrapping both here,
> and later when displaying the diagnostic. Or (probably best) track
> both the original and unwrapped ob_refcnt_sval, and store both in the
> pending_diagnostic.
>
> > +
> > const svalue *actual_refcnt_sval = mgr->get_or_create_int_cst (
> > ob_refcnt_sval->get_type (), actual_refcnt);
> > -
> > if (ob_refcnt_sval != actual_refcnt_sval)
> > - {
> > - const svalue *curr_reg_sval
> > - = mgr->get_ptr_svalue (pyobj_ptr_tree, curr_region);
> > - tree reg_tree = old_model->get_representative_tree
> (curr_reg_sval);
> > - if (!reg_tree)
> > - return;
> > -
> > - const auto &eg = ctxt->get_eg ();
> > - refcnt_stmt_finder finder (*eg, reg_tree);
> > - auto pd = make_unique<refcnt_mismatch> (curr_region,
> ob_refcnt_sval,
> > - actual_refcnt_sval,
> reg_tree);
> > - if (pd && eg)
> > - ctxt->warn (std::move (pd), &finder);
> > - }
> > + handle_refcnt_mismatch (old_model, curr_region, ob_refcnt_sval,
> > + actual_refcnt_sval, ctxt);
> > }
> >
> > static void
> > @@ -493,8 +520,6 @@ count_all_references (const region_model *model,
> > for (const auto &cluster : *model->get_store ())
> > {
> > auto curr_region = cluster.first;
> > - if (curr_region->get_kind () != RK_HEAP_ALLOCATED)
> > - continue;
> >
> > increment_region_refcnt (region_to_refcnt, curr_region);
> >
> > @@ -505,8 +530,8 @@ count_all_references (const region_model *model,
> >
> > const svalue *unwrapped_sval
> > = binding_sval->unwrap_any_unmergeable ();
> > - // if (unwrapped_sval->get_type () != pyobj_ptr_tree)
> > - // continue;
> > + if (unwrapped_sval->get_type () != pyobj_ptr_tree)
> > + continue;
>
> We'll probably want a smarter test for this, that the type "inherits"
> C-style from PyObject (e.g. PyLongObject).
>
>
> >
> > const region *pointee = unwrapped_sval->maybe_get_region ();
> > if (!pointee || pointee->get_kind () != RK_HEAP_ALLOCATED)
> > diff --git
> a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c
> b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c
> > index e1efd9efda5..46daf2f8975 100644
> > --- a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c
> > +++ b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c
> > @@ -75,4 +75,23 @@ test_PyListAppend_6 ()
> > PyObject *list = NULL;
> > PyList_Append(list, item);
> > return list;
> > -}
> > \ No newline at end of file
> > +}
> > +
> > +PyObject *
> > +test_PyListAppend_7 (PyObject *item)
> > +{
> > + PyObject *list = PyList_New (0);
> > + Py_INCREF(item);
> > + PyList_Append(list, item);
> > + return list;
> > + /* { dg-warning "expected 'item' to have reference count" "" { target
> *-*-* } .-1 } */
>
> It would be good if these dg-warning directives regexp contained the
> actual and expected counts; I find I can't easily tell what the
> intended output is meant to be.
>
>
> > +}
> > +
> > +PyObject *
> > +test_PyListAppend_8 (PyObject *item, PyObject *list)
> > +{
> > + Py_INCREF(item);
> > + Py_INCREF(item);
> > + PyList_Append(list, item);
> > + return list;
> > +}
>
> Should we complain here about item->ob_refcnt being too high?
>
> > diff --git a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c
> b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c
> > new file mode 100644
> > index 00000000000..a7f39509d6d
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c
> > @@ -0,0 +1,18 @@
> > +/* { dg-do compile } */
> > +/* { dg-require-effective-target analyzer } */
> > +/* { dg-options "-fanalyzer" } */
> > +/* { dg-require-python-h "" } */
> > +
> > +
> > +#define PY_SSIZE_T_CLEAN
> > +#include <Python.h>
> > +#include "../analyzer/analyzer-decls.h"
> > +
> > +PyObject *
> > +test_refcnt_1 (PyObject *obj)
> > +{
> > + Py_INCREF(obj);
> > + Py_INCREF(obj);
> > + return obj;
> > + /* { dg-warning "expected 'obj' to have reference count" "" { target
> *-*-* } .-1 } */
>
> Likewise, it would be better for the dg-warning directive's expressed
> the expected "actual vs expected" values.
>
> [...snip...]
>
> Thanks again for the patch, hope this is constructive
>
> Dave
>
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] analyzer: implement symbolic value support for CPython plugin's refcnt checker [PR107646]
2023-09-11 2:12 ` Eric Feng
@ 2023-09-11 19:00 ` David Malcolm
0 siblings, 0 replies; 4+ messages in thread
From: David Malcolm @ 2023-09-11 19:00 UTC (permalink / raw)
To: Eric Feng; +Cc: gcc, gcc-patches
On Sun, 2023-09-10 at 22:12 -0400, Eric Feng wrote:
> On Thu, Sep 7, 2023 at 1:28 PM David Malcolm <dmalcolm@redhat.com>
> wrote:
>
> > On Mon, 2023-09-04 at 22:13 -0400, Eric Feng wrote:
> >
[...snip...]
> >
> >
> > I know you're emulating the old behavior I implemented way back in
> > cpychecker, but I don't like that behavior :(
> >
> > Specifically, although the patch improves the behavior for symbolic
> > values, it regresses the precision of wording for the concrete
> > values
> > case. If we have e.g. a concrete ob_refcnt of 2, whereas we only
> > have
> > 1 pointer, then it's more readable to say:
> >
> > warning: expected ‘obj’ to have reference count: ‘1’ but
> > ob_refcnt
> > field is ‘2’
> >
> > than:
> >
> > warning: expected ‘obj’ to have reference count: N + ‘1’ but
> > ob_refcnt
> > field is N + ‘2’
> >
> > ...and we shouldn't quote concrete numbers, the message should be:
> >
> > warning: expected ‘obj’ to have reference count of 1 but
> > ob_refcnt field
> > is 2
>
>
> > or better:
> >
> > warning: ‘*obj’ is pointed to by 1 pointer but 'ob_refcnt' field
> > is 2
> >
> >
> > Can you move the unwrapping of the svalue from the tests below into
> > the
> > emit vfunc? That way the m_actual_refcnt doesn't have to be a
> > constant_svalue; you could have logic in the emit vfunc to print
> > readable messages based on what kind of svalue it is.
> >
> > Rather than 'N', it might be better to say 'initial'; how about:
> >
> > warning: ‘*obj’ is pointed to by 0 additional pointers but
> > 'ob_refcnt'
> > field has increased by 1
> > warning: ‘*obj’ is pointed to by 1 additional pointer but
> > 'ob_refcnt'
> > field has increased by 2
> > warning: ‘*obj’ is pointed to by 1 additional pointer but
> > 'ob_refcnt'
> > field is unchanged
> > warning: ‘*obj’ is pointed to by 2 additional pointers but
> > 'ob_refcnt'
> > field has decreased by 1
> > warning: ‘*obj’ is pointed to by 1 fewer pointers but 'ob_refcnt'
> > field
> > is unchanged
> >
> > and similar?
> >
>
> That makes sense to me as well (indeed I was just emulating the old
> behavior)! Will experiment and keep you posted on a revised patch
> with this
> in mind. This is somewhat of a minor detail but can we emit ‘*obj’
> as
> bolded text in the diagnostic message? Currently, I can emit this
> (including the asterisk) like so: '*%E'. But unlike using %qE, it
> doesn't
> bold the body of the single quotations. Is this possible?
Yes.
You could use %< and %> to get the colorized (and localized) quotes
(see pretty-print.cc), but better would probably be to pass a tree for
the *obj, rather than obj. You can make this by building a MEM_REF
tree node wrapping the pointer (you can see an example of this in the
RK_SYMBOLIC case of region_model::get_representative_path_var_1).
Dave
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2023-09-11 19:00 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <aa56c3d9c65032bf6c406dcdd4853afc4e721cf9.camel@redhat.com>
2023-09-05 2:13 ` [PATCH] analyzer: implement symbolic value support for CPython plugin's refcnt checker [PR107646] Eric Feng
2023-09-07 17:28 ` David Malcolm
2023-09-11 2:12 ` Eric Feng
2023-09-11 19:00 ` David Malcolm
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).