* [PATCH] analyzer: implement symbolic value support for CPython plugin's refcnt checker [PR107646] [not found] <aa56c3d9c65032bf6c406dcdd4853afc4e721cf9.camel@redhat.com> @ 2023-09-05 2:13 ` Eric Feng 2023-09-07 17:28 ` David Malcolm 0 siblings, 1 reply; 4+ messages in thread From: Eric Feng @ 2023-09-05 2:13 UTC (permalink / raw) To: dmalcolm; +Cc: gcc, gcc-patches, Eric Feng Hi Dave, Recently I've been working on symbolic value support for the reference count checker. I've attached a patch for it below; let me know it looks OK for trunk. Thanks! Best, Eric --- This patch enhances the reference count checker in the CPython plugin by adding support for symbolic values. Whereas previously we were only able to check the reference count of PyObject* objects created in the scope of the function; we are now able to emit diagnostics on reference count mismatch of objects that were, for example, passed in as a function parameter. rc6.c:6:10: warning: expected ‘obj’ to have reference count: N + ‘1’ but ob_refcnt field is N + ‘2’ 6 | return obj; | ^~~ ‘create_py_object2’: event 1 | | 6 | return obj; | | ^~~ | | | | | (1) here | gcc/testsuite/ChangeLog: PR analyzer/107646 * gcc.dg/plugin/analyzer_cpython_plugin.c: Support reference count checking of symbolic values. * gcc.dg/plugin/cpython-plugin-test-PyList_Append.c: New test. * gcc.dg/plugin/plugin.exp: New test. * gcc.dg/plugin/cpython-plugin-test-refcnt.c: New test. Signed-off-by: Eric Feng <ef2648@columbia.edu> --- .../gcc.dg/plugin/analyzer_cpython_plugin.c | 133 +++++++++++------- .../cpython-plugin-test-PyList_Append.c | 21 ++- .../plugin/cpython-plugin-test-refcnt.c | 18 +++ gcc/testsuite/gcc.dg/plugin/plugin.exp | 1 + 4 files changed, 118 insertions(+), 55 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c diff --git a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c index bf1982e79c3..d7ecd7fce09 100644 --- a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c +++ b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c @@ -314,17 +314,20 @@ public: { diagnostic_metadata m; bool warned; - // just assuming constants for now - auto actual_refcnt - = m_actual_refcnt->dyn_cast_constant_svalue ()->get_constant (); - auto ob_refcnt = m_ob_refcnt->dyn_cast_constant_svalue ()->get_constant (); - warned = warning_meta (rich_loc, m, get_controlling_option (), - "expected %qE to have " - "reference count: %qE but ob_refcnt field is: %qE", - m_reg_tree, actual_refcnt, ob_refcnt); - - // location_t loc = rich_loc->get_loc (); - // foo (loc); + + const auto *actual_refcnt_constant + = m_actual_refcnt->dyn_cast_constant_svalue (); + const auto *ob_refcnt_constant = m_ob_refcnt->dyn_cast_constant_svalue (); + if (!actual_refcnt_constant || !ob_refcnt_constant) + return false; + + auto actual_refcnt = actual_refcnt_constant->get_constant (); + auto ob_refcnt = ob_refcnt_constant->get_constant (); + warned = warning_meta ( + rich_loc, m, get_controlling_option (), + "expected %qE to have " + "reference count: N + %qE but ob_refcnt field is N + %qE", + m_reg_tree, actual_refcnt, ob_refcnt); return warned; } @@ -336,10 +339,6 @@ public: private: - void foo(location_t loc) const - { - inform(loc, "something is up right here"); - } const region *m_base_region; const svalue *m_ob_refcnt; const svalue *m_actual_refcnt; @@ -369,6 +368,19 @@ increment_region_refcnt (hash_map<const region *, int> &map, const region *key) refcnt = existed ? refcnt + 1 : 1; } +static const region * +get_region_from_svalue (const svalue *sval, region_model_manager *mgr) +{ + const auto *region_sval = sval->dyn_cast_region_svalue (); + if (region_sval) + return region_sval->get_pointee (); + + const auto *initial_sval = sval->dyn_cast_initial_svalue (); + if (initial_sval) + return mgr->get_symbolic_region (initial_sval); + + return nullptr; +} /* Recursively fills in region_to_refcnt with the references owned by pyobj_ptr_sval. */ @@ -381,20 +393,9 @@ count_pyobj_references (const region_model *model, if (!pyobj_ptr_sval) return; - const auto *pyobj_region_sval = pyobj_ptr_sval->dyn_cast_region_svalue (); - const auto *pyobj_initial_sval = pyobj_ptr_sval->dyn_cast_initial_svalue (); - if (!pyobj_region_sval && !pyobj_initial_sval) - return; - - // todo: support initial sval (e.g passed in as parameter) - if (pyobj_initial_sval) - { - // increment_region_refcnt (region_to_refcnt, - // pyobj_initial_sval->get_region ()); - return; - } + region_model_manager *mgr = model->get_manager (); - const region *pyobj_region = pyobj_region_sval->get_pointee (); + const region *pyobj_region = get_region_from_svalue (pyobj_ptr_sval, mgr); if (!pyobj_region || seen.contains (pyobj_region)) return; @@ -409,49 +410,75 @@ count_pyobj_references (const region_model *model, return; const auto &retval_binding_map = retval_cluster->get_map (); - for (const auto &binding : retval_binding_map) { - const svalue *binding_sval = binding.second; - const svalue *unwrapped_sval = binding_sval->unwrap_any_unmergeable (); - const region *pointee = unwrapped_sval->maybe_get_region (); - - if (pointee && pointee->get_kind () == RK_HEAP_ALLOCATED) + const svalue *binding_sval = binding.second->unwrap_any_unmergeable (); + if (get_region_from_svalue (binding_sval, mgr)) count_pyobj_references (model, region_to_refcnt, binding_sval, seen); } } +static void +unwrap_any_ob_refcnt_sval (const svalue *&ob_refcnt_sval) +{ + if (ob_refcnt_sval->get_kind () != SK_CONSTANT) + { + auto unwrap_cast = ob_refcnt_sval->maybe_undo_cast (); + if (!unwrap_cast) + unwrap_cast = ob_refcnt_sval; + + if (unwrap_cast->get_kind () == SK_BINOP) + ob_refcnt_sval = unwrap_cast->dyn_cast_binop_svalue ()->get_arg1 (); + } +} + +static void +handle_refcnt_mismatch (const region_model *old_model, + const ana::region *curr_region, + const svalue *ob_refcnt_sval, + const svalue *actual_refcnt_sval, + region_model_context *ctxt) +{ + region_model_manager *mgr = old_model->get_manager (); + const svalue *curr_reg_sval + = mgr->get_ptr_svalue (pyobj_ptr_tree, curr_region); + tree reg_tree = old_model->get_representative_tree (curr_reg_sval); + + if (!reg_tree) + return; + + const auto &eg = ctxt->get_eg (); + refcnt_stmt_finder finder (*eg, reg_tree); + auto pd = make_unique<refcnt_mismatch> (curr_region, ob_refcnt_sval, + actual_refcnt_sval, reg_tree); + if (pd && eg) + ctxt->warn (std::move (pd), &finder); +} + /* Compare ob_refcnt field vs the actual reference count of a region */ static void check_refcnt (const region_model *model, const region_model *old_model, region_model_context *ctxt, const hash_map<const ana::region *, - int>::iterator::reference_pair region_refcnt) + int>::iterator::reference_pair ®ion_refcnt) { region_model_manager *mgr = model->get_manager (); const auto &curr_region = region_refcnt.first; const auto &actual_refcnt = region_refcnt.second; + const svalue *ob_refcnt_sval = retrieve_ob_refcnt_sval (curr_region, model, ctxt); + if (!ob_refcnt_sval) + return; + + unwrap_any_ob_refcnt_sval (ob_refcnt_sval); + const svalue *actual_refcnt_sval = mgr->get_or_create_int_cst ( ob_refcnt_sval->get_type (), actual_refcnt); - if (ob_refcnt_sval != actual_refcnt_sval) - { - const svalue *curr_reg_sval - = mgr->get_ptr_svalue (pyobj_ptr_tree, curr_region); - tree reg_tree = old_model->get_representative_tree (curr_reg_sval); - if (!reg_tree) - return; - - const auto &eg = ctxt->get_eg (); - refcnt_stmt_finder finder (*eg, reg_tree); - auto pd = make_unique<refcnt_mismatch> (curr_region, ob_refcnt_sval, - actual_refcnt_sval, reg_tree); - if (pd && eg) - ctxt->warn (std::move (pd), &finder); - } + handle_refcnt_mismatch (old_model, curr_region, ob_refcnt_sval, + actual_refcnt_sval, ctxt); } static void @@ -493,8 +520,6 @@ count_all_references (const region_model *model, for (const auto &cluster : *model->get_store ()) { auto curr_region = cluster.first; - if (curr_region->get_kind () != RK_HEAP_ALLOCATED) - continue; increment_region_refcnt (region_to_refcnt, curr_region); @@ -505,8 +530,8 @@ count_all_references (const region_model *model, const svalue *unwrapped_sval = binding_sval->unwrap_any_unmergeable (); - // if (unwrapped_sval->get_type () != pyobj_ptr_tree) - // continue; + if (unwrapped_sval->get_type () != pyobj_ptr_tree) + continue; const region *pointee = unwrapped_sval->maybe_get_region (); if (!pointee || pointee->get_kind () != RK_HEAP_ALLOCATED) diff --git a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c index e1efd9efda5..46daf2f8975 100644 --- a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c +++ b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c @@ -75,4 +75,23 @@ test_PyListAppend_6 () PyObject *list = NULL; PyList_Append(list, item); return list; -} \ No newline at end of file +} + +PyObject * +test_PyListAppend_7 (PyObject *item) +{ + PyObject *list = PyList_New (0); + Py_INCREF(item); + PyList_Append(list, item); + return list; + /* { dg-warning "expected 'item' to have reference count" "" { target *-*-* } .-1 } */ +} + +PyObject * +test_PyListAppend_8 (PyObject *item, PyObject *list) +{ + Py_INCREF(item); + Py_INCREF(item); + PyList_Append(list, item); + return list; +} diff --git a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c new file mode 100644 index 00000000000..a7f39509d6d --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target analyzer } */ +/* { dg-options "-fanalyzer" } */ +/* { dg-require-python-h "" } */ + + +#define PY_SSIZE_T_CLEAN +#include <Python.h> +#include "../analyzer/analyzer-decls.h" + +PyObject * +test_refcnt_1 (PyObject *obj) +{ + Py_INCREF(obj); + Py_INCREF(obj); + return obj; + /* { dg-warning "expected 'obj' to have reference count" "" { target *-*-* } .-1 } */ +} diff --git a/gcc/testsuite/gcc.dg/plugin/plugin.exp b/gcc/testsuite/gcc.dg/plugin/plugin.exp index ed72912309c..87862b4ca00 100644 --- a/gcc/testsuite/gcc.dg/plugin/plugin.exp +++ b/gcc/testsuite/gcc.dg/plugin/plugin.exp @@ -162,6 +162,7 @@ set plugin_test_list [list \ taint-antipatterns-1.c } \ { analyzer_cpython_plugin.c \ cpython-plugin-test-no-Python-h.c \ + cpython-plugin-test-refcnt.c \ cpython-plugin-test-PyList_Append.c \ cpython-plugin-test-PyList_New.c \ cpython-plugin-test-PyLong_FromLong.c } \ -- 2.30.2 ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] analyzer: implement symbolic value support for CPython plugin's refcnt checker [PR107646] 2023-09-05 2:13 ` [PATCH] analyzer: implement symbolic value support for CPython plugin's refcnt checker [PR107646] Eric Feng @ 2023-09-07 17:28 ` David Malcolm 2023-09-11 2:12 ` Eric Feng 0 siblings, 1 reply; 4+ messages in thread From: David Malcolm @ 2023-09-07 17:28 UTC (permalink / raw) To: Eric Feng; +Cc: gcc, gcc-patches On Mon, 2023-09-04 at 22:13 -0400, Eric Feng wrote: > Hi Dave, Hi Eric, thanks for the patch. > > Recently I've been working on symbolic value support for the reference > count checker. I've attached a patch for it below; let me know it looks > OK for trunk. Thanks! > > Best, > Eric > > --- > > This patch enhances the reference count checker in the CPython plugin by > adding support for symbolic values. Whereas previously we were only able > to check the reference count of PyObject* objects created in the scope > of the function; we are now able to emit diagnostics on reference count > mismatch of objects that were, for example, passed in as a function > parameter. > > rc6.c:6:10: warning: expected ‘obj’ to have reference count: N + ‘1’ but ob_refcnt field is N + ‘2’ > 6 | return obj; > | ^~~ [...snip...] > create mode 100644 gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c > > diff --git a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c > index bf1982e79c3..d7ecd7fce09 100644 > --- a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c > +++ b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c > @@ -314,17 +314,20 @@ public: > { > diagnostic_metadata m; > bool warned; > - // just assuming constants for now > - auto actual_refcnt > - = m_actual_refcnt->dyn_cast_constant_svalue ()->get_constant (); > - auto ob_refcnt = m_ob_refcnt->dyn_cast_constant_svalue ()->get_constant (); > - warned = warning_meta (rich_loc, m, get_controlling_option (), > - "expected %qE to have " > - "reference count: %qE but ob_refcnt field is: %qE", > - m_reg_tree, actual_refcnt, ob_refcnt); > - > - // location_t loc = rich_loc->get_loc (); > - // foo (loc); > + > + const auto *actual_refcnt_constant > + = m_actual_refcnt->dyn_cast_constant_svalue (); > + const auto *ob_refcnt_constant = m_ob_refcnt->dyn_cast_constant_svalue (); > + if (!actual_refcnt_constant || !ob_refcnt_constant) > + return false; > + > + auto actual_refcnt = actual_refcnt_constant->get_constant (); > + auto ob_refcnt = ob_refcnt_constant->get_constant (); > + warned = warning_meta ( > + rich_loc, m, get_controlling_option (), > + "expected %qE to have " > + "reference count: N + %qE but ob_refcnt field is N + %qE", > + m_reg_tree, actual_refcnt, ob_refcnt); > return warned; I know you're emulating the old behavior I implemented way back in cpychecker, but I don't like that behavior :( Specifically, although the patch improves the behavior for symbolic values, it regresses the precision of wording for the concrete values case. If we have e.g. a concrete ob_refcnt of 2, whereas we only have 1 pointer, then it's more readable to say: warning: expected ‘obj’ to have reference count: ‘1’ but ob_refcnt field is ‘2’ than: warning: expected ‘obj’ to have reference count: N + ‘1’ but ob_refcnt field is N + ‘2’ ...and we shouldn't quote concrete numbers, the message should be: warning: expected ‘obj’ to have reference count of 1 but ob_refcnt field is 2 or better: warning: ‘*obj’ is pointed to by 1 pointer but 'ob_refcnt' field is 2 Can you move the unwrapping of the svalue from the tests below into the emit vfunc? That way the m_actual_refcnt doesn't have to be a constant_svalue; you could have logic in the emit vfunc to print readable messages based on what kind of svalue it is. Rather than 'N', it might be better to say 'initial'; how about: warning: ‘*obj’ is pointed to by 0 additional pointers but 'ob_refcnt' field has increased by 1 warning: ‘*obj’ is pointed to by 1 additional pointer but 'ob_refcnt' field has increased by 2 warning: ‘*obj’ is pointed to by 1 additional pointer but 'ob_refcnt' field is unchanged warning: ‘*obj’ is pointed to by 2 additional pointers but 'ob_refcnt' field has decreased by 1 warning: ‘*obj’ is pointed to by 1 fewer pointers but 'ob_refcnt' field is unchanged and similar? Maybe have a flag that tracks whether we're talking about a concrete value that's absolute versus a concrete value that's relative to the initial value? [...snip...] > @@ -369,6 +368,19 @@ increment_region_refcnt (hash_map<const region *, int> &map, const region *key) > refcnt = existed ? refcnt + 1 : 1; > } > > +static const region * > +get_region_from_svalue (const svalue *sval, region_model_manager *mgr) > +{ > + const auto *region_sval = sval->dyn_cast_region_svalue (); > + if (region_sval) > + return region_sval->get_pointee (); > + > + const auto *initial_sval = sval->dyn_cast_initial_svalue (); > + if (initial_sval) > + return mgr->get_symbolic_region (initial_sval); > + > + return nullptr; > +} This is dereferencing a pointer, right? Can the caller use region_model::deref_rvalue instead? [...snip...] > +static void > +unwrap_any_ob_refcnt_sval (const svalue *&ob_refcnt_sval) > +{ > + if (ob_refcnt_sval->get_kind () != SK_CONSTANT) > + { > + auto unwrap_cast = ob_refcnt_sval->maybe_undo_cast (); > + if (!unwrap_cast) > + unwrap_cast = ob_refcnt_sval; > + > + if (unwrap_cast->get_kind () == SK_BINOP) > + ob_refcnt_sval = unwrap_cast->dyn_cast_binop_svalue ()->get_arg1 (); This would be better spelled as: if (const binop_svalue *binop_sval = unwrap_cast->dyn_cast_binop_svalue ()) ob_refcnt_sval = binop_sval->get_arg1 (); [...snip...] > /* Compare ob_refcnt field vs the actual reference count of a region */ > static void > check_refcnt (const region_model *model, > const region_model *old_model, > region_model_context *ctxt, > const hash_map<const ana::region *, > - int>::iterator::reference_pair region_refcnt) > + int>::iterator::reference_pair ®ion_refcnt) Could really use a typedef for const hash_map<const ana::region *, int> to simplify this code. > { > region_model_manager *mgr = model->get_manager (); > const auto &curr_region = region_refcnt.first; > const auto &actual_refcnt = region_refcnt.second; > + > const svalue *ob_refcnt_sval > = retrieve_ob_refcnt_sval (curr_region, model, ctxt); > + if (!ob_refcnt_sval) > + return; > + > + unwrap_any_ob_refcnt_sval (ob_refcnt_sval); As noted above, can the diagnostic store the pre-unwrapped ob_refcnt_sval? Might mean you have to do the unwrapping both here, and later when displaying the diagnostic. Or (probably best) track both the original and unwrapped ob_refcnt_sval, and store both in the pending_diagnostic. > + > const svalue *actual_refcnt_sval = mgr->get_or_create_int_cst ( > ob_refcnt_sval->get_type (), actual_refcnt); > - > if (ob_refcnt_sval != actual_refcnt_sval) > - { > - const svalue *curr_reg_sval > - = mgr->get_ptr_svalue (pyobj_ptr_tree, curr_region); > - tree reg_tree = old_model->get_representative_tree (curr_reg_sval); > - if (!reg_tree) > - return; > - > - const auto &eg = ctxt->get_eg (); > - refcnt_stmt_finder finder (*eg, reg_tree); > - auto pd = make_unique<refcnt_mismatch> (curr_region, ob_refcnt_sval, > - actual_refcnt_sval, reg_tree); > - if (pd && eg) > - ctxt->warn (std::move (pd), &finder); > - } > + handle_refcnt_mismatch (old_model, curr_region, ob_refcnt_sval, > + actual_refcnt_sval, ctxt); > } > > static void > @@ -493,8 +520,6 @@ count_all_references (const region_model *model, > for (const auto &cluster : *model->get_store ()) > { > auto curr_region = cluster.first; > - if (curr_region->get_kind () != RK_HEAP_ALLOCATED) > - continue; > > increment_region_refcnt (region_to_refcnt, curr_region); > > @@ -505,8 +530,8 @@ count_all_references (const region_model *model, > > const svalue *unwrapped_sval > = binding_sval->unwrap_any_unmergeable (); > - // if (unwrapped_sval->get_type () != pyobj_ptr_tree) > - // continue; > + if (unwrapped_sval->get_type () != pyobj_ptr_tree) > + continue; We'll probably want a smarter test for this, that the type "inherits" C-style from PyObject (e.g. PyLongObject). > > const region *pointee = unwrapped_sval->maybe_get_region (); > if (!pointee || pointee->get_kind () != RK_HEAP_ALLOCATED) > diff --git a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c > index e1efd9efda5..46daf2f8975 100644 > --- a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c > +++ b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c > @@ -75,4 +75,23 @@ test_PyListAppend_6 () > PyObject *list = NULL; > PyList_Append(list, item); > return list; > -} > \ No newline at end of file > +} > + > +PyObject * > +test_PyListAppend_7 (PyObject *item) > +{ > + PyObject *list = PyList_New (0); > + Py_INCREF(item); > + PyList_Append(list, item); > + return list; > + /* { dg-warning "expected 'item' to have reference count" "" { target *-*-* } .-1 } */ It would be good if these dg-warning directives regexp contained the actual and expected counts; I find I can't easily tell what the intended output is meant to be. > +} > + > +PyObject * > +test_PyListAppend_8 (PyObject *item, PyObject *list) > +{ > + Py_INCREF(item); > + Py_INCREF(item); > + PyList_Append(list, item); > + return list; > +} Should we complain here about item->ob_refcnt being too high? > diff --git a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c > new file mode 100644 > index 00000000000..a7f39509d6d > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target analyzer } */ > +/* { dg-options "-fanalyzer" } */ > +/* { dg-require-python-h "" } */ > + > + > +#define PY_SSIZE_T_CLEAN > +#include <Python.h> > +#include "../analyzer/analyzer-decls.h" > + > +PyObject * > +test_refcnt_1 (PyObject *obj) > +{ > + Py_INCREF(obj); > + Py_INCREF(obj); > + return obj; > + /* { dg-warning "expected 'obj' to have reference count" "" { target *-*-* } .-1 } */ Likewise, it would be better for the dg-warning directive's expressed the expected "actual vs expected" values. [...snip...] Thanks again for the patch, hope this is constructive Dave ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] analyzer: implement symbolic value support for CPython plugin's refcnt checker [PR107646] 2023-09-07 17:28 ` David Malcolm @ 2023-09-11 2:12 ` Eric Feng 2023-09-11 19:00 ` David Malcolm 0 siblings, 1 reply; 4+ messages in thread From: Eric Feng @ 2023-09-11 2:12 UTC (permalink / raw) To: David Malcolm; +Cc: gcc, gcc-patches [-- Attachment #1: Type: text/plain, Size: 12148 bytes --] On Thu, Sep 7, 2023 at 1:28 PM David Malcolm <dmalcolm@redhat.com> wrote: > On Mon, 2023-09-04 at 22:13 -0400, Eric Feng wrote: > > > Hi Dave, > > Hi Eric, thanks for the patch. > > > > > Recently I've been working on symbolic value support for the reference > > count checker. I've attached a patch for it below; let me know it looks > > OK for trunk. Thanks! > > > > Best, > > Eric > > > > --- > > > > This patch enhances the reference count checker in the CPython plugin by > > adding support for symbolic values. Whereas previously we were only able > > to check the reference count of PyObject* objects created in the scope > > of the function; we are now able to emit diagnostics on reference count > > mismatch of objects that were, for example, passed in as a function > > parameter. > > > > rc6.c:6:10: warning: expected ‘obj’ to have reference count: N + ‘1’ but > ob_refcnt field is N + ‘2’ > > 6 | return obj; > > | ^~~ > > [...snip...] > > > create mode 100644 > gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c > > > > diff --git a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c > b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c > > index bf1982e79c3..d7ecd7fce09 100644 > > --- a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c > > +++ b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c > > @@ -314,17 +314,20 @@ public: > > { > > diagnostic_metadata m; > > bool warned; > > - // just assuming constants for now > > - auto actual_refcnt > > - = m_actual_refcnt->dyn_cast_constant_svalue ()->get_constant (); > > - auto ob_refcnt = m_ob_refcnt->dyn_cast_constant_svalue > ()->get_constant (); > > - warned = warning_meta (rich_loc, m, get_controlling_option (), > > - "expected %qE to have " > > - "reference count: %qE but ob_refcnt field is: > %qE", > > - m_reg_tree, actual_refcnt, ob_refcnt); > > - > > - // location_t loc = rich_loc->get_loc (); > > - // foo (loc); > > + > > + const auto *actual_refcnt_constant > > + = m_actual_refcnt->dyn_cast_constant_svalue (); > > + const auto *ob_refcnt_constant = > m_ob_refcnt->dyn_cast_constant_svalue (); > > + if (!actual_refcnt_constant || !ob_refcnt_constant) > > + return false; > > + > > + auto actual_refcnt = actual_refcnt_constant->get_constant (); > > + auto ob_refcnt = ob_refcnt_constant->get_constant (); > > + warned = warning_meta ( > > + rich_loc, m, get_controlling_option (), > > + "expected %qE to have " > > + "reference count: N + %qE but ob_refcnt field is N + %qE", > > + m_reg_tree, actual_refcnt, ob_refcnt); > > return warned; > > I know you're emulating the old behavior I implemented way back in > cpychecker, but I don't like that behavior :( > > Specifically, although the patch improves the behavior for symbolic > values, it regresses the precision of wording for the concrete values > case. If we have e.g. a concrete ob_refcnt of 2, whereas we only have > 1 pointer, then it's more readable to say: > > warning: expected ‘obj’ to have reference count: ‘1’ but ob_refcnt > field is ‘2’ > > than: > > warning: expected ‘obj’ to have reference count: N + ‘1’ but ob_refcnt > field is N + ‘2’ > > ...and we shouldn't quote concrete numbers, the message should be: > > warning: expected ‘obj’ to have reference count of 1 but ob_refcnt field > is 2 > or better: > > warning: ‘*obj’ is pointed to by 1 pointer but 'ob_refcnt' field is 2 > > > Can you move the unwrapping of the svalue from the tests below into the > emit vfunc? That way the m_actual_refcnt doesn't have to be a > constant_svalue; you could have logic in the emit vfunc to print > readable messages based on what kind of svalue it is. > > Rather than 'N', it might be better to say 'initial'; how about: > > warning: ‘*obj’ is pointed to by 0 additional pointers but 'ob_refcnt' > field has increased by 1 > warning: ‘*obj’ is pointed to by 1 additional pointer but 'ob_refcnt' > field has increased by 2 > warning: ‘*obj’ is pointed to by 1 additional pointer but 'ob_refcnt' > field is unchanged > warning: ‘*obj’ is pointed to by 2 additional pointers but 'ob_refcnt' > field has decreased by 1 > warning: ‘*obj’ is pointed to by 1 fewer pointers but 'ob_refcnt' field > is unchanged > > and similar? > That makes sense to me as well (indeed I was just emulating the old behavior)! Will experiment and keep you posted on a revised patch with this in mind. This is somewhat of a minor detail but can we emit ‘*obj’ as bolded text in the diagnostic message? Currently, I can emit this (including the asterisk) like so: '*%E'. But unlike using %qE, it doesn't bold the body of the single quotations. Is this possible? > > Maybe have a flag that tracks whether we're talking about a concrete > value that's absolute versus a concrete value that's relative to the > initial value? > > > [...snip...] > > > > @@ -369,6 +368,19 @@ increment_region_refcnt (hash_map<const region *, > int> &map, const region *key) > > refcnt = existed ? refcnt + 1 : 1; > > } > > > > +static const region * > > +get_region_from_svalue (const svalue *sval, region_model_manager *mgr) > > +{ > > + const auto *region_sval = sval->dyn_cast_region_svalue (); > > + if (region_sval) > > + return region_sval->get_pointee (); > > + > > + const auto *initial_sval = sval->dyn_cast_initial_svalue (); > > + if (initial_sval) > > + return mgr->get_symbolic_region (initial_sval); > > + > > + return nullptr; > > +} > > This is dereferencing a pointer, right? > > Can the caller use region_model::deref_rvalue instead? > > > [...snip...] > > > +static void > > +unwrap_any_ob_refcnt_sval (const svalue *&ob_refcnt_sval) > > +{ > > + if (ob_refcnt_sval->get_kind () != SK_CONSTANT) > > + { > > + auto unwrap_cast = ob_refcnt_sval->maybe_undo_cast (); > > + if (!unwrap_cast) > > + unwrap_cast = ob_refcnt_sval; > > + > > + if (unwrap_cast->get_kind () == SK_BINOP) > > + ob_refcnt_sval = unwrap_cast->dyn_cast_binop_svalue ()->get_arg1 > (); > > This would be better spelled as: > > if (const binop_svalue *binop_sval = > unwrap_cast->dyn_cast_binop_svalue ()) > ob_refcnt_sval = binop_sval->get_arg1 (); > > [...snip...] > > > /* Compare ob_refcnt field vs the actual reference count of a region */ > > static void > > check_refcnt (const region_model *model, > > const region_model *old_model, > > region_model_context *ctxt, > > const hash_map<const ana::region *, > > - int>::iterator::reference_pair region_refcnt) > > + int>::iterator::reference_pair ®ion_refcnt) > > Could really use a typedef for > const hash_map<const ana::region *, int> > to simplify this code. > > > { > > region_model_manager *mgr = model->get_manager (); > > const auto &curr_region = region_refcnt.first; > > const auto &actual_refcnt = region_refcnt.second; > > + > > const svalue *ob_refcnt_sval > > = retrieve_ob_refcnt_sval (curr_region, model, ctxt); > > + if (!ob_refcnt_sval) > > + return; > > + > > + unwrap_any_ob_refcnt_sval (ob_refcnt_sval); > > As noted above, can the diagnostic store the pre-unwrapped > ob_refcnt_sval? Might mean you have to do the unwrapping both here, > and later when displaying the diagnostic. Or (probably best) track > both the original and unwrapped ob_refcnt_sval, and store both in the > pending_diagnostic. > > > + > > const svalue *actual_refcnt_sval = mgr->get_or_create_int_cst ( > > ob_refcnt_sval->get_type (), actual_refcnt); > > - > > if (ob_refcnt_sval != actual_refcnt_sval) > > - { > > - const svalue *curr_reg_sval > > - = mgr->get_ptr_svalue (pyobj_ptr_tree, curr_region); > > - tree reg_tree = old_model->get_representative_tree > (curr_reg_sval); > > - if (!reg_tree) > > - return; > > - > > - const auto &eg = ctxt->get_eg (); > > - refcnt_stmt_finder finder (*eg, reg_tree); > > - auto pd = make_unique<refcnt_mismatch> (curr_region, > ob_refcnt_sval, > > - actual_refcnt_sval, > reg_tree); > > - if (pd && eg) > > - ctxt->warn (std::move (pd), &finder); > > - } > > + handle_refcnt_mismatch (old_model, curr_region, ob_refcnt_sval, > > + actual_refcnt_sval, ctxt); > > } > > > > static void > > @@ -493,8 +520,6 @@ count_all_references (const region_model *model, > > for (const auto &cluster : *model->get_store ()) > > { > > auto curr_region = cluster.first; > > - if (curr_region->get_kind () != RK_HEAP_ALLOCATED) > > - continue; > > > > increment_region_refcnt (region_to_refcnt, curr_region); > > > > @@ -505,8 +530,8 @@ count_all_references (const region_model *model, > > > > const svalue *unwrapped_sval > > = binding_sval->unwrap_any_unmergeable (); > > - // if (unwrapped_sval->get_type () != pyobj_ptr_tree) > > - // continue; > > + if (unwrapped_sval->get_type () != pyobj_ptr_tree) > > + continue; > > We'll probably want a smarter test for this, that the type "inherits" > C-style from PyObject (e.g. PyLongObject). > > > > > > const region *pointee = unwrapped_sval->maybe_get_region (); > > if (!pointee || pointee->get_kind () != RK_HEAP_ALLOCATED) > > diff --git > a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c > b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c > > index e1efd9efda5..46daf2f8975 100644 > > --- a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c > > +++ b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c > > @@ -75,4 +75,23 @@ test_PyListAppend_6 () > > PyObject *list = NULL; > > PyList_Append(list, item); > > return list; > > -} > > \ No newline at end of file > > +} > > + > > +PyObject * > > +test_PyListAppend_7 (PyObject *item) > > +{ > > + PyObject *list = PyList_New (0); > > + Py_INCREF(item); > > + PyList_Append(list, item); > > + return list; > > + /* { dg-warning "expected 'item' to have reference count" "" { target > *-*-* } .-1 } */ > > It would be good if these dg-warning directives regexp contained the > actual and expected counts; I find I can't easily tell what the > intended output is meant to be. > > > > +} > > + > > +PyObject * > > +test_PyListAppend_8 (PyObject *item, PyObject *list) > > +{ > > + Py_INCREF(item); > > + Py_INCREF(item); > > + PyList_Append(list, item); > > + return list; > > +} > > Should we complain here about item->ob_refcnt being too high? > > > diff --git a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c > b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c > > new file mode 100644 > > index 00000000000..a7f39509d6d > > --- /dev/null > > +++ b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c > > @@ -0,0 +1,18 @@ > > +/* { dg-do compile } */ > > +/* { dg-require-effective-target analyzer } */ > > +/* { dg-options "-fanalyzer" } */ > > +/* { dg-require-python-h "" } */ > > + > > + > > +#define PY_SSIZE_T_CLEAN > > +#include <Python.h> > > +#include "../analyzer/analyzer-decls.h" > > + > > +PyObject * > > +test_refcnt_1 (PyObject *obj) > > +{ > > + Py_INCREF(obj); > > + Py_INCREF(obj); > > + return obj; > > + /* { dg-warning "expected 'obj' to have reference count" "" { target > *-*-* } .-1 } */ > > Likewise, it would be better for the dg-warning directive's expressed > the expected "actual vs expected" values. > > [...snip...] > > Thanks again for the patch, hope this is constructive > > Dave > > ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] analyzer: implement symbolic value support for CPython plugin's refcnt checker [PR107646] 2023-09-11 2:12 ` Eric Feng @ 2023-09-11 19:00 ` David Malcolm 0 siblings, 0 replies; 4+ messages in thread From: David Malcolm @ 2023-09-11 19:00 UTC (permalink / raw) To: Eric Feng; +Cc: gcc, gcc-patches On Sun, 2023-09-10 at 22:12 -0400, Eric Feng wrote: > On Thu, Sep 7, 2023 at 1:28 PM David Malcolm <dmalcolm@redhat.com> > wrote: > > > On Mon, 2023-09-04 at 22:13 -0400, Eric Feng wrote: > > [...snip...] > > > > > > I know you're emulating the old behavior I implemented way back in > > cpychecker, but I don't like that behavior :( > > > > Specifically, although the patch improves the behavior for symbolic > > values, it regresses the precision of wording for the concrete > > values > > case. If we have e.g. a concrete ob_refcnt of 2, whereas we only > > have > > 1 pointer, then it's more readable to say: > > > > warning: expected ‘obj’ to have reference count: ‘1’ but > > ob_refcnt > > field is ‘2’ > > > > than: > > > > warning: expected ‘obj’ to have reference count: N + ‘1’ but > > ob_refcnt > > field is N + ‘2’ > > > > ...and we shouldn't quote concrete numbers, the message should be: > > > > warning: expected ‘obj’ to have reference count of 1 but > > ob_refcnt field > > is 2 > > > > or better: > > > > warning: ‘*obj’ is pointed to by 1 pointer but 'ob_refcnt' field > > is 2 > > > > > > Can you move the unwrapping of the svalue from the tests below into > > the > > emit vfunc? That way the m_actual_refcnt doesn't have to be a > > constant_svalue; you could have logic in the emit vfunc to print > > readable messages based on what kind of svalue it is. > > > > Rather than 'N', it might be better to say 'initial'; how about: > > > > warning: ‘*obj’ is pointed to by 0 additional pointers but > > 'ob_refcnt' > > field has increased by 1 > > warning: ‘*obj’ is pointed to by 1 additional pointer but > > 'ob_refcnt' > > field has increased by 2 > > warning: ‘*obj’ is pointed to by 1 additional pointer but > > 'ob_refcnt' > > field is unchanged > > warning: ‘*obj’ is pointed to by 2 additional pointers but > > 'ob_refcnt' > > field has decreased by 1 > > warning: ‘*obj’ is pointed to by 1 fewer pointers but 'ob_refcnt' > > field > > is unchanged > > > > and similar? > > > > That makes sense to me as well (indeed I was just emulating the old > behavior)! Will experiment and keep you posted on a revised patch > with this > in mind. This is somewhat of a minor detail but can we emit ‘*obj’ > as > bolded text in the diagnostic message? Currently, I can emit this > (including the asterisk) like so: '*%E'. But unlike using %qE, it > doesn't > bold the body of the single quotations. Is this possible? Yes. You could use %< and %> to get the colorized (and localized) quotes (see pretty-print.cc), but better would probably be to pass a tree for the *obj, rather than obj. You can make this by building a MEM_REF tree node wrapping the pointer (you can see an example of this in the RK_SYMBOLIC case of region_model::get_representative_path_var_1). Dave ^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2023-09-11 19:00 UTC | newest] Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- [not found] <aa56c3d9c65032bf6c406dcdd4853afc4e721cf9.camel@redhat.com> 2023-09-05 2:13 ` [PATCH] analyzer: implement symbolic value support for CPython plugin's refcnt checker [PR107646] Eric Feng 2023-09-07 17:28 ` David Malcolm 2023-09-11 2:12 ` Eric Feng 2023-09-11 19:00 ` David Malcolm
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).