[PATCH] analyzer: implement symbolic value support for CPython plugin's refcnt checker [PR107646]

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH] analyzer: implement symbolic value support for CPython plugin's refcnt checker [PR107646]
       [not found] <aa56c3d9c65032bf6c406dcdd4853afc4e721cf9.camel@redhat.com>
@ 2023-09-05  2:13 ` Eric Feng
  2023-09-07 17:28   ` David Malcolm
  0 siblings, 1 reply; 4+ messages in thread
From: Eric Feng @ 2023-09-05  2:13 UTC (permalink / raw)
  To: dmalcolm; +Cc: gcc, gcc-patches, Eric Feng

Hi Dave,

Recently I've been working on symbolic value support for the reference
count checker. I've attached a patch for it below; let me know it looks
OK for trunk. Thanks!

Best,
Eric

---

This patch enhances the reference count checker in the CPython plugin by
adding support for symbolic values. Whereas previously we were only able
to check the reference count of PyObject* objects created in the scope
of the function; we are now able to emit diagnostics on reference count
mismatch of objects that were, for example, passed in as a function
parameter.

rc6.c:6:10: warning: expected ‘obj’ to have reference count: N + ‘1’ but ob_refcnt field is N + ‘2’
    6 |   return obj;
      |          ^~~
  ‘create_py_object2’: event 1
    |
    |    6 |   return obj;
    |      |          ^~~
    |      |          |
    |      |          (1) here
    |


gcc/testsuite/ChangeLog:
	PR analyzer/107646
	* gcc.dg/plugin/analyzer_cpython_plugin.c: Support reference count checking
	of symbolic values.
	* gcc.dg/plugin/cpython-plugin-test-PyList_Append.c: New test.
	* gcc.dg/plugin/plugin.exp: New test.
	* gcc.dg/plugin/cpython-plugin-test-refcnt.c: New test.

Signed-off-by: Eric Feng <ef2648@columbia.edu>

---
 .../gcc.dg/plugin/analyzer_cpython_plugin.c   | 133 +++++++++++-------
 .../cpython-plugin-test-PyList_Append.c       |  21 ++-
 .../plugin/cpython-plugin-test-refcnt.c       |  18 +++
 gcc/testsuite/gcc.dg/plugin/plugin.exp        |   1 +
 4 files changed, 118 insertions(+), 55 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c

diff --git a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
index bf1982e79c3..d7ecd7fce09 100644
--- a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
+++ b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
@@ -314,17 +314,20 @@ public:
   {
     diagnostic_metadata m;
     bool warned;
-    // just assuming constants for now
-    auto actual_refcnt
-	= m_actual_refcnt->dyn_cast_constant_svalue ()->get_constant ();
-    auto ob_refcnt = m_ob_refcnt->dyn_cast_constant_svalue ()->get_constant ();
-    warned = warning_meta (rich_loc, m, get_controlling_option (),
-			   "expected %qE to have "
-			   "reference count: %qE but ob_refcnt field is: %qE",
-			   m_reg_tree, actual_refcnt, ob_refcnt);
-
-    // location_t loc = rich_loc->get_loc ();
-    // foo (loc);
+
+    const auto *actual_refcnt_constant
+	= m_actual_refcnt->dyn_cast_constant_svalue ();
+    const auto *ob_refcnt_constant = m_ob_refcnt->dyn_cast_constant_svalue ();
+    if (!actual_refcnt_constant || !ob_refcnt_constant)
+      return false;
+
+    auto actual_refcnt = actual_refcnt_constant->get_constant ();
+    auto ob_refcnt = ob_refcnt_constant->get_constant ();
+    warned = warning_meta (
+	rich_loc, m, get_controlling_option (),
+	"expected %qE to have "
+	"reference count: N + %qE but ob_refcnt field is N + %qE",
+	m_reg_tree, actual_refcnt, ob_refcnt);
     return warned;
   }
 
@@ -336,10 +339,6 @@ public:
 
 private:
 
-  void foo(location_t loc) const 
-  {
-    inform(loc, "something is up right here");
-  }
   const region *m_base_region;
   const svalue *m_ob_refcnt;
   const svalue *m_actual_refcnt;
@@ -369,6 +368,19 @@ increment_region_refcnt (hash_map<const region *, int> &map, const region *key)
   refcnt = existed ? refcnt + 1 : 1;
 }
 
+static const region *
+get_region_from_svalue (const svalue *sval, region_model_manager *mgr)
+{
+  const auto *region_sval = sval->dyn_cast_region_svalue ();
+  if (region_sval)
+    return region_sval->get_pointee ();
+
+  const auto *initial_sval = sval->dyn_cast_initial_svalue ();
+  if (initial_sval)
+    return mgr->get_symbolic_region (initial_sval);
+
+  return nullptr;
+}
 
 /* Recursively fills in region_to_refcnt with the references owned by
    pyobj_ptr_sval.  */
@@ -381,20 +393,9 @@ count_pyobj_references (const region_model *model,
   if (!pyobj_ptr_sval)
     return;
 
-  const auto *pyobj_region_sval = pyobj_ptr_sval->dyn_cast_region_svalue ();
-  const auto *pyobj_initial_sval = pyobj_ptr_sval->dyn_cast_initial_svalue ();
-  if (!pyobj_region_sval && !pyobj_initial_sval)
-    return;
-
-  // todo: support initial sval (e.g passed in as parameter)
-  if (pyobj_initial_sval)
-    {
-      //     increment_region_refcnt (region_to_refcnt,
-      // 		       pyobj_initial_sval->get_region ());
-      return;
-    }
+  region_model_manager *mgr = model->get_manager ();
 
-  const region *pyobj_region = pyobj_region_sval->get_pointee ();
+  const region *pyobj_region = get_region_from_svalue (pyobj_ptr_sval, mgr);
   if (!pyobj_region || seen.contains (pyobj_region))
     return;
 
@@ -409,49 +410,75 @@ count_pyobj_references (const region_model *model,
     return;
 
   const auto &retval_binding_map = retval_cluster->get_map ();
-
   for (const auto &binding : retval_binding_map)
     {
-      const svalue *binding_sval = binding.second;
-      const svalue *unwrapped_sval = binding_sval->unwrap_any_unmergeable ();
-      const region *pointee = unwrapped_sval->maybe_get_region ();
-
-      if (pointee && pointee->get_kind () == RK_HEAP_ALLOCATED)
+      const svalue *binding_sval = binding.second->unwrap_any_unmergeable ();
+      if (get_region_from_svalue (binding_sval, mgr))
 	count_pyobj_references (model, region_to_refcnt, binding_sval, seen);
     }
 }
 
+static void
+unwrap_any_ob_refcnt_sval (const svalue *&ob_refcnt_sval)
+{
+  if (ob_refcnt_sval->get_kind () != SK_CONSTANT)
+    {
+      auto unwrap_cast = ob_refcnt_sval->maybe_undo_cast ();
+      if (!unwrap_cast)
+	unwrap_cast = ob_refcnt_sval;
+
+      if (unwrap_cast->get_kind () == SK_BINOP)
+	ob_refcnt_sval = unwrap_cast->dyn_cast_binop_svalue ()->get_arg1 ();
+    }
+}
+
+static void
+handle_refcnt_mismatch (const region_model *old_model,
+			const ana::region *curr_region,
+			const svalue *ob_refcnt_sval,
+			const svalue *actual_refcnt_sval,
+			region_model_context *ctxt)
+{
+  region_model_manager *mgr = old_model->get_manager ();
+  const svalue *curr_reg_sval
+      = mgr->get_ptr_svalue (pyobj_ptr_tree, curr_region);
+  tree reg_tree = old_model->get_representative_tree (curr_reg_sval);
+
+  if (!reg_tree)
+    return;
+
+  const auto &eg = ctxt->get_eg ();
+  refcnt_stmt_finder finder (*eg, reg_tree);
+  auto pd = make_unique<refcnt_mismatch> (curr_region, ob_refcnt_sval,
+					  actual_refcnt_sval, reg_tree);
+  if (pd && eg)
+    ctxt->warn (std::move (pd), &finder);
+}
+
 /* Compare ob_refcnt field vs the actual reference count of a region */
 static void
 check_refcnt (const region_model *model,
 	      const region_model *old_model,
 	      region_model_context *ctxt,
 	      const hash_map<const ana::region *,
-			     int>::iterator::reference_pair region_refcnt)
+			     int>::iterator::reference_pair &region_refcnt)
 {
   region_model_manager *mgr = model->get_manager ();
   const auto &curr_region = region_refcnt.first;
   const auto &actual_refcnt = region_refcnt.second;
+
   const svalue *ob_refcnt_sval
       = retrieve_ob_refcnt_sval (curr_region, model, ctxt);
+  if (!ob_refcnt_sval)
+    return;
+
+  unwrap_any_ob_refcnt_sval (ob_refcnt_sval);
+
   const svalue *actual_refcnt_sval = mgr->get_or_create_int_cst (
       ob_refcnt_sval->get_type (), actual_refcnt);
-
   if (ob_refcnt_sval != actual_refcnt_sval)
-    {
-      const svalue *curr_reg_sval
-	  = mgr->get_ptr_svalue (pyobj_ptr_tree, curr_region);
-      tree reg_tree = old_model->get_representative_tree (curr_reg_sval);
-      if (!reg_tree)
-	return;
-
-      const auto &eg = ctxt->get_eg ();
-      refcnt_stmt_finder finder (*eg, reg_tree);
-      auto pd = make_unique<refcnt_mismatch> (curr_region, ob_refcnt_sval,
-					      actual_refcnt_sval, reg_tree);
-      if (pd && eg)
-	ctxt->warn (std::move (pd), &finder);
-    }
+    handle_refcnt_mismatch (old_model, curr_region, ob_refcnt_sval,
+			    actual_refcnt_sval, ctxt);
 }
 
 static void
@@ -493,8 +520,6 @@ count_all_references (const region_model *model,
   for (const auto &cluster : *model->get_store ())
     {
       auto curr_region = cluster.first;
-      if (curr_region->get_kind () != RK_HEAP_ALLOCATED)
-	continue;
 
       increment_region_refcnt (region_to_refcnt, curr_region);
 
@@ -505,8 +530,8 @@ count_all_references (const region_model *model,
 
 	  const svalue *unwrapped_sval
 	      = binding_sval->unwrap_any_unmergeable ();
-	  // if (unwrapped_sval->get_type () != pyobj_ptr_tree)
-	  // continue;
+	  if (unwrapped_sval->get_type () != pyobj_ptr_tree)
+	  continue;
 
 	  const region *pointee = unwrapped_sval->maybe_get_region ();
 	  if (!pointee || pointee->get_kind () != RK_HEAP_ALLOCATED)
diff --git a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c
index e1efd9efda5..46daf2f8975 100644
--- a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c
+++ b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c
@@ -75,4 +75,23 @@ test_PyListAppend_6 ()
   PyObject *list = NULL;
   PyList_Append(list, item);
   return list;
-}
\ No newline at end of file
+}
+
+PyObject *
+test_PyListAppend_7 (PyObject *item)
+{
+  PyObject *list = PyList_New (0);
+  Py_INCREF(item);
+  PyList_Append(list, item);
+  return list;
+  /* { dg-warning "expected 'item' to have reference count" "" { target *-*-* } .-1 } */
+}
+
+PyObject *
+test_PyListAppend_8 (PyObject *item, PyObject *list)
+{
+  Py_INCREF(item);
+  Py_INCREF(item);
+  PyList_Append(list, item);
+  return list;
+}
diff --git a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c
new file mode 100644
index 00000000000..a7f39509d6d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target analyzer } */
+/* { dg-options "-fanalyzer" } */
+/* { dg-require-python-h "" } */
+
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include "../analyzer/analyzer-decls.h"
+
+PyObject *
+test_refcnt_1 (PyObject *obj)
+{
+  Py_INCREF(obj);
+  Py_INCREF(obj);
+  return obj;
+  /* { dg-warning "expected 'obj' to have reference count" "" { target *-*-* } .-1 } */
+}
diff --git a/gcc/testsuite/gcc.dg/plugin/plugin.exp b/gcc/testsuite/gcc.dg/plugin/plugin.exp
index ed72912309c..87862b4ca00 100644
--- a/gcc/testsuite/gcc.dg/plugin/plugin.exp
+++ b/gcc/testsuite/gcc.dg/plugin/plugin.exp
@@ -162,6 +162,7 @@ set plugin_test_list [list \
 	  taint-antipatterns-1.c } \
     { analyzer_cpython_plugin.c \
 	  cpython-plugin-test-no-Python-h.c \
+	  cpython-plugin-test-refcnt.c \
 	  cpython-plugin-test-PyList_Append.c \
 	  cpython-plugin-test-PyList_New.c \
 	  cpython-plugin-test-PyLong_FromLong.c } \
-- 
2.30.2


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] analyzer: implement symbolic value support for CPython plugin's refcnt checker [PR107646]
  2023-09-05  2:13 ` [PATCH] analyzer: implement symbolic value support for CPython plugin's refcnt checker [PR107646] Eric Feng
@ 2023-09-07 17:28   ` David Malcolm
  2023-09-11  2:12     ` Eric Feng
  0 siblings, 1 reply; 4+ messages in thread
From: David Malcolm @ 2023-09-07 17:28 UTC (permalink / raw)
  To: Eric Feng; +Cc: gcc, gcc-patches

On Mon, 2023-09-04 at 22:13 -0400, Eric Feng wrote:

> Hi Dave,

Hi Eric, thanks for the patch.

> 
> Recently I've been working on symbolic value support for the reference
> count checker. I've attached a patch for it below; let me know it looks
> OK for trunk. Thanks!
> 
> Best,
> Eric
> 
> ---
> 
> This patch enhances the reference count checker in the CPython plugin by
> adding support for symbolic values. Whereas previously we were only able
> to check the reference count of PyObject* objects created in the scope
> of the function; we are now able to emit diagnostics on reference count
> mismatch of objects that were, for example, passed in as a function
> parameter.
> 
> rc6.c:6:10: warning: expected ‘obj’ to have reference count: N + ‘1’ but ob_refcnt field is N + ‘2’
>     6 |   return obj;
>       |          ^~~

[...snip...]

>  create mode 100644 gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c
> 
> diff --git a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> index bf1982e79c3..d7ecd7fce09 100644
> --- a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> +++ b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> @@ -314,17 +314,20 @@ public:
>    {
>      diagnostic_metadata m;
>      bool warned;
> -    // just assuming constants for now
> -    auto actual_refcnt
> -	= m_actual_refcnt->dyn_cast_constant_svalue ()->get_constant ();
> -    auto ob_refcnt = m_ob_refcnt->dyn_cast_constant_svalue ()->get_constant ();
> -    warned = warning_meta (rich_loc, m, get_controlling_option (),
> -			   "expected %qE to have "
> -			   "reference count: %qE but ob_refcnt field is: %qE",
> -			   m_reg_tree, actual_refcnt, ob_refcnt);
> -
> -    // location_t loc = rich_loc->get_loc ();
> -    // foo (loc);
> +
> +    const auto *actual_refcnt_constant
> +	= m_actual_refcnt->dyn_cast_constant_svalue ();
> +    const auto *ob_refcnt_constant = m_ob_refcnt->dyn_cast_constant_svalue ();
> +    if (!actual_refcnt_constant || !ob_refcnt_constant)
> +      return false;
> +
> +    auto actual_refcnt = actual_refcnt_constant->get_constant ();
> +    auto ob_refcnt = ob_refcnt_constant->get_constant ();
> +    warned = warning_meta (
> +	rich_loc, m, get_controlling_option (),
> +	"expected %qE to have "
> +	"reference count: N + %qE but ob_refcnt field is N + %qE",
> +	m_reg_tree, actual_refcnt, ob_refcnt);
>      return warned;

I know you're emulating the old behavior I implemented way back in
cpychecker, but I don't like that behavior :(

Specifically, although the patch improves the behavior for symbolic
values, it regresses the precision of wording for the concrete values
case.  If we have e.g. a concrete ob_refcnt of 2, whereas we only have
1 pointer, then it's more readable to say:

  warning: expected ‘obj’ to have reference count: ‘1’ but ob_refcnt
field is ‘2’

than:

  warning: expected ‘obj’ to have reference count: N + ‘1’ but ob_refcnt field is N + ‘2’

...and we shouldn't quote concrete numbers, the message should be:

  warning: expected ‘obj’ to have reference count of 1 but ob_refcnt field is 2

or better:

  warning: ‘*obj’ is pointed to by 1 pointer but 'ob_refcnt' field is 2


Can you move the unwrapping of the svalue from the tests below into the
emit vfunc?  That way the m_actual_refcnt doesn't have to be a
constant_svalue; you could have logic in the emit vfunc to print
readable messages based on what kind of svalue it is.

Rather than 'N', it might be better to say 'initial'; how about:

  warning: ‘*obj’ is pointed to by 0 additional pointers but 'ob_refcnt' field has increased by 1
  warning: ‘*obj’ is pointed to by 1 additional pointer but 'ob_refcnt' field has increased by 2
  warning: ‘*obj’ is pointed to by 1 additional pointer but 'ob_refcnt' field is unchanged
  warning: ‘*obj’ is pointed to by 2 additional pointers but 'ob_refcnt' field has decreased by 1
  warning: ‘*obj’ is pointed to by 1 fewer pointers but 'ob_refcnt' field is unchanged

and similar?

Maybe have a flag that tracks whether we're talking about a concrete
value that's absolute versus a concrete value that's relative to the
initial value?


[...snip...]


> @@ -369,6 +368,19 @@ increment_region_refcnt (hash_map<const region *, int> &map, const region *key)
>    refcnt = existed ? refcnt + 1 : 1;
>  }
>  
> +static const region *
> +get_region_from_svalue (const svalue *sval, region_model_manager *mgr)
> +{
> +  const auto *region_sval = sval->dyn_cast_region_svalue ();
> +  if (region_sval)
> +    return region_sval->get_pointee ();
> +
> +  const auto *initial_sval = sval->dyn_cast_initial_svalue ();
> +  if (initial_sval)
> +    return mgr->get_symbolic_region (initial_sval);
> +
> +  return nullptr;
> +}

This is dereferencing a pointer, right?

Can the caller use region_model::deref_rvalue instead?


[...snip...]

> +static void
> +unwrap_any_ob_refcnt_sval (const svalue *&ob_refcnt_sval)
> +{
> +  if (ob_refcnt_sval->get_kind () != SK_CONSTANT)
> +    {
> +      auto unwrap_cast = ob_refcnt_sval->maybe_undo_cast ();
> +      if (!unwrap_cast)
> +	unwrap_cast = ob_refcnt_sval;
> +
> +      if (unwrap_cast->get_kind () == SK_BINOP)
> +	ob_refcnt_sval = unwrap_cast->dyn_cast_binop_svalue ()->get_arg1 ();

This would be better spelled as:

         if (const binop_svalue *binop_sval = unwrap_cast->dyn_cast_binop_svalue ())
	    ob_refcnt_sval = binop_sval->get_arg1 ();

[...snip...]

>  /* Compare ob_refcnt field vs the actual reference count of a region */
>  static void
>  check_refcnt (const region_model *model,
>  	      const region_model *old_model,
>  	      region_model_context *ctxt,
>  	      const hash_map<const ana::region *,
> -			     int>::iterator::reference_pair region_refcnt)
> +			     int>::iterator::reference_pair &region_refcnt)

Could really use a typedef for
  const hash_map<const ana::region *, int>
to simplify this code.

>  {
>    region_model_manager *mgr = model->get_manager ();
>    const auto &curr_region = region_refcnt.first;
>    const auto &actual_refcnt = region_refcnt.second;
> +
>    const svalue *ob_refcnt_sval
>        = retrieve_ob_refcnt_sval (curr_region, model, ctxt);
> +  if (!ob_refcnt_sval)
> +    return;
> +
> +  unwrap_any_ob_refcnt_sval (ob_refcnt_sval);

As noted above, can the diagnostic store the pre-unwrapped
ob_refcnt_sval?  Might mean you have to do the unwrapping both here,
and later when displaying the diagnostic.  Or (probably best) track
both the original and unwrapped ob_refcnt_sval, and store both in the
pending_diagnostic.

> +
>    const svalue *actual_refcnt_sval = mgr->get_or_create_int_cst (
>        ob_refcnt_sval->get_type (), actual_refcnt);
> -
>    if (ob_refcnt_sval != actual_refcnt_sval)
> -    {
> -      const svalue *curr_reg_sval
> -	  = mgr->get_ptr_svalue (pyobj_ptr_tree, curr_region);
> -      tree reg_tree = old_model->get_representative_tree (curr_reg_sval);
> -      if (!reg_tree)
> -	return;
> -
> -      const auto &eg = ctxt->get_eg ();
> -      refcnt_stmt_finder finder (*eg, reg_tree);
> -      auto pd = make_unique<refcnt_mismatch> (curr_region, ob_refcnt_sval,
> -					      actual_refcnt_sval, reg_tree);
> -      if (pd && eg)
> -	ctxt->warn (std::move (pd), &finder);
> -    }
> +    handle_refcnt_mismatch (old_model, curr_region, ob_refcnt_sval,
> +			    actual_refcnt_sval, ctxt);
>  }
>  
>  static void
> @@ -493,8 +520,6 @@ count_all_references (const region_model *model,
>    for (const auto &cluster : *model->get_store ())
>      {
>        auto curr_region = cluster.first;
> -      if (curr_region->get_kind () != RK_HEAP_ALLOCATED)
> -	continue;
>  
>        increment_region_refcnt (region_to_refcnt, curr_region);
>  
> @@ -505,8 +530,8 @@ count_all_references (const region_model *model,
>  
>  	  const svalue *unwrapped_sval
>  	      = binding_sval->unwrap_any_unmergeable ();
> -	  // if (unwrapped_sval->get_type () != pyobj_ptr_tree)
> -	  // continue;
> +	  if (unwrapped_sval->get_type () != pyobj_ptr_tree)
> +	  continue;

We'll probably want a smarter test for this, that the type "inherits"
C-style from PyObject (e.g. PyLongObject).


>  
>  	  const region *pointee = unwrapped_sval->maybe_get_region ();
>  	  if (!pointee || pointee->get_kind () != RK_HEAP_ALLOCATED)
> diff --git a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c
> index e1efd9efda5..46daf2f8975 100644
> --- a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c
> +++ b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c
> @@ -75,4 +75,23 @@ test_PyListAppend_6 ()
>    PyObject *list = NULL;
>    PyList_Append(list, item);
>    return list;
> -}
> \ No newline at end of file
> +}
> +
> +PyObject *
> +test_PyListAppend_7 (PyObject *item)
> +{
> +  PyObject *list = PyList_New (0);
> +  Py_INCREF(item);
> +  PyList_Append(list, item);
> +  return list;
> +  /* { dg-warning "expected 'item' to have reference count" "" { target *-*-* } .-1 } */

It would be good if these dg-warning directives regexp contained the
actual and expected counts; I find I can't easily tell what the
intended output is meant to be.


> +}
> +
> +PyObject *
> +test_PyListAppend_8 (PyObject *item, PyObject *list)
> +{
> +  Py_INCREF(item);
> +  Py_INCREF(item);
> +  PyList_Append(list, item);
> +  return list;
> +}

Should we complain here about item->ob_refcnt being too high?

> diff --git a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c
> new file mode 100644
> index 00000000000..a7f39509d6d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target analyzer } */
> +/* { dg-options "-fanalyzer" } */
> +/* { dg-require-python-h "" } */
> +
> +
> +#define PY_SSIZE_T_CLEAN
> +#include <Python.h>
> +#include "../analyzer/analyzer-decls.h"
> +
> +PyObject *
> +test_refcnt_1 (PyObject *obj)
> +{
> +  Py_INCREF(obj);
> +  Py_INCREF(obj);
> +  return obj;
> +  /* { dg-warning "expected 'obj' to have reference count" "" { target *-*-* } .-1 } */

Likewise, it would be better for the dg-warning directive's expressed
the expected "actual vs expected" values.

[...snip...]

Thanks again for the patch, hope this is constructive

Dave


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] analyzer: implement symbolic value support for CPython plugin's refcnt checker [PR107646]
  2023-09-07 17:28   ` David Malcolm
@ 2023-09-11  2:12     ` Eric Feng
  2023-09-11 19:00       ` David Malcolm
  0 siblings, 1 reply; 4+ messages in thread
From: Eric Feng @ 2023-09-11  2:12 UTC (permalink / raw)
  To: David Malcolm; +Cc: gcc, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 12148 bytes --]

On Thu, Sep 7, 2023 at 1:28 PM David Malcolm <dmalcolm@redhat.com> wrote:

> On Mon, 2023-09-04 at 22:13 -0400, Eric Feng wrote:
>
> > Hi Dave,
>
> Hi Eric, thanks for the patch.
>
> >
> > Recently I've been working on symbolic value support for the reference
> > count checker. I've attached a patch for it below; let me know it looks
> > OK for trunk. Thanks!
> >
> > Best,
> > Eric
> >
> > ---
> >
> > This patch enhances the reference count checker in the CPython plugin by
> > adding support for symbolic values. Whereas previously we were only able
> > to check the reference count of PyObject* objects created in the scope
> > of the function; we are now able to emit diagnostics on reference count
> > mismatch of objects that were, for example, passed in as a function
> > parameter.
> >
> > rc6.c:6:10: warning: expected ‘obj’ to have reference count: N + ‘1’ but
> ob_refcnt field is N + ‘2’
> >     6 |   return obj;
> >       |          ^~~
>
> [...snip...]
>
> >  create mode 100644
> gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c
> >
> > diff --git a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> > index bf1982e79c3..d7ecd7fce09 100644
> > --- a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> > +++ b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c
> > @@ -314,17 +314,20 @@ public:
> >    {
> >      diagnostic_metadata m;
> >      bool warned;
> > -    // just assuming constants for now
> > -    auto actual_refcnt
> > -     = m_actual_refcnt->dyn_cast_constant_svalue ()->get_constant ();
> > -    auto ob_refcnt = m_ob_refcnt->dyn_cast_constant_svalue
> ()->get_constant ();
> > -    warned = warning_meta (rich_loc, m, get_controlling_option (),
> > -                        "expected %qE to have "
> > -                        "reference count: %qE but ob_refcnt field is:
> %qE",
> > -                        m_reg_tree, actual_refcnt, ob_refcnt);
> > -
> > -    // location_t loc = rich_loc->get_loc ();
> > -    // foo (loc);
> > +
> > +    const auto *actual_refcnt_constant
> > +     = m_actual_refcnt->dyn_cast_constant_svalue ();
> > +    const auto *ob_refcnt_constant =
> m_ob_refcnt->dyn_cast_constant_svalue ();
> > +    if (!actual_refcnt_constant || !ob_refcnt_constant)
> > +      return false;
> > +
> > +    auto actual_refcnt = actual_refcnt_constant->get_constant ();
> > +    auto ob_refcnt = ob_refcnt_constant->get_constant ();
> > +    warned = warning_meta (
> > +     rich_loc, m, get_controlling_option (),
> > +     "expected %qE to have "
> > +     "reference count: N + %qE but ob_refcnt field is N + %qE",
> > +     m_reg_tree, actual_refcnt, ob_refcnt);
> >      return warned;
>
> I know you're emulating the old behavior I implemented way back in
> cpychecker, but I don't like that behavior :(
>
> Specifically, although the patch improves the behavior for symbolic
> values, it regresses the precision of wording for the concrete values
> case.  If we have e.g. a concrete ob_refcnt of 2, whereas we only have
> 1 pointer, then it's more readable to say:
>
>   warning: expected ‘obj’ to have reference count: ‘1’ but ob_refcnt
> field is ‘2’
>
> than:
>
>   warning: expected ‘obj’ to have reference count: N + ‘1’ but ob_refcnt
> field is N + ‘2’
>
> ...and we shouldn't quote concrete numbers, the message should be:
>
>   warning: expected ‘obj’ to have reference count of 1 but ob_refcnt field
> is 2


> or better:
>
>   warning: ‘*obj’ is pointed to by 1 pointer but 'ob_refcnt' field is 2
>
>
> Can you move the unwrapping of the svalue from the tests below into the
> emit vfunc?  That way the m_actual_refcnt doesn't have to be a
> constant_svalue; you could have logic in the emit vfunc to print
> readable messages based on what kind of svalue it is.
>
> Rather than 'N', it might be better to say 'initial'; how about:
>
>   warning: ‘*obj’ is pointed to by 0 additional pointers but 'ob_refcnt'
> field has increased by 1
>   warning: ‘*obj’ is pointed to by 1 additional pointer but 'ob_refcnt'
> field has increased by 2
>   warning: ‘*obj’ is pointed to by 1 additional pointer but 'ob_refcnt'
> field is unchanged
>   warning: ‘*obj’ is pointed to by 2 additional pointers but 'ob_refcnt'
> field has decreased by 1
>   warning: ‘*obj’ is pointed to by 1 fewer pointers but 'ob_refcnt' field
> is unchanged
>
> and similar?
>

That makes sense to me as well (indeed I was just emulating the old
behavior)! Will experiment and keep you posted on a revised patch with this
in mind.  This is somewhat of a minor detail but can we emit ‘*obj’ as
bolded text in the diagnostic message? Currently, I can emit this
(including the asterisk) like so: '*%E'. But unlike using %qE, it doesn't
bold the body of the single quotations. Is this possible?

>
> Maybe have a flag that tracks whether we're talking about a concrete
> value that's absolute versus a concrete value that's relative to the
> initial value?
>
>
> [...snip...]
>
>
> > @@ -369,6 +368,19 @@ increment_region_refcnt (hash_map<const region *,
> int> &map, const region *key)
> >    refcnt = existed ? refcnt + 1 : 1;
> >  }
> >
> > +static const region *
> > +get_region_from_svalue (const svalue *sval, region_model_manager *mgr)
> > +{
> > +  const auto *region_sval = sval->dyn_cast_region_svalue ();
> > +  if (region_sval)
> > +    return region_sval->get_pointee ();
> > +
> > +  const auto *initial_sval = sval->dyn_cast_initial_svalue ();
> > +  if (initial_sval)
> > +    return mgr->get_symbolic_region (initial_sval);
> > +
> > +  return nullptr;
> > +}
>
> This is dereferencing a pointer, right?
>
> Can the caller use region_model::deref_rvalue instead?
>
>
> [...snip...]
>
> > +static void
> > +unwrap_any_ob_refcnt_sval (const svalue *&ob_refcnt_sval)
> > +{
> > +  if (ob_refcnt_sval->get_kind () != SK_CONSTANT)
> > +    {
> > +      auto unwrap_cast = ob_refcnt_sval->maybe_undo_cast ();
> > +      if (!unwrap_cast)
> > +     unwrap_cast = ob_refcnt_sval;
> > +
> > +      if (unwrap_cast->get_kind () == SK_BINOP)
> > +     ob_refcnt_sval = unwrap_cast->dyn_cast_binop_svalue ()->get_arg1
> ();
>
> This would be better spelled as:
>
>          if (const binop_svalue *binop_sval =
> unwrap_cast->dyn_cast_binop_svalue ())
>             ob_refcnt_sval = binop_sval->get_arg1 ();
>
> [...snip...]
>
> >  /* Compare ob_refcnt field vs the actual reference count of a region */
> >  static void
> >  check_refcnt (const region_model *model,
> >             const region_model *old_model,
> >             region_model_context *ctxt,
> >             const hash_map<const ana::region *,
> > -                          int>::iterator::reference_pair region_refcnt)
> > +                          int>::iterator::reference_pair &region_refcnt)
>
> Could really use a typedef for
>   const hash_map<const ana::region *, int>
> to simplify this code.
>
> >  {
> >    region_model_manager *mgr = model->get_manager ();
> >    const auto &curr_region = region_refcnt.first;
> >    const auto &actual_refcnt = region_refcnt.second;
> > +
> >    const svalue *ob_refcnt_sval
> >        = retrieve_ob_refcnt_sval (curr_region, model, ctxt);
> > +  if (!ob_refcnt_sval)
> > +    return;
> > +
> > +  unwrap_any_ob_refcnt_sval (ob_refcnt_sval);
>
> As noted above, can the diagnostic store the pre-unwrapped
> ob_refcnt_sval?  Might mean you have to do the unwrapping both here,
> and later when displaying the diagnostic.  Or (probably best) track
> both the original and unwrapped ob_refcnt_sval, and store both in the
> pending_diagnostic.
>
> > +
> >    const svalue *actual_refcnt_sval = mgr->get_or_create_int_cst (
> >        ob_refcnt_sval->get_type (), actual_refcnt);
> > -
> >    if (ob_refcnt_sval != actual_refcnt_sval)
> > -    {
> > -      const svalue *curr_reg_sval
> > -       = mgr->get_ptr_svalue (pyobj_ptr_tree, curr_region);
> > -      tree reg_tree = old_model->get_representative_tree
> (curr_reg_sval);
> > -      if (!reg_tree)
> > -     return;
> > -
> > -      const auto &eg = ctxt->get_eg ();
> > -      refcnt_stmt_finder finder (*eg, reg_tree);
> > -      auto pd = make_unique<refcnt_mismatch> (curr_region,
> ob_refcnt_sval,
> > -                                           actual_refcnt_sval,
> reg_tree);
> > -      if (pd && eg)
> > -     ctxt->warn (std::move (pd), &finder);
> > -    }
> > +    handle_refcnt_mismatch (old_model, curr_region, ob_refcnt_sval,
> > +                         actual_refcnt_sval, ctxt);
> >  }
> >
> >  static void
> > @@ -493,8 +520,6 @@ count_all_references (const region_model *model,
> >    for (const auto &cluster : *model->get_store ())
> >      {
> >        auto curr_region = cluster.first;
> > -      if (curr_region->get_kind () != RK_HEAP_ALLOCATED)
> > -     continue;
> >
> >        increment_region_refcnt (region_to_refcnt, curr_region);
> >
> > @@ -505,8 +530,8 @@ count_all_references (const region_model *model,
> >
> >         const svalue *unwrapped_sval
> >             = binding_sval->unwrap_any_unmergeable ();
> > -       // if (unwrapped_sval->get_type () != pyobj_ptr_tree)
> > -       // continue;
> > +       if (unwrapped_sval->get_type () != pyobj_ptr_tree)
> > +       continue;
>
> We'll probably want a smarter test for this, that the type "inherits"
> C-style from PyObject (e.g. PyLongObject).
>
>
> >
> >         const region *pointee = unwrapped_sval->maybe_get_region ();
> >         if (!pointee || pointee->get_kind () != RK_HEAP_ALLOCATED)
> > diff --git
> a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c
> b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c
> > index e1efd9efda5..46daf2f8975 100644
> > --- a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c
> > +++ b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-PyList_Append.c
> > @@ -75,4 +75,23 @@ test_PyListAppend_6 ()
> >    PyObject *list = NULL;
> >    PyList_Append(list, item);
> >    return list;
> > -}
> > \ No newline at end of file
> > +}
> > +
> > +PyObject *
> > +test_PyListAppend_7 (PyObject *item)
> > +{
> > +  PyObject *list = PyList_New (0);
> > +  Py_INCREF(item);
> > +  PyList_Append(list, item);
> > +  return list;
> > +  /* { dg-warning "expected 'item' to have reference count" "" { target
> *-*-* } .-1 } */
>
> It would be good if these dg-warning directives regexp contained the
> actual and expected counts; I find I can't easily tell what the
> intended output is meant to be.
>
>
> > +}
> > +
> > +PyObject *
> > +test_PyListAppend_8 (PyObject *item, PyObject *list)
> > +{
> > +  Py_INCREF(item);
> > +  Py_INCREF(item);
> > +  PyList_Append(list, item);
> > +  return list;
> > +}
>
> Should we complain here about item->ob_refcnt being too high?
>
> > diff --git a/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c
> b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c
> > new file mode 100644
> > index 00000000000..a7f39509d6d
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/plugin/cpython-plugin-test-refcnt.c
> > @@ -0,0 +1,18 @@
> > +/* { dg-do compile } */
> > +/* { dg-require-effective-target analyzer } */
> > +/* { dg-options "-fanalyzer" } */
> > +/* { dg-require-python-h "" } */
> > +
> > +
> > +#define PY_SSIZE_T_CLEAN
> > +#include <Python.h>
> > +#include "../analyzer/analyzer-decls.h"
> > +
> > +PyObject *
> > +test_refcnt_1 (PyObject *obj)
> > +{
> > +  Py_INCREF(obj);
> > +  Py_INCREF(obj);
> > +  return obj;
> > +  /* { dg-warning "expected 'obj' to have reference count" "" { target
> *-*-* } .-1 } */
>
> Likewise, it would be better for the dg-warning directive's expressed
> the expected "actual vs expected" values.
>
> [...snip...]
>
> Thanks again for the patch, hope this is constructive
>
> Dave
>
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] analyzer: implement symbolic value support for CPython plugin's refcnt checker [PR107646]
  2023-09-11  2:12     ` Eric Feng
@ 2023-09-11 19:00       ` David Malcolm
  0 siblings, 0 replies; 4+ messages in thread
From: David Malcolm @ 2023-09-11 19:00 UTC (permalink / raw)
  To: Eric Feng; +Cc: gcc, gcc-patches

On Sun, 2023-09-10 at 22:12 -0400, Eric Feng wrote:
> On Thu, Sep 7, 2023 at 1:28 PM David Malcolm <dmalcolm@redhat.com>
> wrote:
> 
> > On Mon, 2023-09-04 at 22:13 -0400, Eric Feng wrote:
> > 

[...snip...]

> > 
> > 
> > I know you're emulating the old behavior I implemented way back in
> > cpychecker, but I don't like that behavior :(
> > 
> > Specifically, although the patch improves the behavior for symbolic
> > values, it regresses the precision of wording for the concrete
> > values
> > case.  If we have e.g. a concrete ob_refcnt of 2, whereas we only
> > have
> > 1 pointer, then it's more readable to say:
> > 
> >   warning: expected ‘obj’ to have reference count: ‘1’ but
> > ob_refcnt
> > field is ‘2’
> > 
> > than:
> > 
> >   warning: expected ‘obj’ to have reference count: N + ‘1’ but
> > ob_refcnt
> > field is N + ‘2’
> > 
> > ...and we shouldn't quote concrete numbers, the message should be:
> > 
> >   warning: expected ‘obj’ to have reference count of 1 but
> > ob_refcnt field
> > is 2
> 
> 
> > or better:
> > 
> >   warning: ‘*obj’ is pointed to by 1 pointer but 'ob_refcnt' field
> > is 2
> > 
> > 
> > Can you move the unwrapping of the svalue from the tests below into
> > the
> > emit vfunc?  That way the m_actual_refcnt doesn't have to be a
> > constant_svalue; you could have logic in the emit vfunc to print
> > readable messages based on what kind of svalue it is.
> > 
> > Rather than 'N', it might be better to say 'initial'; how about:
> > 
> >   warning: ‘*obj’ is pointed to by 0 additional pointers but
> > 'ob_refcnt'
> > field has increased by 1
> >   warning: ‘*obj’ is pointed to by 1 additional pointer but
> > 'ob_refcnt'
> > field has increased by 2
> >   warning: ‘*obj’ is pointed to by 1 additional pointer but
> > 'ob_refcnt'
> > field is unchanged
> >   warning: ‘*obj’ is pointed to by 2 additional pointers but
> > 'ob_refcnt'
> > field has decreased by 1
> >   warning: ‘*obj’ is pointed to by 1 fewer pointers but 'ob_refcnt'
> > field
> > is unchanged
> > 
> > and similar?
> > 
> 
> That makes sense to me as well (indeed I was just emulating the old
> behavior)! Will experiment and keep you posted on a revised patch
> with this
> in mind.  This is somewhat of a minor detail but can we emit ‘*obj’
> as
> bolded text in the diagnostic message? Currently, I can emit this
> (including the asterisk) like so: '*%E'. But unlike using %qE, it
> doesn't
> bold the body of the single quotations. Is this possible?

Yes.

You could use %< and %> to get the colorized (and localized) quotes
(see pretty-print.cc), but better would probably be to pass a tree for
the *obj, rather than obj.  You can make this by building a MEM_REF
tree node wrapping the pointer (you can see an example of this in the
RK_SYMBOLIC case of region_model::get_representative_path_var_1).

Dave


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2023-09-11 19:00 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <aa56c3d9c65032bf6c406dcdd4853afc4e721cf9.camel@redhat.com>
2023-09-05  2:13 ` [PATCH] analyzer: implement symbolic value support for CPython plugin's refcnt checker [PR107646] Eric Feng
2023-09-07 17:28   ` David Malcolm
2023-09-11  2:12     ` Eric Feng
2023-09-11 19:00       ` David Malcolm

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).