From: David Malcolm <dmalcolm@redhat.com>
To: gcc-patches@gcc.gnu.org
Cc: David Malcolm <dmalcolm@redhat.com>
Subject: [PATCH 3/9] analyzer: reimplement kf_strcpy [PR105899]
Date: Thu, 24 Aug 2023 10:38:57 -0400 [thread overview]
Message-ID: <20230824143903.3161185-4-dmalcolm@redhat.com> (raw)
In-Reply-To: <20230824143903.3161185-1-dmalcolm@redhat.com>
This patch reimplements the analyzer's implementation of strcpy using
the region_model::scan_for_null_terminator infrastructure, so that e.g.
it can complain about out-of-bounds reads/writes, unterminated strings,
etc.
gcc/analyzer/ChangeLog:
PR analyzer/105899
* kf.cc (kf_strcpy::impl_call_pre): Reimplement using
check_for_null_terminated_string_arg.
* region-model.cc (region_model::get_store_bytes): Shortcut
reading all of a string_region.
(region_model::scan_for_null_terminator): Use get_store_value for
the bytes rather than "unknown" when returning an unknown length.
(region_model::write_bytes): New.
* region-model.h (region_model::write_bytes): New decl.
gcc/testsuite/ChangeLog:
PR analyzer/105899
* gcc.dg/analyzer/out-of-bounds-diagram-16.c: New test.
* gcc.dg/analyzer/strcpy-1.c: Add test coverage.
* gcc.dg/analyzer/strcpy-3.c: Likewise.
* gcc.dg/analyzer/strcpy-4.c: New test.
---
gcc/analyzer/kf.cc | 32 +++++-------
gcc/analyzer/region-model.cc | 32 ++++++++++--
gcc/analyzer/region-model.h | 4 ++
.../analyzer/out-of-bounds-diagram-16.c | 31 +++++++++++
gcc/testsuite/gcc.dg/analyzer/strcpy-1.c | 22 ++++++++
gcc/testsuite/gcc.dg/analyzer/strcpy-3.c | 1 +
gcc/testsuite/gcc.dg/analyzer/strcpy-4.c | 51 +++++++++++++++++++
7 files changed, 150 insertions(+), 23 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-16.c
create mode 100644 gcc/testsuite/gcc.dg/analyzer/strcpy-4.c
diff --git a/gcc/analyzer/kf.cc b/gcc/analyzer/kf.cc
index 59f46bab581c..6b33cd159dac 100644
--- a/gcc/analyzer/kf.cc
+++ b/gcc/analyzer/kf.cc
@@ -1135,29 +1135,25 @@ void
kf_strcpy::impl_call_pre (const call_details &cd) const
{
region_model *model = cd.get_model ();
- region_model_manager *mgr = cd.get_manager ();
+ region_model_context *ctxt = cd.get_ctxt ();
const svalue *dest_sval = cd.get_arg_svalue (0);
const region *dest_reg = model->deref_rvalue (dest_sval, cd.get_arg_tree (0),
- cd.get_ctxt ());
- const svalue *src_sval = cd.get_arg_svalue (1);
- const region *src_reg = model->deref_rvalue (src_sval, cd.get_arg_tree (1),
- cd.get_ctxt ());
- const svalue *src_contents_sval = model->get_store_value (src_reg,
- cd.get_ctxt ());
- cd.check_for_null_terminated_string_arg (1);
-
+ ctxt);
+ /* strcpy returns the initial param. */
cd.maybe_set_lhs (dest_sval);
- /* Try to get the string size if SRC_REG is a string_region. */
- const svalue *copied_bytes_sval = model->get_string_size (src_reg);
- /* Otherwise, check if the contents of SRC_REG is a string. */
- if (copied_bytes_sval->get_kind () == SK_UNKNOWN)
- copied_bytes_sval = model->get_string_size (src_contents_sval);
-
- const region *sized_dest_reg
- = mgr->get_sized_region (dest_reg, NULL_TREE, copied_bytes_sval);
- model->set_value (sized_dest_reg, src_contents_sval, cd.get_ctxt ());
+ const svalue *bytes_to_copy;
+ if (const svalue *num_bytes_read_sval
+ = cd.check_for_null_terminated_string_arg (1, &bytes_to_copy))
+ {
+ model->write_bytes (dest_reg, num_bytes_read_sval, bytes_to_copy, ctxt);
+ }
+ else
+ {
+ if (cd.get_ctxt ())
+ cd.get_ctxt ()->terminate_path ();
+ }
}
/* Handler for "strdup" and "__builtin_strdup". */
diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index 7a2f81f36e0f..cc8d895d9665 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -3460,6 +3460,13 @@ region_model::get_store_bytes (const region *base_reg,
const byte_range &bytes,
region_model_context *ctxt) const
{
+ /* Shortcut reading all of a string_region. */
+ if (bytes.get_start_byte_offset () == 0)
+ if (const string_region *string_reg = base_reg->dyn_cast_string_region ())
+ if (bytes.m_size_in_bytes
+ == TREE_STRING_LENGTH (string_reg->get_string_cst ()))
+ return m_mgr->get_or_create_initial_value (base_reg);
+
const svalue *index_sval
= m_mgr->get_or_create_int_cst (size_type_node,
bytes.get_start_byte_offset ());
@@ -3533,14 +3540,14 @@ region_model::scan_for_null_terminator (const region *reg,
if (offset.symbolic_p ())
{
if (out_sval)
- *out_sval = m_mgr->get_or_create_unknown_svalue (NULL_TREE);
+ *out_sval = get_store_value (reg, nullptr);
return m_mgr->get_or_create_unknown_svalue (size_type_node);
}
byte_offset_t src_byte_offset;
if (!offset.get_concrete_byte_offset (&src_byte_offset))
{
if (out_sval)
- *out_sval = m_mgr->get_or_create_unknown_svalue (NULL_TREE);
+ *out_sval = get_store_value (reg, nullptr);
return m_mgr->get_or_create_unknown_svalue (size_type_node);
}
const byte_offset_t initial_src_byte_offset = src_byte_offset;
@@ -3582,7 +3589,7 @@ region_model::scan_for_null_terminator (const region *reg,
if (is_terminated.is_unknown ())
{
if (out_sval)
- *out_sval = m_mgr->get_or_create_unknown_svalue (NULL_TREE);
+ *out_sval = get_store_value (reg, nullptr);
return m_mgr->get_or_create_unknown_svalue (size_type_node);
}
@@ -3621,7 +3628,7 @@ region_model::scan_for_null_terminator (const region *reg,
if (c.has_symbolic_bindings_p ())
{
if (out_sval)
- *out_sval = m_mgr->get_or_create_unknown_svalue (NULL_TREE);
+ *out_sval = get_store_value (reg, nullptr);
return m_mgr->get_or_create_unknown_svalue (size_type_node);
}
@@ -3638,7 +3645,7 @@ region_model::scan_for_null_terminator (const region *reg,
if (base_reg->can_have_initial_svalue_p ())
{
if (out_sval)
- *out_sval = m_mgr->get_or_create_unknown_svalue (NULL_TREE);
+ *out_sval = get_store_value (reg, nullptr);
return m_mgr->get_or_create_unknown_svalue (size_type_node);
}
else
@@ -3801,6 +3808,21 @@ region_model::zero_fill_region (const region *reg)
m_store.zero_fill_region (m_mgr->get_store_manager(), reg);
}
+/* Copy NUM_BYTES_SVAL of SVAL to DEST_REG.
+ Use CTXT to report any warnings associated with the copy
+ (e.g. out-of-bounds writes). */
+
+void
+region_model::write_bytes (const region *dest_reg,
+ const svalue *num_bytes_sval,
+ const svalue *sval,
+ region_model_context *ctxt)
+{
+ const region *sized_dest_reg
+ = m_mgr->get_sized_region (dest_reg, NULL_TREE, num_bytes_sval);
+ set_value (sized_dest_reg, sval, ctxt);
+}
+
/* Mark REG as having unknown content. */
void
diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h
index 3979bf124783..9c6e60bbe824 100644
--- a/gcc/analyzer/region-model.h
+++ b/gcc/analyzer/region-model.h
@@ -367,6 +367,10 @@ class region_model
void purge_region (const region *reg);
void fill_region (const region *reg, const svalue *sval);
void zero_fill_region (const region *reg);
+ void write_bytes (const region *dest_reg,
+ const svalue *num_bytes_sval,
+ const svalue *sval,
+ region_model_context *ctxt);
void mark_region_as_unknown (const region *reg, uncertainty_t *uncertainty);
tristate eval_condition (const svalue *lhs,
diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-16.c b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-16.c
new file mode 100644
index 000000000000..b0fb409267ea
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-16.c
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-fdiagnostics-text-art-charset=unicode" } */
+
+#include <string.h>
+#include "analyzer-decls.h"
+
+char *test_fixed_size_heap_2_invalid (void)
+{
+ char str[] = "abc";
+ char *p = __builtin_malloc (strlen (str)); /* { dg-message "\\(1\\) capacity: 3 bytes" } */
+ if (!p)
+ return NULL;
+ strcpy (p, str); /* { dg-warning "heap-based buffer overflow" } */
+ return p;
+}
+
+/* { dg-begin-multiline-output "" }
+ ┌──────────────────────────────────────────────────────────────────────┐
+ │ write of 4 bytes │
+ └──────────────────────────────────────────────────────────────────────┘
+ │ │
+ │ │
+ v v
+ ┌───────────────────────────────────────────────────┐┌─────────────────┐
+ │ buffer allocated on heap at (1) ││after valid range│
+ └───────────────────────────────────────────────────┘└─────────────────┘
+ ├─────────────────────────┬─────────────────────────┤├────────┬────────┤
+ │ │
+ ╭────────┴────────╮ ╭─────────┴────────╮
+ │capacity: 3 bytes│ │overflow of 1 byte│
+ ╰─────────────────╯ ╰──────────────────╯
+ { dg-end-multiline-output "" } */
diff --git a/gcc/testsuite/gcc.dg/analyzer/strcpy-1.c b/gcc/testsuite/gcc.dg/analyzer/strcpy-1.c
index d21e77175119..30341061f4cc 100644
--- a/gcc/testsuite/gcc.dg/analyzer/strcpy-1.c
+++ b/gcc/testsuite/gcc.dg/analyzer/strcpy-1.c
@@ -30,3 +30,25 @@ char *test_uninitialized (char *dst)
return strcpy (dst, buf); /* { dg-warning "use of uninitialized value 'buf\\\[0\\\]'" } */
/* { dg-message "while looking for null terminator for argument 2 \\('&buf'\\) of 'strcpy'..." "event" { target *-*-* } .-1 } */
}
+
+extern void external_fn (void *ptr);
+
+char *test_external_fn (void)
+{
+ char src[10];
+ char dst[10];
+ external_fn (src);
+ strcpy (dst, src);
+ __analyzer_eval (strlen (dst) == strlen (src)); /* { dg-warning "UNKNOWN" } */
+ // TODO: ideally would be TRUE
+}
+
+void test_sprintf_strcpy (const char *a, const char *b)
+{
+ char buf_1[10];
+ char buf_2[10];
+ __builtin_sprintf (buf_1, "%s/%s", a, b);
+ strcpy (buf_2, buf_1);
+ __analyzer_eval (strlen (buf_1) == strlen (buf_2)); /* { dg-warning "UNKNOWN" } */
+ // TODO: ideally would be TRUE
+}
diff --git a/gcc/testsuite/gcc.dg/analyzer/strcpy-3.c b/gcc/testsuite/gcc.dg/analyzer/strcpy-3.c
index a38f9a7641fe..abb49bc39f27 100644
--- a/gcc/testsuite/gcc.dg/analyzer/strcpy-3.c
+++ b/gcc/testsuite/gcc.dg/analyzer/strcpy-3.c
@@ -20,4 +20,5 @@ void test_1 (void)
__analyzer_eval (result[3] == 'l'); /* { dg-warning "TRUE" } */
__analyzer_eval (result[4] == 'o'); /* { dg-warning "TRUE" } */
__analyzer_eval (result[5] == 0); /* { dg-warning "TRUE" } */
+ __analyzer_eval (strlen (result) == 5); /* { dg-warning "TRUE" } */
}
diff --git a/gcc/testsuite/gcc.dg/analyzer/strcpy-4.c b/gcc/testsuite/gcc.dg/analyzer/strcpy-4.c
new file mode 100644
index 000000000000..435a4cadee9d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/strcpy-4.c
@@ -0,0 +1,51 @@
+/* { dg-additional-options "-Wno-stringop-overflow" } */
+
+#include <string.h>
+#include "analyzer-decls.h"
+
+void
+test_fixed_size_stack_1 (void)
+{
+ char buf[3];
+ strcpy (buf, "abc"); /* { dg-warning "stack-based buffer overflow" } */
+}
+
+char *test_fixed_size_heap_1 (void)
+{
+ char str[] = "abc";
+ char *p = __builtin_malloc (3);
+ if (!p)
+ return NULL;
+ strcpy (p, str); /* { dg-warning "heap-based buffer overflow" } */
+ return p;
+}
+
+char *test_fixed_size_heap_2_invalid (void)
+{
+ char str[] = "abc";
+ char *p = __builtin_malloc (strlen (str));
+ if (!p)
+ return NULL;
+ strcpy (p, str); /* { dg-warning "heap-based buffer overflow" } */
+ return p;
+}
+
+char *test_fixed_size_heap_2_valid (void)
+{
+ char str[] = "abc";
+ char *p = __builtin_malloc (strlen (str) + 1);
+ if (!p)
+ return NULL;
+ strcpy (p, str); /* { dg-bogus "" } */
+ __analyzer_eval (strlen (p) == 3); /* { dg-warning "TRUE" } */
+ return p;
+}
+
+char *test_dynamic_size_heap_1 (const char *src)
+{
+ char *p = __builtin_malloc (strlen (src));
+ if (!p)
+ return NULL;
+ strcpy (p, src); // TODO: write of null terminator is oob
+ return p;
+}
--
2.26.3
next prev parent reply other threads:[~2023-08-24 14:39 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-08-24 14:38 [pushed 0/9] analyzer: strlen, strcpy, and strcat [PR105899] David Malcolm
2023-08-24 14:38 ` [PATCH 1/9] analyzer: add logging to impl_path_context David Malcolm
2023-08-24 14:38 ` [PATCH 2/9] analyzer: handle symbolic bindings in scan_for_null_terminator [PR105899] David Malcolm
2023-08-24 14:38 ` David Malcolm [this message]
2023-08-24 14:38 ` [PATCH 4/9] analyzer: eliminate region_model::get_string_size [PR105899] David Malcolm
2023-08-24 14:38 ` [PATCH 5/9] analyzer: reimplement kf_memcpy_memmove David Malcolm
2023-08-24 14:39 ` [PATCH 6/9] analyzer: handle strlen(INIT_VAL(STRING_REG)) [PR105899] David Malcolm
2023-08-24 14:39 ` [PATCH 7/9] analyzer: handle INIT_VAL(ELEMENT_REG(STRING_REG), CONSTANT_SVAL) [PR105899] David Malcolm
2023-08-24 14:39 ` [PATCH 8/9] analyzer: handle strlen(BITS_WITHIN) [PR105899] David Malcolm
2023-08-24 14:39 ` [PATCH 9/9] analyzer: implement kf_strcat [PR105899] David Malcolm
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230824143903.3161185-4-dmalcolm@redhat.com \
--to=dmalcolm@redhat.com \
--cc=gcc-patches@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).