From: David Malcolm <dmalcolm@redhat.com>
To: gcc-patches@gcc.gnu.org
Cc: David Malcolm <dmalcolm@redhat.com>
Subject: [PATCH 9/9] analyzer: implement kf_strcat [PR105899]
Date: Thu, 24 Aug 2023 10:39:03 -0400 [thread overview]
Message-ID: <20230824143903.3161185-10-dmalcolm@redhat.com> (raw)
In-Reply-To: <20230824143903.3161185-1-dmalcolm@redhat.com>
gcc/analyzer/ChangeLog:
PR analyzer/105899
* call-details.cc
(call_details::check_for_null_terminated_string_arg): Split into
overloads, one taking just an arg_idx, the other a new
"include_terminator" param.
* call-details.h: Likewise.
* kf.cc (class kf_strcat): New.
(kf_strcpy::impl_call_pre): Update for change to
check_for_null_terminated_string_arg.
(register_known_functions): Register kf_strcat.
* region-model.cc
(region_model::check_for_null_terminated_string_arg): Split into
overloads, one taking just an arg_idx, the other a new
"include_terminator" param. When returning an svalue, handle
"include_terminator" being false by subtracting one.
* region-model.h
(region_model::check_for_null_terminated_string_arg): Split into
overloads, one taking just an arg_idx, the other a new
"include_terminator" param.
gcc/ChangeLog:
PR analyzer/105899
* doc/invoke.texi (Static Analyzer Options): Add "strcat" to the
list of functions known to the analyzer.
gcc/testsuite/ChangeLog:
PR analyzer/105899
* gcc.dg/analyzer/strcat-1.c: New test.
---
gcc/analyzer/call-details.cc | 12 +-
gcc/analyzer/call-details.h | 5 +-
gcc/analyzer/kf.cc | 72 ++++++++++--
gcc/analyzer/region-model.cc | 63 +++++++++--
gcc/analyzer/region-model.h | 6 +-
gcc/doc/invoke.texi | 1 +
gcc/testsuite/gcc.dg/analyzer/strcat-1.c | 136 +++++++++++++++++++++++
7 files changed, 275 insertions(+), 20 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/analyzer/strcat-1.c
diff --git a/gcc/analyzer/call-details.cc b/gcc/analyzer/call-details.cc
index 8f5b28ce6c26..ce1f859c9996 100644
--- a/gcc/analyzer/call-details.cc
+++ b/gcc/analyzer/call-details.cc
@@ -386,13 +386,23 @@ call_details::lookup_function_attribute (const char *attr_name) const
return lookup_attribute (attr_name, TYPE_ATTRIBUTES (allocfntype));
}
+void
+call_details::check_for_null_terminated_string_arg (unsigned arg_idx) const
+{
+ check_for_null_terminated_string_arg (arg_idx, false, nullptr);
+}
+
const svalue *
call_details::
check_for_null_terminated_string_arg (unsigned arg_idx,
+ bool include_terminator,
const svalue **out_sval) const
{
region_model *model = get_model ();
- return model->check_for_null_terminated_string_arg (*this, arg_idx, out_sval);
+ return model->check_for_null_terminated_string_arg (*this,
+ arg_idx,
+ include_terminator,
+ out_sval);
}
} // namespace ana
diff --git a/gcc/analyzer/call-details.h b/gcc/analyzer/call-details.h
index 58b5ccd2acde..ae528e4ab116 100644
--- a/gcc/analyzer/call-details.h
+++ b/gcc/analyzer/call-details.h
@@ -72,9 +72,12 @@ public:
tree lookup_function_attribute (const char *attr_name) const;
+ void
+ check_for_null_terminated_string_arg (unsigned arg_idx) const;
const svalue *
check_for_null_terminated_string_arg (unsigned arg_idx,
- const svalue **out_sval = nullptr) const;
+ bool include_terminator,
+ const svalue **out_sval) const;
private:
const gcall *m_call;
diff --git a/gcc/analyzer/kf.cc b/gcc/analyzer/kf.cc
index 3eddbe200387..36d9d10bb013 100644
--- a/gcc/analyzer/kf.cc
+++ b/gcc/analyzer/kf.cc
@@ -1106,6 +1106,61 @@ public:
/* Currently a no-op. */
};
+/* Handler for "strcat" and "__builtin_strcat_chk". */
+
+class kf_strcat : public known_function
+{
+public:
+ kf_strcat (unsigned int num_args) : m_num_args (num_args) {}
+ bool matches_call_types_p (const call_details &cd) const final override
+ {
+ return (cd.num_args () == m_num_args
+ && cd.arg_is_pointer_p (0)
+ && cd.arg_is_pointer_p (1));
+ }
+
+ void impl_call_pre (const call_details &cd) const final override
+ {
+ region_model *model = cd.get_model ();
+ region_model_manager *mgr = cd.get_manager ();
+
+ const svalue *dest_sval = cd.get_arg_svalue (0);
+ const region *dest_reg = model->deref_rvalue (dest_sval, cd.get_arg_tree (0),
+ cd.get_ctxt ());
+
+ const svalue *dst_strlen_sval
+ = cd.check_for_null_terminated_string_arg (0, false, nullptr);
+ if (!dst_strlen_sval)
+ {
+ if (cd.get_ctxt ())
+ cd.get_ctxt ()->terminate_path ();
+ return;
+ }
+
+ const svalue *bytes_to_copy;
+ const svalue *num_src_bytes_read_sval
+ = cd.check_for_null_terminated_string_arg (1, true, &bytes_to_copy);
+ if (!num_src_bytes_read_sval)
+ {
+ if (cd.get_ctxt ())
+ cd.get_ctxt ()->terminate_path ();
+ return;
+ }
+
+ cd.maybe_set_lhs (dest_sval);
+
+ const region *offset_reg
+ = mgr->get_offset_region (dest_reg, NULL_TREE, dst_strlen_sval);
+ model->write_bytes (offset_reg,
+ num_src_bytes_read_sval,
+ bytes_to_copy,
+ cd.get_ctxt ());
+ }
+
+private:
+ unsigned int m_num_args;
+};
+
/* Handler for "strcpy" and "__builtin_strcpy_chk". */
class kf_strcpy : public known_function
@@ -1139,7 +1194,7 @@ kf_strcpy::impl_call_pre (const call_details &cd) const
const svalue *bytes_to_copy;
if (const svalue *num_bytes_read_sval
- = cd.check_for_null_terminated_string_arg (1, &bytes_to_copy))
+ = cd.check_for_null_terminated_string_arg (1, true, &bytes_to_copy))
{
model->write_bytes (dest_reg, num_bytes_read_sval, bytes_to_copy, ctxt);
}
@@ -1188,16 +1243,10 @@ public:
}
void impl_call_pre (const call_details &cd) const final override
{
- if (const svalue *bytes_read = cd.check_for_null_terminated_string_arg (0))
- if (bytes_read->get_kind () != SK_UNKNOWN)
+ if (const svalue *strlen_sval
+ = cd.check_for_null_terminated_string_arg (0, false, nullptr))
+ if (strlen_sval->get_kind () != SK_UNKNOWN)
{
- region_model_manager *mgr = cd.get_manager ();
- /* strlen is (bytes_read - 1). */
- const svalue *one = mgr->get_or_create_int_cst (size_type_node, 1);
- const svalue *strlen_sval = mgr->get_or_create_binop (size_type_node,
- MINUS_EXPR,
- bytes_read,
- one);
cd.maybe_set_lhs (strlen_sval);
return;
}
@@ -1415,6 +1464,8 @@ register_known_functions (known_function_manager &kfm)
kfm.add (BUILT_IN_SPRINTF, make_unique<kf_sprintf> ());
kfm.add (BUILT_IN_STACK_RESTORE, make_unique<kf_stack_restore> ());
kfm.add (BUILT_IN_STACK_SAVE, make_unique<kf_stack_save> ());
+ kfm.add (BUILT_IN_STRCAT, make_unique<kf_strcat> (2));
+ kfm.add (BUILT_IN_STRCAT_CHK, make_unique<kf_strcat> (3));
kfm.add (BUILT_IN_STRCHR, make_unique<kf_strchr> ());
kfm.add (BUILT_IN_STRCPY, make_unique<kf_strcpy> (2));
kfm.add (BUILT_IN_STRCPY_CHK, make_unique<kf_strcpy> (3));
@@ -1429,6 +1480,7 @@ register_known_functions (known_function_manager &kfm)
/* Known builtins and C standard library functions. */
{
kfm.add ("memset", make_unique<kf_memset> ());
+ kfm.add ("strcat", make_unique<kf_strcat> (2));
kfm.add ("strdup", make_unique<kf_strdup> ());
kfm.add ("strndup", make_unique<kf_strndup> ());
}
diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index 025b555d7b97..02c073c15bcc 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -3679,9 +3679,41 @@ region_model::scan_for_null_terminator (const region *reg,
- the buffer pointed to has any uninitalized bytes before any 0-terminator
- any of the reads aren't within the bounds of the underlying base region
- Otherwise, return a svalue for the number of bytes read (strlen + 1),
- and, if OUT_SVAL is non-NULL, write to *OUT_SVAL with an svalue
- representing the content of the buffer up to and including the terminator.
+ Otherwise, return a svalue for strlen of the buffer (*not* including
+ the null terminator).
+
+ TODO: we should also complain if:
+ - the pointer is NULL (or could be). */
+
+void
+region_model::check_for_null_terminated_string_arg (const call_details &cd,
+ unsigned arg_idx)
+{
+ check_for_null_terminated_string_arg (cd,
+ arg_idx,
+ false, /* include_terminator */
+ nullptr); // out_sval
+}
+
+
+/* Check that argument ARG_IDX (0-based) to the call described by CD
+ is a pointer to a valid null-terminated string.
+
+ Simulate scanning through the buffer, reading until we find a 0 byte
+ (equivalent to calling strlen).
+
+ Complain and return NULL if:
+ - the buffer pointed to isn't null-terminated
+ - the buffer pointed to has any uninitalized bytes before any 0-terminator
+ - any of the reads aren't within the bounds of the underlying base region
+
+ Otherwise, return a svalue. This will be the number of bytes read
+ (including the null terminator) if INCLUDE_TERMINATOR is true, or strlen
+ of the buffer (not including the null terminator) if it is false.
+
+ Also, when returning an svalue, if OUT_SVAL is non-NULL, write to
+ *OUT_SVAL with an svalue representing the content of the buffer up to
+ and including the terminator.
TODO: we should also complain if:
- the pointer is NULL (or could be). */
@@ -3689,6 +3721,7 @@ region_model::scan_for_null_terminator (const region *reg,
const svalue *
region_model::check_for_null_terminated_string_arg (const call_details &cd,
unsigned arg_idx,
+ bool include_terminator,
const svalue **out_sval)
{
class null_terminator_check_event : public custom_event
@@ -3786,10 +3819,26 @@ region_model::check_for_null_terminated_string_arg (const call_details &cd,
const region *buf_reg
= deref_rvalue (arg_sval, cd.get_arg_tree (arg_idx), &my_ctxt);
- return scan_for_null_terminator (buf_reg,
- cd.get_arg_tree (arg_idx),
- out_sval,
- &my_ctxt);
+ if (const svalue *num_bytes_read_sval
+ = scan_for_null_terminator (buf_reg,
+ cd.get_arg_tree (arg_idx),
+ out_sval,
+ &my_ctxt))
+ {
+ if (include_terminator)
+ return num_bytes_read_sval;
+ else
+ {
+ /* strlen is (bytes_read - 1). */
+ const svalue *one = m_mgr->get_or_create_int_cst (size_type_node, 1);
+ return m_mgr->get_or_create_binop (size_type_node,
+ MINUS_EXPR,
+ num_bytes_read_sval,
+ one);
+ }
+ }
+ else
+ return nullptr;
}
/* Remove all bindings overlapping REG within the store. */
diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h
index b1c705e22c28..40259625fb06 100644
--- a/gcc/analyzer/region-model.h
+++ b/gcc/analyzer/region-model.h
@@ -519,10 +519,14 @@ class region_model
const svalue *sval_hint,
region_model_context *ctxt) const;
+ void
+ check_for_null_terminated_string_arg (const call_details &cd,
+ unsigned idx);
const svalue *
check_for_null_terminated_string_arg (const call_details &cd,
unsigned idx,
- const svalue **out_sval = nullptr);
+ bool include_terminator,
+ const svalue **out_sval);
private:
const region *get_lvalue_1 (path_var pv, region_model_context *ctxt) const;
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index ef3f40989860..209d6b0ce4d3 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -11109,6 +11109,7 @@ and of the following functions:
@item @code{siglongjmp}
@item @code{signal}
@item @code{sigsetjmp}
+@item @code{strcat}
@item @code{strchr}
@item @code{strlen}
@end itemize
diff --git a/gcc/testsuite/gcc.dg/analyzer/strcat-1.c b/gcc/testsuite/gcc.dg/analyzer/strcat-1.c
new file mode 100644
index 000000000000..e3b698ae73d3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/strcat-1.c
@@ -0,0 +1,136 @@
+/* See e.g. https://en.cppreference.com/w/c/string/byte/strcat */
+
+#include "analyzer-decls.h"
+
+char *strcat (char *dest, const char *src);
+#define NULL ((void *)0)
+
+char *
+test_passthrough (char *dest, const char *src)
+{
+ return strcat (dest, src);
+}
+
+char *
+test_null_dest (const char *src)
+{
+ return strcat (NULL, src); /* { dg-warning "use of NULL where non-null expected" } */
+}
+
+char *
+test_null_src (char *dest)
+{
+ return strcat (dest, NULL); /* { dg-warning "use of NULL where non-null expected" } */
+}
+
+char *
+test_uninit_dest (const char *src)
+{
+ char dest[10];
+ return strcat (dest, src); /* { dg-warning "use of uninitialized value 'dest\\\[0\\\]'" } */
+}
+
+char *
+test_uninit_src (char *dest)
+{
+ const char src[10];
+ return strcat (dest, src); /* { dg-warning "use of uninitialized value 'src\\\[0\\\]'" } */
+}
+
+char *
+test_dest_not_terminated (char *src)
+{
+ char dest[3] = "foo";
+ return strcat (dest, src); /* { dg-warning "stack-based buffer over-read" } */
+ /* { dg-message "while looking for null terminator for argument 1 \\('&dest'\\) of 'strcat'" "" { target *-*-* } .-1 } */
+}
+
+char *
+test_src_not_terminated (char *dest)
+{
+ const char src[3] = "foo";
+ return strcat (dest, src); /* { dg-warning "stack-based buffer over-read" } */
+ /* { dg-message "while looking for null terminator for argument 2 \\('&src'\\) of 'strcat'" "" { target *-*-* } .-1 } */
+}
+
+char * __attribute__((noinline))
+call_strcat (char *dest, const char *src)
+{
+ return strcat (dest, src);
+}
+
+void
+test_concrete_valid_static_size (void)
+{
+ char buf[16];
+ char *p1 = __builtin_strcpy (buf, "abc");
+ char *p2 = call_strcat (buf, "def");
+ __analyzer_eval (p1 == buf); /* { dg-warning "TRUE" } */
+ __analyzer_eval (p2 == buf); /* { dg-warning "TRUE" } */
+ __analyzer_eval (buf[0] == 'a'); /* { dg-warning "TRUE" } */
+ __analyzer_eval (buf[1] == 'b'); /* { dg-warning "TRUE" } */
+ __analyzer_eval (buf[2] == 'c'); /* { dg-warning "TRUE" } */
+ __analyzer_eval (buf[3] == 'd'); /* { dg-warning "TRUE" } */
+ __analyzer_eval (buf[4] == 'e'); /* { dg-warning "TRUE" } */
+ __analyzer_eval (buf[5] == 'f'); /* { dg-warning "TRUE" } */
+ __analyzer_eval (buf[6] == '\0'); /* { dg-warning "TRUE" } */
+ __analyzer_eval (__builtin_strlen (buf) == 6); /* { dg-warning "TRUE" } */
+}
+
+void
+test_concrete_valid_static_size_2 (void)
+{
+ char buf[16];
+ char *p1 = __builtin_strcpy (buf, "abc");
+ char *p2 = call_strcat (buf, "def");
+ char *p3 = call_strcat (buf, "ghi");
+ __analyzer_eval (p1 == buf); /* { dg-warning "TRUE" } */
+ __analyzer_eval (p2 == buf); /* { dg-warning "TRUE" } */
+ __analyzer_eval (p3 == buf); /* { dg-warning "TRUE" } */
+ __analyzer_eval (buf[0] == 'a'); /* { dg-warning "TRUE" } */
+ __analyzer_eval (buf[1] == 'b'); /* { dg-warning "TRUE" } */
+ __analyzer_eval (buf[2] == 'c'); /* { dg-warning "TRUE" } */
+ __analyzer_eval (buf[3] == 'd'); /* { dg-warning "TRUE" } */
+ __analyzer_eval (buf[4] == 'e'); /* { dg-warning "TRUE" } */
+ __analyzer_eval (buf[5] == 'f'); /* { dg-warning "TRUE" } */
+ __analyzer_eval (buf[6] == 'g'); /* { dg-warning "TRUE" } */
+ __analyzer_eval (buf[7] == 'h'); /* { dg-warning "TRUE" } */
+ __analyzer_eval (buf[8] == 'i'); /* { dg-warning "TRUE" } */
+ __analyzer_eval (buf[9] == '\0'); /* { dg-warning "TRUE" } */
+ __analyzer_eval (__builtin_strlen (buf) == 9); /* { dg-warning "TRUE" } */
+ __analyzer_eval (__builtin_strlen (buf + 1) == 8); /* { dg-warning "TRUE" } */
+ __analyzer_eval (__builtin_strlen (buf + 2) == 7); /* { dg-warning "TRUE" } */
+ __analyzer_eval (__builtin_strlen (buf + 3) == 6); /* { dg-warning "TRUE" } */
+ __analyzer_eval (__builtin_strlen (buf + 4) == 5); /* { dg-warning "TRUE" } */
+ __analyzer_eval (__builtin_strlen (buf + 5) == 4); /* { dg-warning "TRUE" } */
+ __analyzer_eval (__builtin_strlen (buf + 6) == 3); /* { dg-warning "TRUE" } */
+ __analyzer_eval (__builtin_strlen (buf + 7) == 2); /* { dg-warning "TRUE" } */
+ __analyzer_eval (__builtin_strlen (buf + 8) == 1); /* { dg-warning "TRUE" } */
+ __analyzer_eval (__builtin_strlen (buf + 9) == 0); /* { dg-warning "TRUE" } */
+}
+
+char * __attribute__((noinline))
+call_strcat_invalid (char *dest, const char *src)
+{
+ return strcat (dest, src); /* { dg-warning "stack-based buffer overflow" } */
+}
+
+void
+test_concrete_invalid_static_size (void)
+{
+ char buf[3];
+ buf[0] = '\0';
+ call_strcat_invalid (buf, "abc");
+}
+
+void
+test_concrete_symbolic (const char *suffix)
+{
+ char buf[10];
+ buf[0] = '\0';
+ call_strcat (buf, suffix);
+}
+
+/* TODO:
+ - "The behavior is undefined if the strings overlap."
+*/
--
2.26.3
prev parent reply other threads:[~2023-08-24 14:39 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-08-24 14:38 [pushed 0/9] analyzer: strlen, strcpy, and strcat [PR105899] David Malcolm
2023-08-24 14:38 ` [PATCH 1/9] analyzer: add logging to impl_path_context David Malcolm
2023-08-24 14:38 ` [PATCH 2/9] analyzer: handle symbolic bindings in scan_for_null_terminator [PR105899] David Malcolm
2023-08-24 14:38 ` [PATCH 3/9] analyzer: reimplement kf_strcpy [PR105899] David Malcolm
2023-08-24 14:38 ` [PATCH 4/9] analyzer: eliminate region_model::get_string_size [PR105899] David Malcolm
2023-08-24 14:38 ` [PATCH 5/9] analyzer: reimplement kf_memcpy_memmove David Malcolm
2023-08-24 14:39 ` [PATCH 6/9] analyzer: handle strlen(INIT_VAL(STRING_REG)) [PR105899] David Malcolm
2023-08-24 14:39 ` [PATCH 7/9] analyzer: handle INIT_VAL(ELEMENT_REG(STRING_REG), CONSTANT_SVAL) [PR105899] David Malcolm
2023-08-24 14:39 ` [PATCH 8/9] analyzer: handle strlen(BITS_WITHIN) [PR105899] David Malcolm
2023-08-24 14:39 ` David Malcolm [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230824143903.3161185-10-dmalcolm@redhat.com \
--to=dmalcolm@redhat.com \
--cc=gcc-patches@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).