[PATCH 5b/6] diagnostics: Remove null-termination requirement for json::string json::string currently handles only null-terminated data and so can't work with data that may contain embedded null bytes or that is not null-terminated. Supporting such data will make json::string more robust in some contexts, such as SARIF output, which uses it to output user source code that may contain embedded null bytes. gcc/ChangeLog: * json.h (class string): Add M_LEN member to store the length of the data. Add constructor taking an explicit length. * json.cc (string::string): Implement the new constructor. (string::print): Support printing strings that are not null-terminated. Escape embedded null bytes on output. (test_writing_strings): Test the new null-byte-related features of json::string. diff --git a/gcc/json.cc b/gcc/json.cc index 974f8c36825..3a79cac02ac 100644 --- a/gcc/json.cc +++ b/gcc/json.cc @@ -190,6 +190,15 @@ string::string (const char *utf8) { gcc_assert (utf8); m_utf8 = xstrdup (utf8); + m_len = strlen (utf8); +} + +string::string (const char *utf8, size_t len) +{ + gcc_assert (utf8); + m_utf8 = XNEWVEC (char, len); + m_len = len; + memcpy (m_utf8, utf8, len); } /* Implementation of json::value::print for json::string. 
*/ @@ -198,9 +207,9 @@ void string::print (pretty_printer *pp) const { pp_character (pp, '"'); - for (const char *ptr = m_utf8; *ptr; ptr++) + for (size_t i = 0; i != m_len; ++i) { - char ch = *ptr; + char ch = m_utf8[i]; switch (ch) { case '"': @@ -224,7 +233,9 @@ string::print (pretty_printer *pp) const case '\t': pp_string (pp, "\\t"); break; - + case '\0': + pp_string (pp, "\\u0000"); /* JSON has no "\0" escape. */ + break; default: pp_character (pp, ch); } @@ -341,6 +352,12 @@ test_writing_strings () string contains_quotes ("before \"quoted\" after"); assert_print_eq (contains_quotes, "\"before \\\"quoted\\\" after\""); + + const char data[] = {'a', 'b', 'c', 'd', '\0', 'e', 'f'}; + string not_terminated (data, 3); + assert_print_eq (not_terminated, "\"abc\""); + string embedded_null (data, sizeof data); + assert_print_eq (embedded_null, "\"abcd\\u0000ef\""); } /* Verify that JSON literals are written correctly. */ diff --git a/gcc/json.h b/gcc/json.h index f272981259b..f7afd843dc5 100644 --- a/gcc/json.h +++ b/gcc/json.h @@ -156,16 +156,19 @@ class integer_number : public value class string : public value { public: - string (const char *utf8); + explicit string (const char *utf8); + string (const char *utf8, size_t len); ~string () { free (m_utf8); } enum kind get_kind () const final override { return JSON_STRING; } void print (pretty_printer *pp) const final override; const char *get_string () const { return m_utf8; } + size_t get_length () const { return m_len; } private: char *m_utf8; + size_t m_len; }; /* Subclass of value for the three JSON literals "true", "false",