From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.129.124]) by sourceware.org (Postfix) with ESMTPS id C635E3858412 for ; Fri, 25 Aug 2023 12:44:37 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org C635E3858412 Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=redhat.com Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=redhat.com DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com; s=mimecast20190719; t=1692967477; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding; bh=sI614cezgUR3u+A9lyOGuVljxqBJbhO/JOPTpkjMu+k=; b=SMn5QJl68noY6C86rKut5VzwpaIe3rESPQzB/mECyX8TvuRlrFEVe+QXCUJM6ixoY7f0E9 meKd0ptmPmWhj9FkJ9mo+ESbMcH2OKiL89DrXbYyvc64Ldxvsjneonyq+NfPamHO2vqBLp 0H+J8JpZxvP9OwAYkVnrG2nVS++vrEw= Received: from mimecast-mx02.redhat.com (66.187.233.73 [66.187.233.73]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-677-Z9pqTospOW6PacMbke5bUQ-1; Fri, 25 Aug 2023 08:44:35 -0400 X-MC-Unique: Z9pqTospOW6PacMbke5bUQ-1 Received: from smtp.corp.redhat.com (int-mx03.intmail.prod.int.rdu2.redhat.com [10.11.54.3]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id 92E2F1C0725B for ; Fri, 25 Aug 2023 12:44:35 +0000 (UTC) Received: from t14s.localdomain.com (unknown [10.22.32.117]) by smtp.corp.redhat.com (Postfix) with ESMTP id 49CEC1121319; Fri, 25 Aug 2023 12:44:35 +0000 (UTC) From: David Malcolm To: gcc-patches@gcc.gnu.org Cc: David Malcolm Subject: [pushed] analyzer: fix ICE in text art strings support Date: Fri, 25 Aug 2023 08:44:33 -0400 Message-Id: <20230825124433.3279791-1-dmalcolm@redhat.com> MIME-Version: 1.0 
X-Scanned-By: MIMEDefang 3.1 on 10.11.54.3 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-Spam-Status: No, score=-10.2 required=5.0 tests=BAYES_00,BODY_8BITS,DKIMWL_WL_HIGH,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,GIT_PATCH_0,RCVD_IN_DNSWL_NONE,RCVD_IN_MSPIKE_H4,RCVD_IN_MSPIKE_WL,SPF_HELO_NONE,SPF_NONE,TXREP autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu. Pushed to trunk as r14-3481-g99a3fcb8ff0bf2. gcc/analyzer/ChangeLog: * access-diagram.cc (class string_region_spatial_item): Remove assumption that the string is written to the start of the cluster. gcc/testsuite/ChangeLog: * gcc.dg/analyzer/out-of-bounds-diagram-17.c: New test. * gcc.dg/analyzer/out-of-bounds-diagram-18.c: New test. * gcc.dg/analyzer/out-of-bounds-diagram-19.c: New test. --- gcc/analyzer/access-diagram.cc | 57 ++++++++++++------- .../analyzer/out-of-bounds-diagram-17.c | 34 +++++++++++ .../analyzer/out-of-bounds-diagram-18.c | 38 +++++++++++++ .../analyzer/out-of-bounds-diagram-19.c | 45 +++++++++++++++ 4 files changed, 155 insertions(+), 19 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-17.c create mode 100644 gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-18.c create mode 100644 gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-19.c diff --git a/gcc/analyzer/access-diagram.cc b/gcc/analyzer/access-diagram.cc index d7b669a4e38e..a51d594b5b2c 100644 --- a/gcc/analyzer/access-diagram.cc +++ b/gcc/analyzer/access-diagram.cc @@ -1509,10 +1509,16 @@ public: out.add_all_bytes_in_range (m_actual_bits); else { - byte_range head_of_string (0, m_ellipsis_head_len); + byte_range bytes (0, 0); + bool valid = m_actual_bits.as_concrete_byte_range (&bytes); + gcc_assert (valid); + byte_range head_of_string 
(bytes.get_start_byte_offset (), + m_ellipsis_head_len); out.add_all_bytes_in_range (head_of_string); byte_range tail_of_string - (TREE_STRING_LENGTH (string_cst) - m_ellipsis_tail_len, + ((bytes.get_start_byte_offset () + + TREE_STRING_LENGTH (string_cst) + - m_ellipsis_tail_len), m_ellipsis_tail_len); out.add_all_bytes_in_range (tail_of_string); /* Adding the above pair of ranges will also effectively add @@ -1535,11 +1541,14 @@ public: tree string_cst = get_string_cst (); if (m_show_full_string) { - for (byte_offset_t byte_idx = bytes.get_start_byte_offset (); - byte_idx < bytes.get_next_byte_offset (); - byte_idx = byte_idx + 1) - add_column_for_byte (t, btm, sm, byte_idx, - byte_idx_table_y, byte_val_table_y); + for (byte_offset_t byte_idx_within_cluster + = bytes.get_start_byte_offset (); + byte_idx_within_cluster < bytes.get_next_byte_offset (); + byte_idx_within_cluster = byte_idx_within_cluster + 1) + add_column_for_byte + (t, btm, sm, byte_idx_within_cluster, + byte_idx_within_cluster - bytes.get_start_byte_offset (), + byte_idx_table_y, byte_val_table_y); if (m_show_utf8) { @@ -1566,10 +1575,13 @@ public: = decoded_char.m_start_byte - TREE_STRING_POINTER (string_cst); byte_size_t size_in_bytes = decoded_char.m_next_byte - decoded_char.m_start_byte; - byte_range bytes (start_byte_idx, size_in_bytes); + byte_range cluster_bytes_for_codepoint + (start_byte_idx + bytes.get_start_byte_offset (), + size_in_bytes); const table::rect_t code_point_table_rect - = btm.get_table_rect (&m_string_reg, bytes, + = btm.get_table_rect (&m_string_reg, + cluster_bytes_for_codepoint, utf8_code_point_table_y, 1); char buf[100]; sprintf (buf, "U+%04x", decoded_char.m_ch); @@ -1579,7 +1591,8 @@ public: if (show_unichars) { const table::rect_t character_table_rect - = btm.get_table_rect (&m_string_reg, bytes, + = btm.get_table_rect (&m_string_reg, + cluster_bytes_for_codepoint, utf8_character_table_y, 1); if (cpp_is_printable_char (decoded_char.m_ch)) t.set_cell_span 
(character_table_rect, @@ -1598,12 +1611,14 @@ public: { /* Head of string. */ for (int byte_idx = 0; byte_idx < m_ellipsis_head_len; byte_idx++) - add_column_for_byte (t, btm, sm, byte_idx, + add_column_for_byte (t, btm, sm, + byte_idx + bytes.get_start_byte_offset (), + byte_idx, byte_idx_table_y, byte_val_table_y); /* Ellipsis (two rows high). */ const byte_range ellipsis_bytes - (m_ellipsis_head_len, + (m_ellipsis_head_len + bytes.get_start_byte_offset (), TREE_STRING_LENGTH (string_cst) - (m_ellipsis_head_len + m_ellipsis_tail_len)); const table::rect_t table_rect @@ -1616,7 +1631,9 @@ public: = (TREE_STRING_LENGTH (string_cst) - m_ellipsis_tail_len); byte_idx < TREE_STRING_LENGTH (string_cst); byte_idx++) - add_column_for_byte (t, btm, sm, byte_idx, + add_column_for_byte (t, btm, sm, + byte_idx + bytes.get_start_byte_offset (), + byte_idx, byte_idx_table_y, byte_val_table_y); } @@ -1660,25 +1677,27 @@ private: void add_column_for_byte (table &t, const bit_to_table_map &btm, style_manager &sm, - const byte_offset_t byte_idx, + const byte_offset_t byte_idx_within_cluster, + const byte_offset_t byte_idx_within_string, const int byte_idx_table_y, const int byte_val_table_y) const { tree string_cst = get_string_cst (); - gcc_assert (byte_idx >= 0); - gcc_assert (byte_idx < TREE_STRING_LENGTH (string_cst)); + gcc_assert (byte_idx_within_string >= 0); + gcc_assert (byte_idx_within_string < TREE_STRING_LENGTH (string_cst)); - const byte_range bytes (byte_idx, 1); + const byte_range bytes (byte_idx_within_cluster, 1); if (1) // show_byte_indices { const table::rect_t idx_table_rect = btm.get_table_rect (&m_string_reg, bytes, byte_idx_table_y, 1); t.set_cell_span (idx_table_rect, fmt_styled_string (sm, "[%li]", - byte_idx.ulow ())); + byte_idx_within_string.ulow ())); } - char byte_val = TREE_STRING_POINTER (string_cst)[byte_idx.ulow ()]; + char byte_val + = TREE_STRING_POINTER (string_cst)[byte_idx_within_string.ulow ()]; const table::rect_t val_table_rect = 
btm.get_table_rect (&m_string_reg, bytes, byte_val_table_y, 1); table_cell_content content (make_cell_content_for_byte (sm, byte_val)); diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-17.c b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-17.c new file mode 100644 index 000000000000..6920e8c776fc --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-17.c @@ -0,0 +1,34 @@ +/* { dg-additional-options "-fdiagnostics-text-art-charset=unicode" } */ + +#include <string.h> + +void test (void) +{ + char buf[10]; + strcpy (buf, "hello"); + strcat (buf, " world!"); /* { dg-warning "stack-based buffer overflow" } */ + /* { dg-message "write of 3 bytes to beyond the end of 'buf'" "" { target *-*-* } .-1 } */ +} + +/* { dg-begin-multiline-output "" } + ┌─────┬─────┬────┬────┬────┐┌─────┬─────┬─────┐ + │ [0] │ [1] │[2] │[3] │[4] ││ [5] │ [6] │ [7] │ + ├─────┼─────┼────┼────┼────┤├─────┼─────┼─────┤ + │ ' ' │ 'w' │'o' │'r' │'l' ││ 'd' │ '!' │ NUL │ + ├─────┴─────┴────┴────┴────┴┴─────┴─────┴─────┤ + │ string literal (type: 'char[8]') │ + └─────────────────────────────────────────────┘ + │ │ │ │ │ │ │ │ + │ │ │ │ │ │ │ │ + v v v v v v v v + ┌─────┬────────────────────────────────────────┬────┐┌─────────────────┐ + │ [0] │ ... 
│[9] ││ │ + ├─────┴────────────────────────────────────────┴────┤│after valid range│ + │ 'buf' (type: 'char[10]') ││ │ + └───────────────────────────────────────────────────┘└─────────────────┘ + ├─────────────────────────┬─────────────────────────┤├────────┬────────┤ + │ │ + ╭─────────┴────────╮ ╭─────────┴─────────╮ + │capacity: 10 bytes│ │overflow of 3 bytes│ + ╰──────────────────╯ ╰───────────────────╯ + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-18.c b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-18.c new file mode 100644 index 000000000000..ea0b88019cd9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-18.c @@ -0,0 +1,38 @@ +/* { dg-additional-options "-fdiagnostics-text-art-charset=unicode" } */ + +#include <string.h> + +void test (void) +{ + char buf[11]; + strcpy (buf, "サツキ"); + strcat (buf, "メイ"); /* { dg-warning "stack-based buffer overflow" } */ + /* { dg-message "write of 5 bytes to beyond the end of 'buf'" "" { target *-*-* } .-1 } */ +} + +/* { dg-begin-multiline-output "" } + ┌─────┬─────────┐┌────┬────┬────┬────┬──────┐ + │ [0] │ [1] ││[2] │[3] │[4] │[5] │ [6] │ + ├─────┼─────────┤├────┼────┼────┼────┼──────┤ + │0xe3 │ 0x83 ││0xa1│0xe3│0x82│0xa4│ 0x00 │ + ├─────┴─────────┴┴────┼────┴────┴────┼──────┤ + │ U+30e1 │ U+30a4 │U+0000│ + ├─────────────────────┼──────────────┼──────┤ + │ メ │ イ │ NUL │ + ├─────────────────────┴──────────────┴──────┤ + │ string literal (type: 'char[7]') │ + └───────────────────────────────────────────┘ + │ │ │ │ │ │ │ + │ │ │ │ │ │ │ + v v v v v v v + ┌────┬───────────────────────────┬─────────┐┌──────────────────────────┐ + │[0] │ ... 
│ [10] ││ │ + ├────┴───────────────────────────┴─────────┤│ after valid range │ + │ 'buf' (type: 'char[11]') ││ │ + └──────────────────────────────────────────┘└──────────────────────────┘ + ├────────────────────┬─────────────────────┤├────────────┬─────────────┤ + │ │ + ╭─────────┴────────╮ ╭─────────┴─────────╮ + │capacity: 11 bytes│ │overflow of 5 bytes│ + ╰──────────────────╯ ╰───────────────────╯ + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-19.c b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-19.c new file mode 100644 index 000000000000..35ab72b6efc2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-19.c @@ -0,0 +1,45 @@ +/* { dg-additional-options "-fdiagnostics-text-art-charset=unicode -Wno-stringop-overflow" } */ +/* { dg-skip-if "" { powerpc-ibm-aix* } } */ + +#include <string.h> + +#define LOREM_IPSUM \ + "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod" \ + " tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim" \ + " veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea" \ + " commodo consequat. Duis aute irure dolor in reprehenderit in voluptate" \ + " velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint" \ + " occaecat cupidatat non proident, sunt in culpa qui officia deserunt" \ + " mollit anim id est laborum." + +void +test_long_string () +{ + char buf[100]; + strcpy (buf, "abc "); + strcat (buf, LOREM_IPSUM); /* { dg-warning "stack-based buffer overflow" } */ + /* { dg-message "write of 350 bytes to beyond the end of 'buf'" "" { target *-*-* } .-1 } */ +} + +/* { dg-begin-multiline-output "" } + ┌───┬───┬───┬───┬───┬───┬───────┬─────┬─────┬─────┬─────┬─────┬─────┐ + │[0]│[1]│[2]│[3]│[4]│[5]│ │[440]│[441]│[442]│[443]│[444]│[445]│ + ├───┼───┼───┼───┼───┼───┤ ... ├─────┼─────┼─────┼─────┼─────┼─────┤ + │'L'│'o'│'r'│'e'│'m'│' '│ │ 'o' │ 'r' │ 'u' │ 'm' │ '.' 
│ NUL │ + ├───┴───┴───┴───┴───┴───┴───────┴─────┴─────┴─────┴─────┴─────┴─────┤ + │ string literal (type: 'char[446]') │ + └───────────────────────────────────────────────────────────────────┘ + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ + │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ + v v v v v v v v v v v v v v v + ┌───┬──────────────────────────┬────┐┌────────────────────────────────────┐ + │[0]│ ... │[99]││ │ + ├───┴──────────────────────────┴────┤│ after valid range │ + │ 'buf' (type: 'char[100]') ││ │ + └───────────────────────────────────┘└────────────────────────────────────┘ + ├─────────────────┬─────────────────┤├─────────────────┬──────────────────┤ + │ │ + ╭─────────┴─────────╮ ╭──────────┴──────────╮ + │capacity: 100 bytes│ │overflow of 350 bytes│ + ╰───────────────────╯ ╰─────────────────────╯ + { dg-end-multiline-output "" } */ -- 2.26.3