[PATCH] diagnostics: Make line-ending logic consistent with libcpp [PR91733] libcpp recognizes a lone \r as a valid line ending, so the infrastructure for retrieving source lines to be output in diagnostics needs to do the same. This patch fixes file_cache_slot::get_next_line() accordingly so that diagnostics display the correct part of the source when \r line endings are in use. gcc/ChangeLog: PR preprocessor/91733 * input.cc (find_end_of_line): New helper function. (file_cache_slot::get_next_line): Recognize \r as a line ending. * diagnostic-show-locus.cc (test_escaping_bytes_1): Adapt selftest since \r will now be interpreted as a line-ending. gcc/testsuite/ChangeLog: PR preprocessor/91733 * c-c++-common/pr91733.c: New test. diff --git a/gcc/diagnostic-show-locus.cc b/gcc/diagnostic-show-locus.cc index 6eafe19785f..d267d2c258d 100644 --- a/gcc/diagnostic-show-locus.cc +++ b/gcc/diagnostic-show-locus.cc @@ -5508,7 +5508,7 @@ test_tab_expansion (const line_table_case &case_) static void test_escaping_bytes_1 (const line_table_case &case_) { - const char content[] = "before\0\1\2\3\r\x80\xff""after\n"; + const char content[] = "before\0\1\2\3\v\x80\xff""after\n"; const size_t sz = sizeof (content); temp_source_file tmp (SELFTEST_LOCATION, ".c", content, sz); line_table_test ltt (case_); @@ -5523,18 +5523,18 @@ test_escaping_bytes_1 (const line_table_case &case_) if (finish > LINE_MAP_MAX_LOCATION_WITH_COLS) return; - /* Locations of the NUL and \r bytes. */ + /* Locations of the NUL and \v bytes. */ location_t nul_loc = linemap_position_for_line_and_column (line_table, ord_map, 1, 7); - location_t r_loc + location_t v_loc = linemap_position_for_line_and_column (line_table, ord_map, 1, 11); gcc_rich_location richloc (nul_loc); - richloc.add_range (r_loc); + richloc.add_range (v_loc); { test_diagnostic_context dc; diagnostic_show_locus (&dc, &richloc, DK_ERROR); - ASSERT_STREQ (" before \1\2\3 \x80\xff""after\n" + ASSERT_STREQ (" before \1\2\3\v\x80\xff""after\n" " ^ ~\n", pp_formatted_text (dc.printer)); } @@ -5544,7 +5544,7 @@ test_escaping_bytes_1 (const line_table_case &case_) dc.escape_format = DIAGNOSTICS_ESCAPE_FORMAT_UNICODE; diagnostic_show_locus (&dc, &richloc, DK_ERROR); ASSERT_STREQ - (" before<80>after\n" + (" before<80>after\n" " ^~~~~~~~ ~~~~~~~~\n", pp_formatted_text (dc.printer)); } @@ -5552,7 +5552,7 @@ test_escaping_bytes_1 (const line_table_case &case_) test_diagnostic_context dc; dc.escape_format = DIAGNOSTICS_ESCAPE_FORMAT_BYTES; diagnostic_show_locus (&dc, &richloc, DK_ERROR); - ASSERT_STREQ (" before<00><01><02><03><0d><80>after\n" + ASSERT_STREQ (" before<00><01><02><03><0b><80>after\n" " ^~~~ ~~~~\n", pp_formatted_text (dc.printer)); } diff --git a/gcc/input.cc b/gcc/input.cc index 2acbfdea4f8..060ca160126 100644 --- a/gcc/input.cc +++ b/gcc/input.cc @@ -646,6 +646,37 @@ file_cache_slot::maybe_read_data () return read_data (); } +/* Helper function for file_cache_slot::get_next_line (), to find the end of + the next line. Returns with the memchr convention, i.e. nullptr if a line + terminator was not found. We need to determine line endings in the same + manner that libcpp does: any of \n, \r\n, or \r is a line ending. */ + +static char * +find_end_of_line (char *s, size_t len) +{ + for (const auto end = s + len; s != end; ++s) + { + if (*s == '\n') + return s; + if (*s == '\r') + { + const auto next = s + 1; + if (next == end) + { + /* Don't find the line ending if \r is the very last character + in the buffer; we do not know if it's the end of the file or + just the end of what has been read so far, and we wouldn't + want to break in the middle of what's actually a \r\n + sequence. Instead, we will handle the case of a file ending + in a \r later. */ + break; + } + return (*next == '\n' ? next : s); + } + } + return nullptr; +} + /* Read a new line from file FP, using C as a cache for the data coming from the file. Upon successful completion, *LINE is set to the beginning of the line found. *LINE points directly in the @@ -671,17 +702,16 @@ file_cache_slot::get_next_line (char **line, ssize_t *line_len) char *next_line_start = NULL; size_t len = 0; - char *line_end = (char *) memchr (line_start, '\n', remaining_size); + char *line_end = find_end_of_line (line_start, remaining_size); if (line_end == NULL) { - /* We haven't found the end-of-line delimiter in the cache. - Fill the cache with more data from the file and look for the - '\n'. */ + /* We haven't found an end-of-line delimiter in the cache. + Fill the cache with more data from the file and look again. */ while (maybe_read_data ()) { line_start = m_data + m_line_start_idx; remaining_size = m_nb_read - m_line_start_idx; - line_end = (char *) memchr (line_start, '\n', remaining_size); + line_end = find_end_of_line (line_start, remaining_size); if (line_end != NULL) { next_line_start = line_end + 1; @@ -690,14 +720,22 @@ file_cache_slot::get_next_line (char **line, ssize_t *line_len) } if (line_end == NULL) { - /* We've loadded all the file into the cache and still no - '\n'. Let's say the line ends up at one byte passed the + /* We've loaded all the file into the cache and still no + terminator. Let's say the line ends up at one byte past the end of the file. This is to stay consistent with the case - of when the line ends up with a '\n' and line_end points to - that terminal '\n'. That consistency is useful below in - the len calculation. */ - line_end = m_data + m_nb_read ; - m_missing_trailing_newline = true; + of when the line ends up with a terminator and line_end points to + that. That consistency is useful below in the len calculation. + + If the file ends in a \r, we didn't identify it as a line + terminator above, so do that now instead. */ + line_end = m_data + m_nb_read; + if (m_nb_read && line_end[-1] == '\r') + { + --line_end; + m_missing_trailing_newline = false; + } + else + m_missing_trailing_newline = true; } else m_missing_trailing_newline = false; @@ -711,9 +749,8 @@ file_cache_slot::get_next_line (char **line, ssize_t *line_len) if (m_fp && ferror (m_fp)) return false; - /* At this point, we've found the end of the of line. It either - points to the '\n' or to one byte after the last byte of the - file. */ + /* At this point, we've found the end of the of line. It either points to + the line terminator or to one byte after the last byte of the file. */ gcc_assert (line_end != NULL); len = line_end - line_start; diff --git a/gcc/testsuite/c-c++-common/pr91733.c b/gcc/testsuite/c-c++-common/pr91733.c new file mode 100644 index 00000000000..1539bb4f386 --- /dev/null +++ b/gcc/testsuite/c-c++-common/pr91733.c @@ -0,0 +1,17 @@ +/* { dg-do preprocess } */ +/* { dg-additional-options "-fdiagnostics-show-caret" } */ + +const char *s = " "; + +/* { dg-warning "missing terminating \"" "test1" { target *-*-* } 4 } */ +/* { dg-warning "missing terminating \"" "test2" { target *-*-* } 5 } */ + +/* { dg-begin-multiline-output "test3" } + const char *s = " + ^ +{ dg-end-multiline-output "test3" } */ + +/* { dg-begin-multiline-output "test4" } + "; + ^ +{ dg-end-multiline-output "test4" } */