From 02b786977e113302a133094ddd5b5e770679b569 Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Wed, 24 May 2023 19:54:34 +0200 Subject: [PATCH] [gdb/tui] Handle unicode chars in prompt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's try to set the prompt using a unicode character, say '❯', aka U+276F (heavy right-pointing angle quotation mark ornament). This works fine on an xterm with CLI (with X marking the position of the blinking cursor): ... $ gdb -q -ex "set prompt GDB❯ " GDB❯ X ... but with TUI: ... $ gdb -q -tui -ex "set prompt GDB❯ " ... we get instead: ... GDB GDB X ... We can use the test-case gdb.tui/unicode-prompt.exp to get more details, using tuiterm. With Term::dump_screen we have: ... 16 (gdb) set prompt GDB❯ 17 GDB❯ GDB❯ GDB❯ set prompt (gdb) 18 (gdb) ... and with Term::dump_screen_with_attrs (summarizing using attribute sets and ): ... 16 (gdb) set prompt GDB❯ 17 GDB GDB GDB set prompt (gdb) 18 (gdb) ... where: ... == == ... This explains why we didn't see the unicode char on xterm: it's hidden because the invisible attribute is set. So, there seem to be two problems: - the attributes are incorrect, and - the prompt is repeated a couple of times. In TUI, the prompt is written out by tui_puts_internal, which outputs one byte at a time using waddch, which apparently breaks multi-byte char support. Fix this by detecting multi-byte chars in tui_puts_internal, and printing them using waddnstr. Tested on x86_64-linux. 
Reported-By: wuzy01@qq.com PR tui/28800 Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=28800 --- gdb/charset.c | 20 +++++ gdb/charset.h | 7 ++ gdb/testsuite/gdb.tui/unicode-prompt.exp | 52 ++++++++++++ gdb/tui/tui-io.c | 100 +++++++++++++++++++---- 4 files changed, 164 insertions(+), 15 deletions(-) create mode 100644 gdb/testsuite/gdb.tui/unicode-prompt.exp diff --git a/gdb/charset.c b/gdb/charset.c index bce6050c97f..765dce46fc3 100644 --- a/gdb/charset.c +++ b/gdb/charset.c @@ -690,6 +690,26 @@ wchar_iterator::iterate (enum wchar_iterate_result *out_result, return -1; } +/* See charset.h. */ + +void +wchar_iterator::skip (size_t len) +{ + m_input += len; + + gdb_assert (len <= m_bytes); + m_bytes -= len; +} + +/* See charset.h. */ + +void +wchar_iterator::reset (const gdb_byte *input, size_t bytes) +{ + m_input = input; + m_bytes = bytes; +} + struct charset_vector { ~charset_vector () diff --git a/gdb/charset.h b/gdb/charset.h index 52194547b0c..6bb0ce14af3 100644 --- a/gdb/charset.h +++ b/gdb/charset.h @@ -126,6 +126,13 @@ class wchar_iterator int iterate (enum wchar_iterate_result *out_result, gdb_wchar_t **out_chars, const gdb_byte **ptr, size_t *len); + /* Increase the input buffer pointer by LEN bytes. */ + void skip (size_t len); + + /* Reset the input buffer pointer to INPUT and the number of bytes in the + input buffer to BYTES. */ + void reset (const gdb_byte *input, size_t bytes); + private: /* The underlying iconv descriptor. */ diff --git a/gdb/testsuite/gdb.tui/unicode-prompt.exp b/gdb/testsuite/gdb.tui/unicode-prompt.exp new file mode 100644 index 00000000000..1351235743d --- /dev/null +++ b/gdb/testsuite/gdb.tui/unicode-prompt.exp @@ -0,0 +1,52 @@ +# Copyright 2023 Free Software Foundation, Inc. 
+ +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +require allow_tui_tests + +tuiterm_env + +save_vars { env(LC_ALL) } { + # Override "C" settings from default_gdb_init. + setenv LC_ALL "C.UTF-8" + + Term::clean_restart 24 80 + + if {![Term::enter_tui]} { + unsupported "TUI not supported" + return + } + + set unicode_char "\u276F" + + set color_on "\\033\[31m" + set color_off "\\033\[0m" + + set prompt "GDB$color_on$unicode_char$color_off " + set prompt_no_color "GDB$unicode_char " + set prompt_no_color_re [string_to_regexp $prompt_no_color] + + # Set new prompt. + send_gdb "set prompt $prompt\n" + # Set old prompt back. 
+ send_gdb "set prompt (gdb) \n" + + gdb_assert { [Term::wait_for "^${prompt_no_color_re}set prompt $gdb_prompt "] } \ + "prompt with unicode char" + + set prompt_with_attrs_re "GDB$unicode_char " + set line [Term::get_line_with_attrs [expr $Term::_cur_row - 1]] + gdb_assert { [regexp "^$prompt_with_attrs_re.*$" $line] } \ + "colored unicode char" +} diff --git a/gdb/tui/tui-io.c b/gdb/tui/tui-io.c index 8cb68d12408..45eb9c5b755 100644 --- a/gdb/tui/tui-io.c +++ b/gdb/tui/tui-io.c @@ -47,6 +47,7 @@ #include #include "pager.h" #include "gdbsupport/gdb-checked-static-cast.h" +#include "charset.h" /* This redefines CTRL if it is not already defined, so it must come after terminal state releated include files like and @@ -520,30 +521,99 @@ tui_puts_internal (WINDOW *w, const char *string, int *height) char c; int prev_col = 0; bool saw_nl = false; + size_t skip = 0; + wchar_iterator it ((gdb_byte *)string, strlen (string), host_charset (), 1); - while ((c = *string++) != 0) + while (true) { - if (c == '\1' || c == '\2') - { - /* Ignore these, they are readline escape-marking - sequences. */ - continue; - } + bool handled = false; + + /* Get iterator in sync with string. */ + it.skip (skip); + skip = 0; + + /* Detect and handle multibyte chars. */ + { + enum wchar_iterate_result res2; + gdb_wchar_t *dummy1; + const gdb_byte *dummy2; + size_t len; + int res = it.iterate (&res2, &dummy1, &dummy2, &len); + if (res < 0) + { + /* End of string. */ + gdb_assert (res2 == wchar_iterate_eof); + break; + } + + if (res == 0) + { + if (res2 == wchar_iterate_invalid) + { + /* Let single-byte char code handle it. */ + gdb_assert (len == 1); + } + else if (res2 == wchar_iterate_incomplete) + { + /* Iterator has been setup to return end-of-string on next + call to iterate. Make that an advance-by-one instead, and + let single-byte char code handle it. */ + it.reset ((gdb_byte *)(string + 1), strlen (string + 1)); + } + else + gdb_assert_not_reached (""); + } + else + { + /* res > 0. 
*/ + gdb_assert (res2 == wchar_iterate_ok); + if (len > 1) + { + /* Multi-byte char. Handle it. */ + waddnstr (w, string, len); + string += len; + handled = true; + } + else + { + /* Single-byte char. Let single-byte char code handle it. */ + gdb_assert (len == 1); + } + } + } - if (c == '\033') + if (!handled) { - size_t bytes_read = apply_ansi_escape (w, string - 1); - if (bytes_read > 0) + c = *string++; + if (c == '\0') + { + /* End of string. */ + break; + } + + if (c == '\1' || c == '\2') { - string = string + bytes_read - 1; + /* Ignore these, they are readline escape-marking + sequences. */ continue; } - } - if (c == '\n') - saw_nl = true; + if (c == '\033') + { + size_t bytes_read = apply_ansi_escape (w, string - 1); + if (bytes_read > 0) + { + skip = bytes_read - 1; + string += skip; + continue; + } + } + + if (c == '\n') + saw_nl = true; - do_tui_putc (w, c); + do_tui_putc (w, c); + } if (height != nullptr) { base-commit: 2b462da34de977f953a778afa0cb55e3286ece3d -- 2.35.3