From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <aburgess@sourceware.org>
Received: by sourceware.org (Postfix, from userid 1726)
	id 16186385828B; Sun,  2 Oct 2022 13:15:08 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 16186385828B
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org;
	s=default; t=1664716508;
	bh=i6E3+opGE93VDBoUsbeFXj6zD7znf7m4jPN3ZxYACic=;
	h=From:To:Subject:Date:From;
	b=NEBfUR3FLF4t9PvG5ZTiuwSN/lurSXLplDkJ4YnVsFcFRSvO+kcr4uJv6CKMgCe8s
	 zUhpMFkwBfRjAgLg3BHt4mDEldP+1nqdAps8Xx08AnXTP34vvnL4G3JvpCBaqQe282
	 kGZkhZ54o3xTiH5V/s+6pl5AMsiDaoE0e9oawZ3I=
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: quoted-printable
From: Andrew Burgess <aburgess@sourceware.org>
To: gdb-cvs@sourceware.org
Subject: [binutils-gdb] gdb: disassembler opcode display formatting
X-Act-Checkin: binutils-gdb
X-Git-Author: Andrew Burgess <aburgess@redhat.com>
X-Git-Refname: refs/heads/master
X-Git-Oldrev: d309a8f9b34d8fd570dc8c7189eb6790b9afd4e3
X-Git-Newrev: d4ce49b7ac077a9882d6a5e689e260300045ca88
Message-Id: <20221002131508.16186385828B@sourceware.org>
Date: Sun,  2 Oct 2022 13:15:08 +0000 (GMT)
List-Id: <gdb-cvs.sourceware.org>

https://sourceware.org/git/gitweb.cgi?p=3Dbinutils-gdb.git;h=3Dd4ce49b7ac07=
7a9882d6a5e689e260300045ca88

commit d4ce49b7ac077a9882d6a5e689e260300045ca88
Author: Andrew Burgess <aburgess@redhat.com>
Date:   Tue Jun 21 20:23:35 2022 +0100

    gdb: disassembler opcode display formatting
   =20
    This commit changes the format of 'disassemble /r' to match GNU
    objdump.  Specifically, GDB will now display the instruction bytes in
    the same way as 'objdump --wide --disassemble' does.
   =20
    Here is an example for RISC-V before this patch:
   =20
      (gdb) disassemble /r 0x0001018e,0x0001019e
      Dump of assembler code from 0x1018e to 0x1019e:
         0x0001018e <call_me+66>:     03 26 84 fe     lw      a2,-24(s0)
         0x00010192 <call_me+70>:     83 25 c4 fe     lw      a1,-20(s0)
         0x00010196 <call_me+74>:     61 65   lui     a0,0x18
         0x00010198 <call_me+76>:     13 05 85 6a     addi    a0,a0,1704
         0x0001019c <call_me+80>:     f1 22   jal     0x10368 <printf>
      End of assembler dump.
   =20
    And here's an example after this patch:
   =20
      (gdb) disassemble /r 0x0001018e,0x0001019e
      Dump of assembler code from 0x1018e to 0x1019e:
         0x0001018e <call_me+66>:     fe842603                lw      a2,-2=
4(s0)
         0x00010192 <call_me+70>:     fec42583                lw      a1,-2=
0(s0)
         0x00010196 <call_me+74>:     6561                    lui     a0,0x=
18
         0x00010198 <call_me+76>:     6a850513                addi    a0,a0=
,1704
         0x0001019c <call_me+80>:     22f1                    jal     0x103=
68 <printf>
      End of assembler dump.
   =20
    There are two differences here.  First, the instruction bytes after
    the patch are grouped based on the size of the instruction, and are
    byte-swapped to little-endian order.
   =20
    Second, after the patch, GDB now uses the bytes-per-line hint from
    libopcodes to add whitespace padding after the opcode bytes, this
    means that in most cases the instructions are nicely aligned.
   =20
    It is still possible for a very long instruction to intrude into the
    disassembled text space.  The next example is x86-64, before the
    patch:
   =20
      (gdb) disassemble /r main
      Dump of assembler code for function main:
         0x0000000000401106 <+0>:     55      push   %rbp
         0x0000000000401107 <+1>:     48 89 e5        mov    %rsp,%rbp
         0x000000000040110a <+4>:     c7 87 d8 00 00 00 01 00 00 00   movl =
  $0x1,0xd8(%rdi)
         0x0000000000401114 <+14>:    b8 00 00 00 00  mov    $0x0,%eax
         0x0000000000401119 <+19>:    5d      pop    %rbp
         0x000000000040111a <+20>:    c3      ret
      End of assembler dump.
   =20
    And after the patch:
   =20
      (gdb) disassemble /r main
      Dump of assembler code for function main:
         0x0000000000401106 <+0>:     55                      push   %rbp
         0x0000000000401107 <+1>:     48 89 e5                mov    %rsp,%=
rbp
         0x000000000040110a <+4>:     c7 87 d8 00 00 00 01 00 00 00   movl =
  $0x1,0xd8(%rdi)
         0x0000000000401114 <+14>:    b8 00 00 00 00          mov    $0x0,%=
eax
         0x0000000000401119 <+19>:    5d                      pop    %rbp
         0x000000000040111a <+20>:    c3                      ret
      End of assembler dump.
   =20
    Most instructions are aligned, except for the very long instruction.
    Notice too that for x86-64 libopcodes doesn't request that GDB group
    the instruction bytes.  This matches the behaviour of objdump.
   =20
    In case the user really wants the old behaviour, I have added a new
    modifier 'disassemble /b', which displays the instruction bytes one
    at a time.  For x86-64, which never groups instruction bytes, /b and
    /r are equivalent, but for RISC-V, using /b gets the old layout back
    (except that the whitespace for alignment is still present).
    Consider our original RISC-V example, this time using /b:
   =20
      (gdb) disassemble /b 0x0001018e,0x0001019e
      Dump of assembler code from 0x1018e to 0x1019e:
         0x0001018e <call_me+66>:     03 26 84 fe             lw      a2,-2=
4(s0)
         0x00010192 <call_me+70>:     83 25 c4 fe             lw      a1,-2=
0(s0)
         0x00010196 <call_me+74>:     61 65                   lui     a0,0x=
18
         0x00010198 <call_me+76>:     13 05 85 6a             addi    a0,a0=
,1704
         0x0001019c <call_me+80>:     f1 22                   jal     0x103=
68 <printf>
      End of assembler dump.
   =20
    Obviously, this patch is a potentially significant change to the
    behaviour of /r.  I could have added /b with the new behaviour and
    left /r alone.  However, personally, I feel the new behaviour is
    significantly better than the old, hence, I made /r be what I consider
    the "better" behaviour.
   =20
    The reason I prefer the new behaviour is that, when I use /r, I almost
    always want to manually decode the instruction for some reason, and
    having the bytes displayed in "instruction order" rather than memory
    order, just makes this easier.
   =20
    The 'record instruction-history' command also takes a /r modifier, and
    has been modified in the same way as disassemble; /r gets the new
    behaviour, and /b has been added to retain the old behaviour.
   =20
    Finally, the MI command -data-disassemble is unchanged in behaviour;
    this command now requests the raw bytes of the instruction, which is
    equivalent to the /b modifier.  This means that the MI output will
    remain backward compatible.

Diff:
---
 gdb/NEWS                                | 12 +++++++++
 gdb/cli/cli-cmds.c                      |  6 +++++
 gdb/disasm-flags.h                      |  1 +
 gdb/disasm.c                            | 43 ++++++++++++++++++++++++++---
 gdb/doc/gdb.texinfo                     | 48 ++++++++++++++++++++++++++++-=
----
 gdb/mi/mi-cmd-disas.c                   |  6 ++---
 gdb/record.c                            |  3 +++
 gdb/testsuite/gdb.mi/mi-disassemble.exp |  6 ++---
 8 files changed, 109 insertions(+), 16 deletions(-)
diff --git a/gdb/NEWS b/gdb/NEWS
index 1457c99ff04..796a4ef8072 100644
--- a/gdb/NEWS
+++ b/gdb/NEWS
@@ -59,6 +59,18 @@
=20
 * gdb now supports zstd compressed debug sections (ELFCOMPRESS_ZSTD) for E=
LF.
=20
+* The format of 'disassemble /r' and 'record instruction-history /r'
+  has changed.  The instruction bytes could now be grouped together,
+  and displayed in the endianness of the instruction.  This is the
+  same layout as used by GNU objdump when disassembling.
+
+  There is now 'disassemble /b' and 'record instruction-history /b'
+  which will always display the instruction bytes one at a time in
+  memory order, that is, the byte at the lowest address first.
+
+  For both /r and /b GDB is now better at using whitespace in order to
+  align the disassembled instruction text.
+
 * New commands
=20
 maintenance set ignore-prologue-end-flag on|off
diff --git a/gdb/cli/cli-cmds.c b/gdb/cli/cli-cmds.c
index d5707192be0..c78b93f57b5 100644
--- a/gdb/cli/cli-cmds.c
+++ b/gdb/cli/cli-cmds.c
@@ -1508,6 +1508,9 @@ disassemble_current_function (gdb_disassembly_flags f=
lags)
=20
    A /r modifier will include raw instructions in hex with the assembly.
=20
+   A /b modifier is similar to /r except the instruction bytes are printed
+   as separate bytes with no grouping, or endian switching.
+
    A /s modifier will include source code with the assembly, like /m, with
    two important differences:
    1) The output is still in pc address order.
@@ -1546,6 +1549,9 @@ disassemble_command (const char *arg, int from_tty)
 	    case 'r':
 	      flags |=3D DISASSEMBLY_RAW_INSN;
 	      break;
+	    case 'b':
+	      flags |=3D DISASSEMBLY_RAW_BYTES;
+	      break;
 	    case 's':
 	      flags |=3D DISASSEMBLY_SOURCE;
 	      break;
diff --git a/gdb/disasm-flags.h b/gdb/disasm-flags.h
index 025b6893941..5a7371b0a39 100644
--- a/gdb/disasm-flags.h
+++ b/gdb/disasm-flags.h
@@ -33,6 +33,7 @@ enum gdb_disassembly_flag
     DISASSEMBLY_OMIT_PC =3D (0x1 << 4),
     DISASSEMBLY_SOURCE =3D (0x1 << 5),
     DISASSEMBLY_SPECULATIVE =3D (0x1 << 6),
+    DISASSEMBLY_RAW_BYTES =3D (0x1 << 7),
   };
 DEF_ENUM_FLAGS_TYPE (enum gdb_disassembly_flag, gdb_disassembly_flags);
=20
diff --git a/gdb/disasm.c b/gdb/disasm.c
index ba6ac2d4827..b5e503fd71d 100644
--- a/gdb/disasm.c
+++ b/gdb/disasm.c
@@ -457,7 +457,7 @@ gdb_pretty_print_disassembler::pretty_print_insn (const=
 struct disasm_insn *insn
 	throw ex;
       }
=20
-    if (flags & DISASSEMBLY_RAW_INSN)
+    if ((flags & (DISASSEMBLY_RAW_INSN | DISASSEMBLY_RAW_BYTES)) !=3D 0)
       {
 	/* Build the opcodes using a temporary stream so we can
 	   write them out in a single go for the MI.  */
@@ -467,14 +467,51 @@ gdb_pretty_print_disassembler::pretty_print_insn (con=
st struct disasm_insn *insn
 	m_opcode_data.resize (size);
 	read_code (pc, m_opcode_data.data (), size);
=20
-	for (int i =3D 0; i < size; ++i)
+	/* The disassembler provides information about the best way to
+	   display the instruction bytes to the user.  We provide some sane
+	   defaults in case the disassembler gets it wrong.  */
+	const struct disassemble_info *di =3D m_di.disasm_info ();
+	int bytes_per_line =3D std::max (di->bytes_per_line, size);
+	int bytes_per_chunk =3D std::max (di->bytes_per_chunk, 1);
+
+	/* If the user has requested the instruction bytes be displayed
+	   byte at a time, then handle that here.  Also, if the instruction
+	   is not a multiple of the chunk size (which probably indicates a
+	   disassembler problem) then avoid that causing display problems
+	   by switching to byte at a time mode.  */
+	if ((flags & DISASSEMBLY_RAW_BYTES) !=3D 0
+	    || (size % bytes_per_chunk) !=3D 0)
+	  bytes_per_chunk =3D 1;
+
+	/* Print the instruction opcodes bytes, grouped into chunks.  */
+	for (int i =3D 0; i < size; i +=3D bytes_per_chunk)
 	  {
 	    if (i > 0)
 	      m_opcode_stb.puts (" ");
-	    m_opcode_stb.printf ("%02x", (unsigned) m_opcode_data[i]);
+
+	    if (di->display_endian =3D=3D BFD_ENDIAN_LITTLE)
+	      {
+		for (int k =3D bytes_per_chunk; k-- !=3D 0; )
+		  m_opcode_stb.printf ("%02x", (unsigned) m_opcode_data[i + k]);
+	      }
+	    else
+	      {
+		for (int k =3D 0; k < bytes_per_chunk; k++)
+		  m_opcode_stb.printf ("%02x", (unsigned) m_opcode_data[i + k]);
+	      }
+	  }
+
+	/* Calculate required padding.  */
+	int nspaces =3D 0;
+	for (int i =3D size; i < bytes_per_line; i +=3D bytes_per_chunk)
+	  {
+	    if (i > size)
+	      nspaces++;
+	    nspaces +=3D bytes_per_chunk * 2;
 	  }
=20
 	m_uiout->field_stream ("opcodes", m_opcode_stb);
+	m_uiout->spaces (nspaces);
 	m_uiout->text ("\t");
       }
=20
diff --git a/gdb/doc/gdb.texinfo b/gdb/doc/gdb.texinfo
index 238a49b027d..596e5873558 100644
--- a/gdb/doc/gdb.texinfo
+++ b/gdb/doc/gdb.texinfo
@@ -7945,7 +7945,10 @@ are printed in execution order.
=20
 It can also print mixed source+disassembly if you specify the the
 @code{/m} or @code{/s} modifier, and print the raw instructions in hex
-as well as in symbolic form by specifying the @code{/r} modifier.
+as well as in symbolic form by specifying the @code{/r} or @code{/b}
+modifier.  The behaviour of the @code{/m}, @code{/s}, @code{/r}, and
+@code{/b} modifiers is the same as for the @kbd{disassemble} command
+(@pxref{disassemble,,@kbd{disassemble}}).
=20
 The current position marker is printed for the instruction at the
 current program counter value.  This instruction can appear multiple
@@ -9859,6 +9862,7 @@ After @code{info line}, using @code{info line} again =
without
 specifying a location will display information about the next source
 line.
=20
+@anchor{disassemble}
 @table @code
 @kindex disassemble
 @cindex assembly instructions
@@ -9869,16 +9873,17 @@ line.
 @itemx disassemble /m
 @itemx disassemble /s
 @itemx disassemble /r
+@itemx disassemble /b
 This specialized command dumps a range of memory as machine
 instructions.  It can also print mixed source+disassembly by specifying
-the @code{/m} or @code{/s} modifier and print the raw instructions in hex
-as well as in symbolic form by specifying the @code{/r} modifier.
-The default memory range is the function surrounding the
+the @code{/m} or @code{/s} modifier and print the raw instructions in
+hex as well as in symbolic form by specifying the @code{/r} or @code{/b}
+modifier.  The default memory range is the function surrounding the
 program counter of the selected frame.  A single argument to this
 command is a program counter value; @value{GDBN} dumps the function
-surrounding this value.  When two arguments are given, they should
-be separated by a comma, possibly surrounded by whitespace.  The
-arguments specify a range of addresses to dump, in one of two forms:
+surrounding this value.  When two arguments are given, they should be
+separated by a comma, possibly surrounded by whitespace.  The arguments
+specify a range of addresses to dump, in one of two forms:
=20
 @table @code
 @item @var{start},@var{end}
@@ -9916,6 +9921,35 @@ Dump of assembler code from 0x32c4 to 0x32e4:
 End of assembler dump.
 @end smallexample
=20
+The following two examples are for RISC-V, and demonstrate the
+difference between the @code{/r} and @code{/b} modifiers.  First with
+@code{/b}, the bytes of the instruction are printed, in hex, in memory
+order:
+
+@smallexample
+(@value{GDBP}) disassemble /b 0x00010150,0x0001015c
+Dump of assembler code from 0x10150 to 0x1015c:
+   0x00010150 <call_me+4>:      22 dc                 	sw	s0,56(sp)
+   0x00010152 <call_me+6>:      80 00                 	addi	s0,sp,64
+   0x00010154 <call_me+8>:      23 26 a4 fe           	sw	a0,-20(s0)
+   0x00010158 <call_me+12>:     23 24 b4 fe           	sw	a1,-24(s0)
+End of assembler dump.
+@end smallexample
+
+In contrast, with @code{/r} the bytes of the instruction are displayed
+in the instruction order, for RISC-V this means that the bytes have been
+swapped to little-endian order:
+
+@smallexample
+(@value{GDBP}) disassemble /r 0x00010150,0x0001015c
+Dump of assembler code from 0x10150 to 0x1015c:
+   0x00010150 <call_me+4>:      dc22              	sw	s0,56(sp)
+   0x00010152 <call_me+6>:      0080              	addi	s0,sp,64
+   0x00010154 <call_me+8>:      fea42623        	sw	a0,-20(s0)
+   0x00010158 <call_me+12>:     feb42423        	sw	a1,-24(s0)
+End of assembler dump.
+@end smallexample
+
 Here is an example showing mixed source+assembly for Intel x86
 with @code{/m} or @code{/s}, when the program is stopped just after
 function prologue in a non-optimized function with no inline code.
diff --git a/gdb/mi/mi-cmd-disas.c b/gdb/mi/mi-cmd-disas.c
index 387c4900150..c8e06cd940a 100644
--- a/gdb/mi/mi-cmd-disas.c
+++ b/gdb/mi/mi-cmd-disas.c
@@ -165,16 +165,16 @@ mi_cmd_disassemble (const char *command, char **argv,=
 int argc)
       disasm_flags |=3D DISASSEMBLY_SOURCE_DEPRECATED;
       break;
     case 2:
-      disasm_flags |=3D DISASSEMBLY_RAW_INSN;
+      disasm_flags |=3D DISASSEMBLY_RAW_BYTES;
       break;
     case 3:
-      disasm_flags |=3D DISASSEMBLY_SOURCE_DEPRECATED | DISASSEMBLY_RAW_IN=
SN;
+      disasm_flags |=3D DISASSEMBLY_SOURCE_DEPRECATED | DISASSEMBLY_RAW_BY=
TES;
       break;
     case 4:
       disasm_flags |=3D DISASSEMBLY_SOURCE;
       break;
     case 5:
-      disasm_flags |=3D DISASSEMBLY_SOURCE | DISASSEMBLY_RAW_INSN;
+      disasm_flags |=3D DISASSEMBLY_SOURCE | DISASSEMBLY_RAW_BYTES;
       break;
     default:
       gdb_assert_not_reached ("bad disassembly mode");
diff --git a/gdb/record.c b/gdb/record.c
index 17a5df262bd..2390a58f9c0 100644
--- a/gdb/record.c
+++ b/gdb/record.c
@@ -494,6 +494,9 @@ get_insn_history_modifiers (const char **arg)
 	    case 'r':
 	      modifiers |=3D DISASSEMBLY_RAW_INSN;
 	      break;
+	    case 'b':
+	      modifiers |=3D DISASSEMBLY_RAW_BYTES;
+	      break;
 	    case 'f':
 	      modifiers |=3D DISASSEMBLY_OMIT_FNAME;
 	      break;
diff --git a/gdb/testsuite/gdb.mi/mi-disassemble.exp b/gdb/testsuite/gdb.mi=
/mi-disassemble.exp
index b7c52472c84..ef3337d4062 100644
--- a/gdb/testsuite/gdb.mi/mi-disassemble.exp
+++ b/gdb/testsuite/gdb.mi/mi-disassemble.exp
@@ -245,12 +245,12 @@ proc test_disassembly_opcode_format {} {
     # then disassemble using the MI command.
     set longest_insn_bytes ""
     set longest_insn_addr ""
-    gdb_test_multiple "disassemble /r main" "" {
-	-re "^disassemble /r main\r\n" {
+    gdb_test_multiple "disassemble /b main" "" {
+	-re "^disassemble /b main\r\n" {
 	    exp_continue
 	}
=20
-	-re "^&\"disassemble /r main.n\"\r\n" {
+	-re "^&\"disassemble /b main.n\"\r\n" {
 	    exp_continue
 	}