public inbox for gdb-patches@sourceware.org
 help / color / mirror / Atom feed
From: John Baldwin <jhb@FreeBSD.org>
To: gdb-patches@sourceware.org
Subject: [PATCH v5 19/19] gdbserver: Simplify handling of ZMM registers.
Date: Thu, 27 Apr 2023 14:01:13 -0700	[thread overview]
Message-ID: <20230427210113.45380-20-jhb@FreeBSD.org> (raw)
In-Reply-To: <20230427210113.45380-1-jhb@FreeBSD.org>

- Reuse num_xmm_registers directly for the count of ZMM0-15 registers
  as is already done for the YMM registers for AVX rather than using
  a new variable that is always the same.

- Replace 3 identical variables for the count of upper ZMM16-31
  registers with a single variable.  Make use of this to merge
  various loops working on the ZMM XSAVE region so that the
  various sub-registers in this region are always handled in a
  single loop.

- While here, fix some bugs in i387_cache_to_xsave where, if
  X86_XSTATE_ZMM was set on i386 (e.g. a 32-bit process on a 64-bit
  kernel), the -1 register nums would wrap around and store the
  value of GPRs in the XSAVE area.  This should be harmless, but
  is definitely odd.  Instead, check num_zmm_high_registers directly
  when checking X86_XSTATE_ZMM and skip the ZMM region handling
  entirely if the register count is 0.
---
 gdbserver/i387-fp.cc | 132 +++++++++++++++----------------------------
 1 file changed, 46 insertions(+), 86 deletions(-)

diff --git a/gdbserver/i387-fp.cc b/gdbserver/i387-fp.cc
index f53a6cfc477..91d3a0b8ca3 100644
--- a/gdbserver/i387-fp.cc
+++ b/gdbserver/i387-fp.cc
@@ -267,12 +267,8 @@ i387_cache_to_xsave (struct regcache *regcache, void *buf)
 
   /* Amd64 has 16 xmm regs; I386 has 8 xmm regs.  */
   int num_xmm_registers = amd64 ? 16 : 8;
-  /* AVX512 extends the existing xmm/ymm registers to a wider mode: zmm.  */
-  int num_avx512_zmmh_low_registers = num_xmm_registers;
-  /* AVX512 adds 16 extra regs in Amd64 mode, but none in I386 mode.*/
-  int num_avx512_zmmh_high_registers = amd64 ? 16 : 0;
-  int num_avx512_ymmh_registers = amd64 ? 16 : 0;
-  int num_avx512_xmm_registers = amd64 ? 16 : 0;
+  /* AVX512 adds 16 extra ZMM regs in Amd64 mode, but none in I386 mode.*/
+  int num_zmm_high_registers = amd64 ? 16 : 0;
 
   /* The supported bits in `xstat_bv' are 8 bytes.  Clear part in
      vector registers if its bit in xstat_bv is zero.  */
@@ -321,18 +317,12 @@ i387_cache_to_xsave (struct regcache *regcache, void *buf)
 	  memset (fp->k_space () + i * 8, 0, 8);
 
       if ((clear_bv & X86_XSTATE_ZMM_H))
-	for (i = 0; i < num_avx512_zmmh_low_registers; i++)
+	for (i = 0; i < num_xmm_registers; i++)
 	  memset (fp->zmmh_space () + i * 32, 0, 32);
 
       if ((clear_bv & X86_XSTATE_ZMM))
-	{
-	  for (i = 0; i < num_avx512_zmmh_high_registers; i++)
-	    memset (fp->zmm_space () + 32 + i * 64, 0, 32);
-	  for (i = 0; i < num_avx512_xmm_registers; i++)
-	    memset (fp->zmm_space () + i * 64, 0, 16);
-	  for (i = 0; i < num_avx512_ymmh_registers; i++)
-	    memset (fp->zmm_space () + 16 + i * 64, 0, 16);
-	}
+	for (i = 0; i < num_zmm_high_registers; i++)
+	  memset (fp->zmm_space () + i * 64, 0, 64);
 
       if ((clear_bv & X86_XSTATE_PKRU))
 	for (i = 0; i < num_pkeys_registers; i++)
@@ -446,7 +436,7 @@ i387_cache_to_xsave (struct regcache *regcache, void *buf)
     {
       int zmm0h_regnum = find_regno (regcache->tdesc, "zmm0h");
 
-      for (i = 0; i < num_avx512_zmmh_low_registers; i++)
+      for (i = 0; i < num_xmm_registers; i++)
 	{
 	  collect_register (regcache, i + zmm0h_regnum, raw);
 	  p = fp->zmmh_space () + i * 32;
@@ -458,55 +448,35 @@ i387_cache_to_xsave (struct regcache *regcache, void *buf)
 	}
     }
 
-  /* Check if any of ZMM16H-ZMM31H registers are changed.  */
-  if ((x86_xcr0 & X86_XSTATE_ZMM))
+  /* Check if any of ZMM16-ZMM31 registers are changed.  */
+  if ((x86_xcr0 & X86_XSTATE_ZMM) && num_zmm_high_registers != 0)
     {
-      int zmm16h_regnum = (num_avx512_zmmh_high_registers == 0
-			   ? -1
-			   : find_regno (regcache->tdesc, "zmm16h"));
+      int zmm16h_regnum = find_regno (regcache->tdesc, "zmm16h");
+      int ymm16h_regnum = find_regno (regcache->tdesc, "ymm16h");
+      int xmm16_regnum = find_regno (regcache->tdesc, "xmm16");
 
-      for (i = 0; i < num_avx512_zmmh_high_registers; i++)
+      for (i = 0; i < num_zmm_high_registers; i++)
 	{
-	  collect_register (regcache, i + zmm16h_regnum, raw);
-	  p = fp->zmm_space () + 32 + i * 64;
-	  if (memcmp (raw, p, 32) != 0)
-	    {
-	      xstate_bv |= X86_XSTATE_ZMM;
-	      memcpy (p, raw, 32);
-	    }
-	}
-    }
-
-  /* Check if any XMM_AVX512 registers are changed.  */
-  if ((x86_xcr0 & X86_XSTATE_ZMM))
-    {
-      int xmm_avx512_regnum = (num_avx512_xmm_registers == 0
-			       ? -1
-			       : find_regno (regcache->tdesc, "xmm16"));
-
-      for (i = 0; i < num_avx512_xmm_registers; i++)
-	{
-	  collect_register (regcache, i + xmm_avx512_regnum, raw);
 	  p = fp->zmm_space () + i * 64;
-	  if (memcmp (raw, p, 16) != 0)
+
+	  /* ZMMH sub-register.  */
+	  collect_register (regcache, i + zmm16h_regnum, raw);
+	  if (memcmp (raw, p + 32, 32) != 0)
+	    {
+	      xstate_bv |= X86_XSTATE_ZMM;
+	      memcpy (p, raw, 32);
+	    }
+
+	  /* YMMH sub-register.  */
+	  collect_register (regcache, i + ymm16h_regnum, raw);
+	  if (memcmp (raw, p + 16, 16) != 0)
 	    {
 	      xstate_bv |= X86_XSTATE_ZMM;
 	      memcpy (p, raw, 16);
 	    }
-	}
-    }
 
-  /* Check if any YMMH_AVX512 registers are changed.  */
-  if ((x86_xcr0 & X86_XSTATE_ZMM))
-    {
-      int ymmh_avx512_regnum = (num_avx512_ymmh_registers == 0
-				? -1
-				: find_regno (regcache->tdesc, "ymm16h"));
-
-      for (i = 0; i < num_avx512_ymmh_registers; i++)
-	{
-	  collect_register (regcache, i + ymmh_avx512_regnum, raw);
-	  p = fp->zmm_space () + 16 + i * 64;
+	  /* XMM sub-register.  */
+	  collect_register (regcache, i + xmm16_regnum, raw);
 	  if (memcmp (raw, p, 16) != 0)
 	    {
 	      xstate_bv |= X86_XSTATE_ZMM;
@@ -732,12 +702,8 @@ i387_xsave_to_cache (struct regcache *regcache, const void *buf)
 
    /* Amd64 has 16 xmm regs; I386 has 8 xmm regs.  */
   int num_xmm_registers = amd64 ? 16 : 8;
-  /* AVX512 extends the existing xmm/ymm registers to a wider mode: zmm.  */
-  int num_avx512_zmmh_low_registers = num_xmm_registers;
-  /* AVX512 adds 16 extra regs in Amd64 mode, but none in I386 mode.*/
-  int num_avx512_zmmh_high_registers = amd64 ? 16 : 0;
-  int num_avx512_ymmh_registers = amd64 ? 16 : 0;
-  int num_avx512_xmm_registers = amd64 ? 16 : 0;
+  /* AVX512 adds 16 extra ZMM regs in Amd64 mode, but none in I386 mode.*/
+  int num_zmm_high_registers = amd64 ? 16 : 0;
 
   /* The supported bits in `xstat_bv' are 8 bytes.  Clear part in
      vector registers if its bit in xstat_bv is zero.  */
@@ -854,47 +820,41 @@ i387_xsave_to_cache (struct regcache *regcache, const void *buf)
 
       if ((clear_bv & X86_XSTATE_ZMM_H) != 0)
 	{
-	  for (i = 0; i < num_avx512_zmmh_low_registers; i++)
+	  for (i = 0; i < num_xmm_registers; i++)
 	    supply_register_zeroed (regcache, i + zmm0h_regnum);
 	}
       else
 	{
 	  p = fp->zmmh_space ();
-	  for (i = 0; i < num_avx512_zmmh_low_registers; i++)
+	  for (i = 0; i < num_xmm_registers; i++)
 	    supply_register (regcache, i + zmm0h_regnum, p + i * 32);
 	}
     }
 
-  if ((x86_xcr0 & X86_XSTATE_ZMM) != 0)
+  if ((x86_xcr0 & X86_XSTATE_ZMM) != 0 && num_zmm_high_registers != 0)
     {
-      int zmm16h_regnum = (num_avx512_zmmh_high_registers == 0
-			   ? -1
-			   : find_regno (regcache->tdesc, "zmm16h"));
-      int ymm16h_regnum = (num_avx512_ymmh_registers == 0
-			   ? -1
-			   : find_regno (regcache->tdesc, "ymm16h"));
-      int xmm16_regnum = (num_avx512_xmm_registers == 0
-			  ? -1
-			  : find_regno (regcache->tdesc, "xmm16"));
+      int zmm16h_regnum = find_regno (regcache->tdesc, "zmm16h");
+      int ymm16h_regnum = find_regno (regcache->tdesc, "ymm16h");
+      int xmm16_regnum = find_regno (regcache->tdesc, "xmm16");
 
       if ((clear_bv & X86_XSTATE_ZMM) != 0)
 	{
-	  for (i = 0; i < num_avx512_zmmh_high_registers; i++)
-	    supply_register_zeroed (regcache, i + zmm16h_regnum);
-	  for (i = 0; i < num_avx512_ymmh_registers; i++)
-	    supply_register_zeroed (regcache, i + ymm16h_regnum);
-	  for (i = 0; i < num_avx512_xmm_registers; i++)
-	    supply_register_zeroed (regcache, i + xmm16_regnum);
+	  for (i = 0; i < num_zmm_high_registers; i++)
+	    {
+	      supply_register_zeroed (regcache, i + zmm16h_regnum);
+	      supply_register_zeroed (regcache, i + ymm16h_regnum);
+	      supply_register_zeroed (regcache, i + xmm16_regnum);
+	    }
 	}
       else
 	{
 	  p = fp->zmm_space ();
-	  for (i = 0; i < num_avx512_zmmh_high_registers; i++)
-	    supply_register (regcache, i + zmm16h_regnum, p + 32 + i * 64);
-	  for (i = 0; i < num_avx512_ymmh_registers; i++)
-	    supply_register (regcache, i + ymm16h_regnum, p + 16 + i * 64);
-	  for (i = 0; i < num_avx512_xmm_registers; i++)
-	    supply_register (regcache, i + xmm16_regnum, p + i * 64);
+	  for (i = 0; i < num_zmm_high_registers; i++)
+	    {
+	      supply_register (regcache, i + zmm16h_regnum, p + 32 + i * 64);
+	      supply_register (regcache, i + ymm16h_regnum, p + 16 + i * 64);
+	      supply_register (regcache, i + xmm16_regnum, p + i * 64);
+	    }
 	}
     }
 
-- 
2.40.0


      parent reply	other threads:[~2023-04-27 21:01 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-27 21:00 [PATCH v5 00/19] Handle variable XSAVE layouts John Baldwin
2023-04-27 21:00 ` [PATCH v5 01/19] x86: Add an x86_xsave_layout structure to handle " John Baldwin
2023-04-27 21:00 ` [PATCH v5 02/19] gdb: Store an x86_xsave_layout in i386_gdbarch_tdep John Baldwin
2023-05-03 16:22   ` Simon Marchi
2023-05-08 16:51     ` John Baldwin
2023-04-27 21:00 ` [PATCH v5 03/19] core: Support fetching x86 XSAVE layout from architectures John Baldwin
2023-04-27 21:00 ` [PATCH v5 04/19] nat/x86-cpuid.h: Add x86_cpuid_count wrapper around __get_cpuid_count John Baldwin
2023-04-27 21:00 ` [PATCH v5 05/19] x86 nat: Add helper functions to save the XSAVE layout for the host John Baldwin
2023-04-27 21:01 ` [PATCH v5 06/19] x86-fbsd-nat: Add missing public label John Baldwin
2023-04-27 21:01 ` [PATCH v5 07/19] *-fbsd-nat: Handle null inferior in read_description John Baldwin
2023-04-27 21:01 ` [PATCH v5 08/19] *-linux-nat: " John Baldwin
2023-05-03 16:38   ` Simon Marchi
2023-05-08 17:24     ` John Baldwin
2023-04-27 21:01 ` [PATCH v5 09/19] gdb: Update x86 FreeBSD architectures to support XSAVE layouts John Baldwin
2023-05-03 17:14   ` Simon Marchi
2023-05-03 17:20     ` Simon Marchi
2023-05-03 23:45     ` John Baldwin
2023-05-04 17:20       ` Simon Marchi
2023-05-08 17:33         ` John Baldwin
2023-04-27 21:01 ` [PATCH v5 10/19] gdb: Support XSAVE layouts for the current host in the FreeBSD x86 targets John Baldwin
2023-04-27 21:01 ` [PATCH v5 11/19] gdb: Update x86 Linux architectures to support XSAVE layouts John Baldwin
2023-04-27 21:01 ` [PATCH v5 12/19] gdb: Support XSAVE layouts for the current host in the Linux x86 targets John Baldwin
2023-04-27 21:01 ` [PATCH v5 13/19] gdb: Use x86_xstate_layout to parse the XSAVE extended state area John Baldwin
2023-04-27 21:01 ` [PATCH v5 14/19] gdbserver: Add a function to set the XSAVE mask and size John Baldwin
2023-04-27 21:01 ` [PATCH v5 15/19] gdbserver: Refactor the legacy region within the xsave struct John Baldwin
2023-04-27 21:01 ` [PATCH v5 16/19] gdbserver: Clear upper ZMM registers in the right location John Baldwin
2023-05-03 17:49   ` Simon Marchi
2023-05-03 23:47     ` John Baldwin
2023-04-27 21:01 ` [PATCH v5 17/19] gdbserver: Use x86_xstate_layout to parse the XSAVE extended state area John Baldwin
2023-04-27 21:01 ` [PATCH v5 18/19] x86: Remove X86_XSTATE_SIZE and related constants John Baldwin
2023-04-27 21:01 ` John Baldwin [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230427210113.45380-20-jhb@FreeBSD.org \
    --to=jhb@freebsd.org \
    --cc=gdb-patches@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).