From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 25703 invoked by alias); 7 Apr 2010 16:57:42 -0000 Received: (qmail 25690 invoked by uid 22791); 7 Apr 2010 16:57:38 -0000 X-SWARE-Spam-Status: No, hits=-1.5 required=5.0 tests=BAYES_00,NO_DNS_FOR_FROM,TW_AV,TW_CP,TW_EG,TW_FX,TW_VX,T_RP_MATCHES_RCVD X-Spam-Check-By: sourceware.org Received: from mga09.intel.com (HELO mga09.intel.com) (134.134.136.24) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Wed, 07 Apr 2010 16:57:29 +0000 Received: from orsmga002.jf.intel.com ([10.7.209.21]) by orsmga102.jf.intel.com with ESMTP; 07 Apr 2010 09:57:08 -0700 X-ExtLoop1: 1 Received: from gnu-6.sc.intel.com ([10.3.194.107]) by orsmga002.jf.intel.com with ESMTP; 07 Apr 2010 09:57:21 -0700 Received: by gnu-6.sc.intel.com (Postfix, from userid 500) id A614A812386; Wed, 7 Apr 2010 09:57:27 -0700 (PDT) Date: Wed, 07 Apr 2010 16:57:00 -0000 From: "H.J. Lu" To: GDB Subject: Re: PATCH: 5/6 [3rd try]: Add AVX support (i387 changes) Message-ID: <20100407165727.GC16891@intel.com> Reply-To: "H.J. Lu" References: <20100304180219.GA10826@intel.com> <20100304180408.GA10869@intel.com> <20100304180748.GC10869@intel.com> <20100304180901.GD10869@intel.com> <20100306222212.GF21133@intel.com> <20100312172445.GA6643@intel.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20100312172445.GA6643@intel.com> User-Agent: Mutt/1.5.20 (2009-08-17) Mailing-List: contact gdb-patches-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: gdb-patches-owner@sourceware.org X-SW-Source: 2010-04/txt/msg00137.txt.bz2 On Fri, Mar 12, 2010 at 09:24:45AM -0800, H.J. Lu wrote: > On Sat, Mar 06, 2010 at 02:22:12PM -0800, H.J. Lu wrote: > > Hi, > > > > Here are i387 changes to support AVX. OK to install? > > > > Thanks. > > > Here is the updated patch. 
I updated i387_collect_xsave to reduce the size of i387_collect_xsave by removing gcore optimization. OK to install? Thanks. H.J. --- 2010-04-07 H.J. Lu * i387-tdep.c: Include "i386-xstate.h". (XSAVE_XSTATE_BV_ADDR): New. (xsave_avxh_offset): Likewise. (XSAVE_AVXH_ADDR): Likewise. (i387_supply_xsave): Likewise. (i387_collect_xsave): Likewise. * i387-tdep.h (I387_NUM_YMM_REGS): New. (I387_YMM0H_REGNUM): Likewise. (I387_YMMENDH_REGNUM): Likewise. (i387_supply_xsave): Likewise. (i387_collect_xsave): Likewise. diff --git a/gdb/i387-tdep.c b/gdb/i387-tdep.c index 3fb5b56..4c32e09 100644 --- a/gdb/i387-tdep.c +++ b/gdb/i387-tdep.c @@ -34,6 +34,7 @@ #include "i386-tdep.h" #include "i387-tdep.h" +#include "i386-xstate.h" /* Print the floating point number specified by RAW. */ @@ -677,6 +678,475 @@ i387_collect_fxsave (const struct regcache *regcache, int regnum, void *fxsave) FXSAVE_MXCSR_ADDR (regs)); } +/* `xstate_bv' is at byte offset 512. */ +#define XSAVE_XSTATE_BV_ADDR(xsave) (xsave + 512) + +/* At xsave_avxh_offset[REGNUM] you'll find the offset to the location in + the upper 128bit of AVX register data structure used by the "xsave" + instruction where GDB register REGNUM is stored. */ + +static int xsave_avxh_offset[] = +{ + 576 + 0 * 16, /* Upper 128bit of %ymm0 through ... */ + 576 + 1 * 16, + 576 + 2 * 16, + 576 + 3 * 16, + 576 + 4 * 16, + 576 + 5 * 16, + 576 + 6 * 16, + 576 + 7 * 16, + 576 + 8 * 16, + 576 + 9 * 16, + 576 + 10 * 16, + 576 + 11 * 16, + 576 + 12 * 16, + 576 + 13 * 16, + 576 + 14 * 16, + 576 + 15 * 16 /* Upper 128bit of ... %ymm15 (128 bits each). */ +}; + +#define XSAVE_AVXH_ADDR(tdep, xsave, regnum) \ + (xsave + xsave_avxh_offset[regnum - I387_YMM0H_REGNUM (tdep)]) + +/* Similar to i387_supply_fxsave, but use XSAVE extended state. 
*/ + +void +i387_supply_xsave (struct regcache *regcache, int regnum, + const void *xsave) +{ + struct gdbarch_tdep *tdep = gdbarch_tdep (get_regcache_arch (regcache)); + const gdb_byte *regs = xsave; + int i; + unsigned int clear_bv; + const gdb_byte *p; + enum + { + none = 0x0, + x87 = 0x1, + sse = 0x2, + avxh = 0x4, + all = x87 | sse | avxh + } regclass; + + gdb_assert (tdep->st0_regnum >= I386_ST0_REGNUM); + gdb_assert (tdep->num_xmm_regs > 0); + + if (regnum == -1) + regclass = all; + else if (regnum >= I387_YMM0H_REGNUM (tdep) + && regnum < I387_YMMENDH_REGNUM (tdep)) + regclass = avxh; + else if (regnum >= I387_XMM0_REGNUM(tdep) + && regnum < I387_MXCSR_REGNUM (tdep)) + regclass = sse; + else if (regnum >= I387_ST0_REGNUM (tdep) + && regnum < I387_FCTRL_REGNUM (tdep)) + regclass = x87; + else + regclass = none; + + if (regs != NULL && regclass != none) + { + /* Get `xstate_bv'. */ + const gdb_byte *xstate_bv_p = XSAVE_XSTATE_BV_ADDR (regs); + + /* The supported bits in `xstate_bv' are 1 byte. Clear part in + vector registers if its bit in xstate_bv is zero. */ + clear_bv = (~(*xstate_bv_p)) & tdep->xcr0; + } + else + clear_bv = I386_XSTATE_AVX_MASK; + + switch (regclass) + { + case none: + break; + + case avxh: + if ((clear_bv & I386_XSTATE_AVX)) + p = NULL; + else + p = XSAVE_AVXH_ADDR (tdep, regs, regnum); + regcache_raw_supply (regcache, regnum, p); + return; + + case sse: + if ((clear_bv & I386_XSTATE_SSE)) + p = NULL; + else + p = FXSAVE_ADDR (tdep, regs, regnum); + regcache_raw_supply (regcache, regnum, p); + return; + + case x87: + if ((clear_bv & I386_XSTATE_X87)) + p = NULL; + else + p = FXSAVE_ADDR (tdep, regs, regnum); + regcache_raw_supply (regcache, regnum, p); + return; + + case all: + /* Handle the upper YMM registers. 
*/ + if ((tdep->xcr0 & I386_XSTATE_AVX)) + { + if ((clear_bv & I386_XSTATE_AVX)) + p = NULL; + else + p = regs; + + for (i = I387_YMM0H_REGNUM (tdep); + i < I387_YMMENDH_REGNUM (tdep); i++) + { + if (p != NULL) + p = XSAVE_AVXH_ADDR (tdep, regs, i); + regcache_raw_supply (regcache, i, p); + } + } + + /* Handle the XMM registers. */ + if ((tdep->xcr0 & I386_XSTATE_SSE)) + { + if ((clear_bv & I386_XSTATE_SSE)) + p = NULL; + else + p = regs; + + for (i = I387_XMM0_REGNUM (tdep); + i < I387_MXCSR_REGNUM (tdep); i++) + { + if (p != NULL) + p = FXSAVE_ADDR (tdep, regs, i); + regcache_raw_supply (regcache, i, p); + } + } + + /* Handle the x87 registers. */ + if ((tdep->xcr0 & I386_XSTATE_X87)) + { + if ((clear_bv & I386_XSTATE_X87)) + p = NULL; + else + p = regs; + + for (i = I387_ST0_REGNUM (tdep); + i < I387_FCTRL_REGNUM (tdep); i++) + { + if (p != NULL) + p = FXSAVE_ADDR (tdep, regs, i); + regcache_raw_supply (regcache, i, p); + } + } + break; + } + + /* Only handle x87 control registers. */ + for (i = I387_FCTRL_REGNUM (tdep); i < I387_XMM0_REGNUM (tdep); i++) + if (regnum == -1 || regnum == i) + { + if (regs == NULL) + { + regcache_raw_supply (regcache, i, NULL); + continue; + } + + /* Most of the FPU control registers occupy only 16 bits in + the xsave extended state. Give those a special treatment. */ + if (i != I387_FIOFF_REGNUM (tdep) + && i != I387_FOOFF_REGNUM (tdep)) + { + gdb_byte val[4]; + + memcpy (val, FXSAVE_ADDR (tdep, regs, i), 2); + val[2] = val[3] = 0; + if (i == I387_FOP_REGNUM (tdep)) + val[1] &= ((1 << 3) - 1); + else if (i== I387_FTAG_REGNUM (tdep)) + { + /* The fxsave area contains a simplified version of + the tag word. We have to look at the actual 80-bit + FP data to recreate the traditional i387 tag word. 
*/ + + unsigned long ftag = 0; + int fpreg; + int top; + + top = ((FXSAVE_ADDR (tdep, regs, + I387_FSTAT_REGNUM (tdep)))[1] >> 3); + top &= 0x7; + + for (fpreg = 7; fpreg >= 0; fpreg--) + { + int tag; + + if (val[0] & (1 << fpreg)) + { + int regnum = (fpreg + 8 - top) % 8 + + I387_ST0_REGNUM (tdep); + tag = i387_tag (FXSAVE_ADDR (tdep, regs, regnum)); + } + else + tag = 3; /* Empty */ + + ftag |= tag << (2 * fpreg); + } + val[0] = ftag & 0xff; + val[1] = (ftag >> 8) & 0xff; + } + regcache_raw_supply (regcache, i, val); + } + else + regcache_raw_supply (regcache, i, FXSAVE_ADDR (tdep, regs, i)); + } + + if (regnum == I387_MXCSR_REGNUM (tdep) || regnum == -1) + { + p = regs == NULL ? NULL : FXSAVE_MXCSR_ADDR (regs); + regcache_raw_supply (regcache, I387_MXCSR_REGNUM (tdep), p); + } +} + +/* Similar to i387_collect_fxsave, but use XSAVE extended state. */ + +void +i387_collect_xsave (const struct regcache *regcache, int regnum, + void *xsave, int gcore) +{ + struct gdbarch_tdep *tdep = gdbarch_tdep (get_regcache_arch (regcache)); + gdb_byte *regs = xsave; + int i; + enum + { + none = 0x0, + check = 0x1, + x87 = 0x2 | check, + sse = 0x4 | check, + avxh = 0x8 | check, + all = x87 | sse | avxh + } regclass; + + gdb_assert (tdep->st0_regnum >= I386_ST0_REGNUM); + gdb_assert (tdep->num_xmm_regs > 0); + + if (regnum == -1) + regclass = all; + else if (regnum >= I387_YMM0H_REGNUM (tdep) + && regnum < I387_YMMENDH_REGNUM (tdep)) + regclass = avxh; + else if (regnum >= I387_XMM0_REGNUM(tdep) + && regnum < I387_MXCSR_REGNUM (tdep)) + regclass = sse; + else if (regnum >= I387_ST0_REGNUM (tdep) + && regnum < I387_FCTRL_REGNUM (tdep)) + regclass = x87; + else + regclass = none; + + if (gcore) + { + /* Clear XSAVE extended state. */ + memset (regs, 0, I386_XSTATE_SIZE (tdep->xcr0)); + + /* Update XCR0 and `xstate_bv' with XCR0 for gcore. 
*/ + if (tdep->xsave_xcr0_offset != -1) + memcpy (regs + tdep->xsave_xcr0_offset, &tdep->xcr0, 8); + memcpy (XSAVE_XSTATE_BV_ADDR (regs), &tdep->xcr0, 8); + } + + if ((regclass & check)) + { + gdb_byte raw[I386_MAX_REGISTER_SIZE]; + gdb_byte *xstate_bv_p = XSAVE_XSTATE_BV_ADDR (regs); + unsigned int xstate_bv = 0; + /* The supported bits in `xstate_bv' are 1 byte. */ + unsigned int clear_bv = (~(*xstate_bv_p)) & tdep->xcr0; + gdb_byte *p; + + /* Clear register set if its bit in xstate_bv is zero. */ + if (clear_bv) + { + if ((clear_bv & I386_XSTATE_AVX)) + for (i = I387_YMM0H_REGNUM (tdep); + i < I387_YMMENDH_REGNUM (tdep); i++) + memset (XSAVE_AVXH_ADDR (tdep, regs, i), 0, 16); + + if ((clear_bv & I386_XSTATE_SSE)) + for (i = I387_XMM0_REGNUM (tdep); + i < I387_MXCSR_REGNUM (tdep); i++) + memset (FXSAVE_ADDR (tdep, regs, i), 0, 16); + + if ((clear_bv & I386_XSTATE_X87)) + for (i = I387_ST0_REGNUM (tdep); + i < I387_FCTRL_REGNUM (tdep); i++) + memset (FXSAVE_ADDR (tdep, regs, i), 0, 10); + } + + if (regclass == all) + { + /* Check if any upper YMM registers are changed. */ + if ((tdep->xcr0 & I386_XSTATE_AVX)) + for (i = I387_YMM0H_REGNUM (tdep); + i < I387_YMMENDH_REGNUM (tdep); i++) + { + regcache_raw_collect (regcache, i, raw); + p = XSAVE_AVXH_ADDR (tdep, regs, i); + if (memcmp (raw, p, 16)) + { + xstate_bv |= I386_XSTATE_AVX; + memcpy (p, raw, 16); + } + } + + /* Check if any SSE registers are changed. */ + if ((tdep->xcr0 & I386_XSTATE_SSE)) + for (i = I387_XMM0_REGNUM (tdep); + i < I387_MXCSR_REGNUM (tdep); i++) + { + regcache_raw_collect (regcache, i, raw); + p = FXSAVE_ADDR (tdep, regs, i); + if (memcmp (raw, p, 16)) + { + xstate_bv |= I386_XSTATE_SSE; + memcpy (p, raw, 16); + } + } + + /* Check if any X87 registers are changed. 
*/ + if ((tdep->xcr0 & I386_XSTATE_X87)) + for (i = I387_ST0_REGNUM (tdep); + i < I387_FCTRL_REGNUM (tdep); i++) + { + regcache_raw_collect (regcache, i, raw); + p = FXSAVE_ADDR (tdep, regs, i); + if (memcmp (raw, p, 10)) + { + xstate_bv |= I386_XSTATE_X87; + memcpy (p, raw, 10); + } + } + } + else + { + /* Check if REGNUM is changed. */ + regcache_raw_collect (regcache, regnum, raw); + + switch (regclass) + { + default: + abort (); + + case avxh: + /* This is an upper YMM register. */ + p = XSAVE_AVXH_ADDR (tdep, regs, regnum); + if (memcmp (raw, p, 16)) + { + xstate_bv |= I386_XSTATE_AVX; + memcpy (p, raw, 16); + } + break; + + case sse: + /* This is an SSE register. */ + p = FXSAVE_ADDR (tdep, regs, regnum); + if (memcmp (raw, p, 16)) + { + xstate_bv |= I386_XSTATE_SSE; + memcpy (p, raw, 16); + } + break; + + case x87: + /* This is an x87 register. */ + p = FXSAVE_ADDR (tdep, regs, regnum); + if (memcmp (raw, p, 10)) + { + xstate_bv |= I386_XSTATE_X87; + memcpy (p, raw, 10); + } + break; + } + } + + /* Update the corresponding bits in `xstate_bv' if any x87/SSE/AVX + registers are changed. */ + if (xstate_bv) + { + /* The supported bits in `xstate_bv' are 1 byte. */ + *xstate_bv_p |= (gdb_byte) xstate_bv; + + switch (regclass) + { + default: + abort (); + + case all: + break; + + case x87: + case sse: + case avxh: + /* Register REGNUM has been updated. Return. */ + return; + } + } + else + { + /* Return if REGNUM isn't changed. */ + if (regclass != all) + return; + } + } + + /* Only handle x87 control registers. 
Make sure we + don't touch the other bits. */ + buf[1] &= ((1 << 3) - 1); + buf[1] |= ((FXSAVE_ADDR (tdep, regs, i))[1] & ~((1 << 3) - 1)); + } + else if (i == I387_FTAG_REGNUM (tdep)) + { + /* Converting back is much easier. */ + + unsigned short ftag; + int fpreg; + + ftag = (buf[1] << 8) | buf[0]; + buf[0] = 0; + buf[1] = 0; + + for (fpreg = 7; fpreg >= 0; fpreg--) + { + int tag = (ftag >> (fpreg * 2)) & 3; + + if (tag != 3) + buf[0] |= (1 << fpreg); + } + } + memcpy (FXSAVE_ADDR (tdep, regs, i), buf, 2); + } + else + regcache_raw_collect (regcache, i, FXSAVE_ADDR (tdep, regs, i)); + } + + if (regnum == I387_MXCSR_REGNUM (tdep) || regnum == -1) + regcache_raw_collect (regcache, I387_MXCSR_REGNUM (tdep), + FXSAVE_MXCSR_ADDR (regs)); +} + /* Recreate the FTW (tag word) valid bits from the 80-bit FP data in *RAW. */ diff --git a/gdb/i387-tdep.h b/gdb/i387-tdep.h index 645eb91..976fa11 100644 --- a/gdb/i387-tdep.h +++ b/gdb/i387-tdep.h @@ -33,6 +33,8 @@ struct ui_file; #define I387_ST0_REGNUM(tdep) ((tdep)->st0_regnum) #define I387_NUM_XMM_REGS(tdep) ((tdep)->num_xmm_regs) #define I387_MM0_REGNUM(tdep) ((tdep)->mm0_regnum) +#define I387_NUM_YMM_REGS(tdep) ((tdep)->num_ymm_regs) +#define I387_YMM0H_REGNUM(tdep) ((tdep)->ymm0h_regnum) #define I387_FCTRL_REGNUM(tdep) (I387_ST0_REGNUM (tdep) + 8) #define I387_FSTAT_REGNUM(tdep) (I387_FCTRL_REGNUM (tdep) + 1) @@ -45,6 +47,8 @@ struct ui_file; #define I387_XMM0_REGNUM(tdep) (I387_ST0_REGNUM (tdep) + 16) #define I387_MXCSR_REGNUM(tdep) \ (I387_XMM0_REGNUM (tdep) + I387_NUM_XMM_REGS (tdep)) +#define I387_YMMENDH_REGNUM(tdep) \ + (I387_YMM0H_REGNUM (tdep) + I387_NUM_YMM_REGS (tdep)) /* Print out the i387 floating point state. */ @@ -99,6 +103,11 @@ extern void i387_collect_fsave (const struct regcache *regcache, int regnum, extern void i387_supply_fxsave (struct regcache *regcache, int regnum, const void *fxsave); +/* Similar to i387_supply_fxsave, but use XSAVE extended state. 
*/ + +extern void i387_supply_xsave (struct regcache *regcache, int regnum, + const void *xsave); + /* Fill register REGNUM (if it is a floating-point or SSE register) in *FXSAVE with the value from REGCACHE. If REGNUM is -1, do this for all registers. This function doesn't touch any of the reserved @@ -107,6 +116,11 @@ extern void i387_supply_fxsave (struct regcache *regcache, int regnum, extern void i387_collect_fxsave (const struct regcache *regcache, int regnum, void *fxsave); +/* Similar to i387_collect_fxsave, but use XSAVE extended state. */ + +extern void i387_collect_xsave (const struct regcache *regcache, + int regnum, void *xsave, int gcore); + /* Prepare the FPU stack in REGCACHE for a function return. */ extern void i387_return_value (struct gdbarch *gdbarch,