From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2126) id 363CB386DC47; Tue, 14 Jun 2022 15:13:22 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 363CB386DC47 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable From: Tom Tromey To: gdb-cvs@sourceware.org Subject: [binutils-gdb] Implement lazy FPU initialization for ravenscar X-Act-Checkin: binutils-gdb X-Git-Author: Tom Tromey X-Git-Refname: refs/heads/master X-Git-Oldrev: e73434e38f55e21cc33457ce3b218fa7b4592fec X-Git-Newrev: 965b71a7f739a747c6b427a96b1fa9dd26e38956 Message-Id: <20220614151322.363CB386DC47@sourceware.org> Date: Tue, 14 Jun 2022 15:13:22 +0000 (GMT) X-BeenThere: gdb-cvs@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gdb-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 14 Jun 2022 15:13:22 -0000 https://sourceware.org/git/gitweb.cgi?p=3Dbinutils-gdb.git;h=3D965b71a7f739= a747c6b427a96b1fa9dd26e38956 commit 965b71a7f739a747c6b427a96b1fa9dd26e38956 Author: Tom Tromey Date: Wed May 4 13:08:11 2022 -0600 Implement lazy FPU initialization for ravenscar =20 Some ravenscar runtimes implement lazy FPU handling. On these runtimes, the FPU is only initialized when a task tries to use it. Furthermore, the FP registers aren't automatically saved on a task switch -- instead, the save is deferred until the new task tries to use the FPU. Furthermore, each task's context area has a flag indicating whether the FPU has been initialized for this task. =20 This patch teaches GDB to understand this implementation. When fetching or storing registers, GDB now checks to see whether the live FP registers should be used. If not, the task's saved FP registers will be used if the task has caused FPU initialization. =20 Currently only AArch64 uses this code. bb-runtimes implements this for ARM as well, but GDB doesn't yet have an arm-ravenscar-thread.c. Diff: --- gdb/aarch64-ravenscar-thread.c | 9 +- gdb/ravenscar-thread.c | 194 ++++++++++++++++++++++++++++++-------= ---- gdb/ravenscar-thread.h | 70 ++++++++++++++- 3 files changed, 217 insertions(+), 56 deletions(-) diff --git a/gdb/aarch64-ravenscar-thread.c b/gdb/aarch64-ravenscar-thread.c index 045d022fc23..eb7bda88d5d 100644 --- a/gdb/aarch64-ravenscar-thread.c +++ b/gdb/aarch64-ravenscar-thread.c @@ -61,10 +61,17 @@ static const int aarch64_context_offsets[] =3D 112, 116, }; =20 +#define V_INIT_OFFSET 640 + /* The ravenscar_arch_ops vector for most Aarch64 targets. */ =20 static struct ravenscar_arch_ops aarch64_ravenscar_ops - (aarch64_context_offsets); + (aarch64_context_offsets, + -1, -1, + V_INIT_OFFSET, + /* The FPU context buffer starts with the FPSR register. */ + aarch64_context_offsets[AARCH64_FPSR_REGNUM], + AARCH64_V0_REGNUM, AARCH64_FPCR_REGNUM); =20 /* Register aarch64_ravenscar_ops in GDBARCH. */ =20 diff --git a/gdb/ravenscar-thread.c b/gdb/ravenscar-thread.c index e300095b53f..1718c367ff6 100644 --- a/gdb/ravenscar-thread.c +++ b/gdb/ravenscar-thread.c @@ -164,6 +164,32 @@ private: switch_to_thread (find_thread_ptid (proc_target, underlying)); } =20 + /* Some targets use lazy FPU initialization. On these, the FP + registers for a given task might be uninitialized, or stored in + the per-task context, or simply be the live registers on the CPU. + This enum is used to encode this information. */ + enum fpu_state + { + /* This target doesn't do anything special for FP registers -- if + any exist, they are treated just identical to non-FP + registers. */ + NOTHING_SPECIAL, + /* This target uses the lazy FP scheme, and the FP registers are + taken from the CPU. This can happen for any task, because if a + task switch occurs, the registers aren't immediately written to + the per-task context -- this is deferred until the current task + causes an FPU trap. */ + LIVE_FP_REGISTERS, + /* This target uses the lazy FP scheme, and the FP registers are + not available. Maybe this task never initialized the FPU, or + maybe GDB couldn't find the required symbol. */ + NO_FP_REGISTERS + }; + + /* Return the FPU state. */ + fpu_state get_fpu_state (struct regcache *regcache, + const ravenscar_arch_ops *arch_ops); + /* This maps a TID to the CPU on which it was running. This is needed because sometimes the runtime will report an active task that hasn't yet been put on the list of tasks that is read by @@ -508,9 +534,11 @@ ravenscar_arch_ops::supply_one_register (struct regcac= he *regcache, } =20 void -ravenscar_arch_ops::fetch_registers (struct regcache *regcache, - int regnum) const +ravenscar_arch_ops::fetch_register (struct regcache *regcache, + int regnum) const { + gdb_assert (regnum !=3D -1); + struct gdbarch *gdbarch =3D regcache->arch (); /* The tid is the thread_id field, which is a pointer to the thread. */ CORE_ADDR thread_descriptor_address @@ -518,26 +546,17 @@ ravenscar_arch_ops::fetch_registers (struct regcache = *regcache, =20 int sp_regno =3D -1; CORE_ADDR stack_address =3D 0; - if (regnum =3D=3D -1 - || (regnum >=3D first_stack_register && regnum <=3D last_stack_regis= ter)) + if (regnum >=3D first_stack_register && regnum <=3D last_stack_register) { /* We must supply SP for get_stack_base, so recurse. */ sp_regno =3D gdbarch_sp_regnum (gdbarch); gdb_assert (!(sp_regno >=3D first_stack_register && sp_regno <=3D last_stack_register)); - fetch_registers (regcache, sp_regno); + fetch_register (regcache, sp_regno); stack_address =3D get_stack_base (regcache); } =20 - if (regnum =3D=3D -1) - { - /* Fetch all registers. */ - for (int reg =3D 0; reg < offsets.size (); ++reg) - if (reg !=3D sp_regno && offsets[reg] !=3D -1) - supply_one_register (regcache, reg, thread_descriptor_address, - stack_address); - } - else if (regnum < offsets.size () && offsets[regnum] !=3D -1) + if (regnum < offsets.size () && offsets[regnum] !=3D -1) supply_one_register (regcache, regnum, thread_descriptor_address, stack_address); } @@ -562,27 +581,20 @@ ravenscar_arch_ops::store_one_register (struct regcac= he *regcache, int regnum, } =20 void -ravenscar_arch_ops::store_registers (struct regcache *regcache, - int regnum) const +ravenscar_arch_ops::store_register (struct regcache *regcache, + int regnum) const { + gdb_assert (regnum !=3D -1); + /* The tid is the thread_id field, which is a pointer to the thread. */ CORE_ADDR thread_descriptor_address =3D (CORE_ADDR) regcache->ptid ().tid (); =20 CORE_ADDR stack_address =3D 0; - if (regnum =3D=3D -1 - || (regnum >=3D first_stack_register && regnum <=3D last_stack_regis= ter)) + if (regnum >=3D first_stack_register && regnum <=3D last_stack_register) stack_address =3D get_stack_base (regcache); =20 - if (regnum =3D=3D -1) - { - /* Store all registers. */ - for (int reg =3D 0; reg < offsets.size (); ++reg) - if (offsets[reg] !=3D -1) - store_one_register (regcache, reg, thread_descriptor_address, - stack_address); - } - else if (regnum < offsets.size () && offsets[regnum] !=3D -1) + if (regnum < offsets.size () && offsets[regnum] !=3D -1) store_one_register (regcache, regnum, thread_descriptor_address, stack_address); } @@ -615,6 +627,48 @@ private: ptid_t m_save_ptid; }; =20 +ravenscar_thread_target::fpu_state +ravenscar_thread_target::get_fpu_state (struct regcache *regcache, + const ravenscar_arch_ops *arch_ops) +{ + /* We want to return true if the special FP register handling is + needed. If this target doesn't have lazy FP, then no special + treatment is ever needed. */ + if (!arch_ops->on_demand_fp ()) + return NOTHING_SPECIAL; + + bound_minimal_symbol fpu_context + =3D lookup_minimal_symbol ("system__bb__cpu_primitives__current_fpu_co= ntext", + nullptr, nullptr); + /* If the symbol can't be found, just fall back. */ + if (fpu_context.minsym =3D=3D nullptr) + return NO_FP_REGISTERS; + + struct type *ptr_type =3D builtin_type (target_gdbarch ())->builtin_data= _ptr; + ptr_type =3D lookup_pointer_type (ptr_type); + value *val =3D value_from_pointer (ptr_type, fpu_context.value_address (= )); + + int cpu =3D get_thread_base_cpu (regcache->ptid ()); + /* The array index type has a lower bound of 1 -- it is Ada code -- + so subtract 1 here. */ + val =3D value_ptradd (val, cpu - 1); + + val =3D value_ind (val); + CORE_ADDR fpu_task =3D value_as_long (val); + + /* The tid is the thread_id field, which is a pointer to the thread. */ + CORE_ADDR thread_descriptor_address + =3D (CORE_ADDR) regcache->ptid ().tid (); + if (fpu_task =3D=3D (thread_descriptor_address + + arch_ops->get_fpu_context_offset ())) + return LIVE_FP_REGISTERS; + + int v_init_offset =3D arch_ops->get_v_init_offset (); + gdb_byte init =3D 0; + read_memory (thread_descriptor_address + v_init_offset, &init, 1); + return init ? NOTHING_SPECIAL : NO_FP_REGISTERS; +} + void ravenscar_thread_target::fetch_registers (struct regcache *regcache, int regnum) @@ -623,19 +677,38 @@ ravenscar_thread_target::fetch_registers (struct regc= ache *regcache, =20 if (runtime_initialized () && is_ravenscar_task (ptid)) { - if (task_is_currently_active (ptid)) - { - ptid_t base =3D get_base_thread_from_ravenscar_task (ptid); - temporarily_change_regcache_ptid changer (regcache, base); - beneath ()->fetch_registers (regcache, regnum); - } - else - { - struct gdbarch *gdbarch =3D regcache->arch (); - struct ravenscar_arch_ops *arch_ops - =3D gdbarch_ravenscar_ops (gdbarch); + struct gdbarch *gdbarch =3D regcache->arch (); + bool is_active =3D task_is_currently_active (ptid); + struct ravenscar_arch_ops *arch_ops =3D gdbarch_ravenscar_ops (gdbar= ch); + gdb::optional fp_state; + + int low_reg =3D regnum =3D=3D -1 ? 0 : regnum; + int high_reg =3D regnum =3D=3D -1 ? gdbarch_num_regs (gdbarch) : reg= num + 1; =20 - arch_ops->fetch_registers (regcache, regnum); + ptid_t base =3D get_base_thread_from_ravenscar_task (ptid); + for (int i =3D low_reg; i < high_reg; ++i) + { + bool use_beneath =3D false; + if (arch_ops->is_fp_register (i)) + { + if (!fp_state.has_value ()) + fp_state =3D get_fpu_state (regcache, arch_ops); + if (*fp_state =3D=3D NO_FP_REGISTERS) + continue; + if (*fp_state =3D=3D LIVE_FP_REGISTERS + || (is_active && *fp_state =3D=3D NOTHING_SPECIAL)) + use_beneath =3D true; + } + else + use_beneath =3D is_active; + + if (use_beneath) + { + temporarily_change_regcache_ptid changer (regcache, base); + beneath ()->fetch_registers (regcache, i); + } + else + arch_ops->fetch_register (regcache, i); } } else @@ -650,19 +723,38 @@ ravenscar_thread_target::store_registers (struct regc= ache *regcache, =20 if (runtime_initialized () && is_ravenscar_task (ptid)) { - if (task_is_currently_active (ptid)) - { - ptid_t base =3D get_base_thread_from_ravenscar_task (ptid); - temporarily_change_regcache_ptid changer (regcache, base); - beneath ()->store_registers (regcache, regnum); - } - else - { - struct gdbarch *gdbarch =3D regcache->arch (); - struct ravenscar_arch_ops *arch_ops - =3D gdbarch_ravenscar_ops (gdbarch); + struct gdbarch *gdbarch =3D regcache->arch (); + bool is_active =3D task_is_currently_active (ptid); + struct ravenscar_arch_ops *arch_ops =3D gdbarch_ravenscar_ops (gdbar= ch); + gdb::optional fp_state; =20 - arch_ops->store_registers (regcache, regnum); + int low_reg =3D regnum =3D=3D -1 ? 0 : regnum; + int high_reg =3D regnum =3D=3D -1 ? gdbarch_num_regs (gdbarch) : reg= num + 1; + + ptid_t base =3D get_base_thread_from_ravenscar_task (ptid); + for (int i =3D low_reg; i < high_reg; ++i) + { + bool use_beneath =3D false; + if (arch_ops->is_fp_register (i)) + { + if (!fp_state.has_value ()) + fp_state =3D get_fpu_state (regcache, arch_ops); + if (*fp_state =3D=3D NO_FP_REGISTERS) + continue; + if (*fp_state =3D=3D LIVE_FP_REGISTERS + || (is_active && *fp_state =3D=3D NOTHING_SPECIAL)) + use_beneath =3D true; + } + else + use_beneath =3D is_active; + + if (use_beneath) + { + temporarily_change_regcache_ptid changer (regcache, base); + beneath ()->store_registers (regcache, i); + } + else + arch_ops->store_register (regcache, i); } } else diff --git a/gdb/ravenscar-thread.h b/gdb/ravenscar-thread.h index 5d5661f48df..eda7ab1026d 100644 --- a/gdb/ravenscar-thread.h +++ b/gdb/ravenscar-thread.h @@ -26,19 +26,63 @@ struct ravenscar_arch_ops { ravenscar_arch_ops (gdb::array_view offsets_, int first_stack =3D -1, - int last_stack =3D -1) + int last_stack =3D -1, + int v_init =3D -1, + int fpu_offset =3D -1, + int first_fp =3D -1, + int last_fp =3D -1) : offsets (offsets_), first_stack_register (first_stack), - last_stack_register (last_stack) + last_stack_register (last_stack), + v_init_offset (v_init), + fpu_context_offset (fpu_offset), + first_fp_register (first_fp), + last_fp_register (last_fp) { /* These must either both be -1 or both be valid. */ gdb_assert ((first_stack_register =3D=3D -1) =3D=3D (last_stack_regist= er =3D=3D -1)); /* They must also be ordered. */ gdb_assert (last_stack_register >=3D first_stack_register); + /* These must either all be -1 or all be valid. */ + gdb_assert ((v_init_offset =3D=3D -1) =3D=3D (fpu_context_offset =3D= =3D -1) + && (fpu_context_offset =3D=3D -1) =3D=3D (first_fp_register =3D=3D -1) + && (first_fp_register =3D=3D -1) =3D=3D (last_fp_register =3D=3D -1)); } =20 - void fetch_registers (struct regcache *, int) const; - void store_registers (struct regcache *, int) const; + /* Return true if this architecture implements on-demand floating + point. */ + bool on_demand_fp () const + { return v_init_offset !=3D -1; } + + /* Return true if REGNUM is a floating-point register for this + target. If this target does not use the on-demand FP scheme, + this will always return false. */ + bool is_fp_register (int regnum) const + { + return regnum >=3D first_fp_register && regnum <=3D last_fp_register; + } + + /* Return the offset, in the current task context, of the byte + indicating whether the FPU has been initialized for the task. + This can only be called when the architecture implements + on-demand floating-point. */ + int get_v_init_offset () const + { + gdb_assert (on_demand_fp ()); + return v_init_offset; + } + + /* Return the offset, in the current task context, of the FPU + context. This can only be called when the architecture + implements on-demand floating-point. */ + int get_fpu_context_offset () const + { + gdb_assert (on_demand_fp ()); + return fpu_context_offset; + } + + void fetch_register (struct regcache *recache, int regnum) const; + void store_register (struct regcache *recache, int regnum) const; =20 private: =20 @@ -54,6 +98,24 @@ private: const int first_stack_register; const int last_stack_register; =20 + /* If these are -1, there is no special treatment for floating-point + registers -- they are handled, or not, just like all other + registers. + + Otherwise, they must all not be -1, and the target is one that + uses on-demand FP initialization. V_INIT_OFFSET is the offset of + a boolean field in the context that indicates whether the FP + registers have been initialized for this task. + FPU_CONTEXT_OFFSET is the offset of the FPU context from the task + context. (This is needed to check whether the FPU registers have + been saved.) FIRST_FP_REGISTER and LAST_FP_REGISTER are the + register numbers of the first and last (inclusive) floating point + registers. */ + const int v_init_offset; + const int fpu_context_offset; + const int first_fp_register; + const int last_fp_register; + /* Helper function to supply one register. */ void supply_one_register (struct regcache *regcache, int regnum, CORE_ADDR descriptor,