2005-01-28 H.J. Lu * elf/tst-auditmod1.c: Add ia64 entries. * sysdeps/generic/ldsodefs.h (La_ia64_regs): New. (La_ia64_retval): New. (audit_ifaces): Add ia64 entries. * sysdeps/ia64/bits/link.h: New file. * sysdeps/ia64/dl-machine.h (elf_machine_runtime_setup): Test for dl_profile non-null. (ARCH_LA_PLTENTER): New. (ARCH_LA_PLTEXIT): New. * sysdeps/ia64/dl-trampoline.S (_dl_runtime_resolve): Allocate only 2 output registers. Allocate stack to save/restore 8 incoming fp registers. Call _dl_fixup instead of fixup. (_dl_runtime_profile): Rewrite. --- libc/elf/tst-auditmod1.c.tramp 2005-01-24 15:30:36.000000000 -0800 +++ libc/elf/tst-auditmod1.c 2005-01-28 15:21:21.000000000 -0800 @@ -144,6 +144,12 @@ la_symbind64 (Elf64_Sym *sym, unsigned i # define La_regs La_alpha_regs # define La_retval La_alpha_retval # define int_retval lrv_r0 +#elif defined __ia64__ +# define pltenter la_ia64_gnu_pltenter +# define pltexit la_ia64_gnu_pltexit +# define La_regs La_ia64_regs +# define La_retval La_ia64_retval +# define int_retval lrv_r8 #else # error "architecture specific code needed" #endif --- libc/sysdeps/generic/ldsodefs.h.tramp 2005-01-24 15:30:39.000000000 -0800 +++ libc/sysdeps/generic/ldsodefs.h 2005-01-27 09:15:12.000000000 -0800 @@ -189,6 +189,8 @@ struct La_m68k_regs; struct La_m68k_retval; struct La_alpha_regs; struct La_alpha_retval; +struct La_ia64_regs; +struct La_ia64_retval; struct audit_ifaces { @@ -233,6 +235,10 @@ struct audit_ifaces uintptr_t *, struct La_alpha_regs *, unsigned int *, const char *name, long int *framesizep); + Elf64_Addr (*ia64_gnu_pltenter) (Elf64_Sym *, unsigned int, uintptr_t *, + uintptr_t *, struct La_ia64_regs *, + unsigned int *, const char *name, + long int *framesizep); }; union { @@ -262,6 +268,10 @@ struct audit_ifaces uintptr_t *, const struct La_alpha_regs *, struct La_alpha_retval *, const char *); + unsigned int (*ia64_gnu_pltexit) (Elf64_Sym *, unsigned int, uintptr_t *, + uintptr_t *, + const struct La_ia64_regs *, + struct 
La_ia64_retval *, const char *); }; unsigned int (*objclose) (uintptr_t *); --- libc/sysdeps/ia64/bits/link.h.tramp 2005-01-07 14:13:48.000000000 -0800 +++ libc/sysdeps/ia64/bits/link.h 2005-01-28 12:21:38.000000000 -0800 @@ -0,0 +1,63 @@ +/* Copyright (C) 2005 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifndef _LINK_H +# error "Never include <bits/link.h> directly; use <link.h> instead." +#endif + +/* Registers for entry into PLT on ia64. */ +typedef struct La_ia64_regs +{ + uint64_t lr_r8; + uint64_t lr_r9; + uint64_t lr_r10; + uint64_t lr_r11; + uint64_t lr_gr [8]; + long double lr_fr [8]; + uint64_t lr_unat; + uint64_t lr_sp; +} La_ia64_regs; + +/* Return values for calls from PLT on ia64.
*/ +typedef struct La_ia64_retval +{ + uint64_t lrv_r8; + uint64_t lrv_r9; + uint64_t lrv_r10; + uint64_t lrv_r11; + long double lr_fr [8]; +} La_ia64_retval; + + +__BEGIN_DECLS + +extern Elf64_Addr la_ia64_gnu_pltenter (Elf64_Sym *__sym, unsigned int __ndx, + uintptr_t *__refcook, + uintptr_t *__defcook, + La_ia64_regs *__regs, + unsigned int *__flags, + const char *__symname, + long int *__framesizep); +extern unsigned int la_ia64_gnu_pltexit (Elf64_Sym *__sym, unsigned int __ndx, + uintptr_t *__refcook, + uintptr_t *__defcook, + const La_ia64_regs *__inregs, + La_ia64_retval *__outregs, + const char *symname); + +__END_DECLS --- libc/sysdeps/ia64/dl-machine.h.tramp 2005-01-27 09:15:12.000000000 -0800 +++ libc/sysdeps/ia64/dl-machine.h 2005-01-27 09:15:12.000000000 -0800 @@ -123,7 +123,8 @@ elf_machine_runtime_setup (struct link_m doit = (Elf64_Addr) ((struct fdesc *) &_dl_runtime_resolve)->ip; else { - if (_dl_name_match_p (GLRO(dl_profile), l)) + if (GLRO(dl_profile) != NULL + && _dl_name_match_p (GLRO(dl_profile), l)) { /* This is the object we are looking for. Say that we really want profiling and the timers are started. */ @@ -139,6 +140,9 @@ elf_machine_runtime_setup (struct link_m return lazy; } +/* Names of the architecture-specific auditing callback functions. */ +#define ARCH_LA_PLTENTER ia64_gnu_pltenter +#define ARCH_LA_PLTEXIT ia64_gnu_pltexit /* Undo the adds out0 = 16, sp below to get at the value we want in __libc_stack_end. */ --- libc/sysdeps/ia64/dl-trampoline.S.tramp 2005-01-22 21:11:50.000000000 -0800 +++ libc/sysdeps/ia64/dl-trampoline.S 2005-01-28 17:07:52.147856945 -0800 @@ -18,37 +18,38 @@ 02111-1307 USA. */ #include +#undef ret /* - This code is used in dl-runtime.c to call the `fixup' function - and then redirect to the address it returns. `fixup()' takes two - arguments, however profile_fixup() takes three. + This code is used in dl-runtime.c to call the `_dl_fixup' function + and then redirect to the address it returns. 
`_dl_fixup()' takes two + arguments, however _dl_profile_fixup() takes five. The ABI specifies that we will never see more than 8 input registers to a function call, thus it is safe to simply allocate those, and simpler than playing stack games. */ +/* Used to save and restore 8 incoming fp registers */ +#define RESOLVE_FRAME_SIZE (16*8) + ENTRY(_dl_runtime_resolve) { .mmi .prologue .save ar.pfs, r40 - alloc loc0 = ar.pfs, 8, 6, 3, 0 - adds r2 = -144, r12 - adds r3 = -128, r12 + alloc loc0 = ar.pfs, 8, 6, 2, 0 + /* Use the 16 byte scratch area. r2 will start at f8 and + r3 will start at f9. */ + adds r2 = -(RESOLVE_FRAME_SIZE - 16), r12 + adds r3 = -(RESOLVE_FRAME_SIZE - 32), r12 } { .mii - .fframe 160 - adds r12 = -160, r12 - .save rp, r41 + .fframe RESOLVE_FRAME_SIZE + adds r12 = -RESOLVE_FRAME_SIZE, r12 + .save rp, loc1 mov loc1 = b0 .body - mov out2 = b0 /* needed by fixup_profile */ - ;; - } - { .mfb mov loc2 = r8 /* preserve struct value register */ - nop.f 0 - nop.b 0 + ;; } { .mii mov loc3 = r9 /* preserve language specific register */ @@ -70,18 +71,20 @@ ENTRY(_dl_runtime_resolve) { .mmi stf.spill [r2] = f12, 32 stf.spill [r3] = f13, 32 + /* Relocation record is 24 byte. */ shladd out1 = r15, 3, out1 ;; } { .mmb stf.spill [r2] = f14 stf.spill [r3] = f15 - br.call.sptk.many b0 = fixup + br.call.sptk.many b0 = _dl_fixup } { .mii - ld8 r9 = [ret0], 8 + /* Skip the 16byte scratch area. 
*/ adds r2 = 16, r12 adds r3 = 32, r12 + mov b6 = ret0 ;; } { .mmi @@ -93,7 +96,7 @@ ENTRY(_dl_runtime_resolve) { .mmi ldf.fill f10 = [r2], 32 ldf.fill f11 = [r3], 32 - mov b6 = r9 + mov gp = ret1 ;; } { .mmi @@ -106,7 +109,7 @@ ENTRY(_dl_runtime_resolve) ldf.fill f14 = [r2], 32 ldf.fill f15 = [r3], 32 .restore sp /* pop the unwind frame state */ - adds r12 = 160, r12 + adds r12 = RESOLVE_FRAME_SIZE, r12 ;; } { .mii @@ -115,7 +118,6 @@ ENTRY(_dl_runtime_resolve) mov r11 = loc5 /* restore language specific register */ } { .mii - ld8 gp = [ret0] mov r8 = loc2 /* restore struct value register */ ;; } @@ -128,42 +130,151 @@ ENTRY(_dl_runtime_resolve) br.sptk.many b6 ;; } -END (_dl_runtime_resolve) +END(_dl_runtime_resolve) + + +/* The fourth argument to _dl_profile_fixup and the third one to + _dl_call_pltexit are a pointer to La_ia64_regs: + + 8byte r8 + 8byte r9 + 8byte r10 + 8byte r11 + 8byte in0 + 8byte in1 + 8byte in2 + 8byte in3 + 8byte in4 + 8byte in5 + 8byte in6 + 8byte in7 + 16byte f8 + 16byte f9 + 16byte f10 + 16byte f11 + 16byte f12 + 16byte f13 + 16byte f14 + 16byte f15 + 8byte ar.unat + 8byte sp + + The fifth argument to _dl_profile_fixup is a pointer to long int. + The fourth argument to _dl_call_pltexit is a pointer to + La_ia64_retval: + + 8byte r8 + 8byte r9 + 8byte r10 + 8byte r11 + 16byte f8 + 16byte f9 + 16byte f10 + 16byte f11 + 16byte f12 + 16byte f13 + 16byte f14 + 16byte f15 + + Since stack has to be 16 byte aligned, the stack allocation is in + 16byte increment. 
Before calling _dl_profile_fixup, the stack will + look like + + psp new frame_size + +16 La_ia64_regs + sp scratch + */ + +#define PLTENTER_FRAME_SIZE (4*8 + 8*8 + 8*16 + 2*8 + 16) +#define PLTEXIT_FRAME_SIZE (PLTENTER_FRAME_SIZE + 4*8 + 8*16) ENTRY(_dl_runtime_profile) - { .mmi + { .mii .prologue .save ar.pfs, r40 - alloc loc0 = ar.pfs, 8, 6, 3, 0 - adds r2 = -144, r12 - adds r3 = -128, r12 + alloc loc0 = ar.pfs, 8, 12, 8, 0 + .vframe loc10 + mov loc10 = r12 + .save rp, loc1 + mov loc1 = b0 + } + { .mii + .save ar.unat, r17 + mov r17 = ar.unat + .save ar.lc, loc6 + mov loc6 = ar.lc + mov loc11 = gp } { .mii - .fframe 160 - adds r12 = -160, r12 - .save rp, r41 - mov loc1 = b0 .body - mov out2 = b0 /* needed by fixup_profile */ + /* There is a 16 byte scratch area. r2 will start at r8 and + r3 will start at r9 for La_ia64_regs. */ + adds r2 = -(PLTENTER_FRAME_SIZE - 16), r12 + adds r3 = -(PLTENTER_FRAME_SIZE - 24), r12 + adds r12 = -PLTENTER_FRAME_SIZE, r12 + ;; + } + { .mmi + st8 [r2] = r8, 16; + st8 [r3] = r9, 16; + mov out2 = b0 /* needed by _dl_profile_fixup */ + ;; + } + { .mmi + st8 [r2] = r10, 16; + st8 [r3] = r11, 16; + adds out3 = 16, r12 /* pointer to La_ia64_regs */ + ;; + } + { .mmi + .mem.offset 0, 0 + st8.spill [r2] = in0, 16 + .mem.offset 8, 0 + st8.spill [r3] = in1, 16 + mov out4 = loc10 /* pointer to new frame size */ + ;; + } + { .mmi + .mem.offset 0, 0 + st8.spill [r2] = in2, 16 + .mem.offset 8, 0 + st8.spill [r3] = in3, 16 mov loc2 = r8 /* preserve struct value register */ - nop.f 0 - nop.b 0 + ;; } - { .mii + { .mmi + .mem.offset 0, 0 + st8.spill [r2] = in4, 16 + .mem.offset 8, 0 + st8.spill [r3] = in5, 16 mov loc3 = r9 /* preserve language specific register */ + ;; + } + { .mmi + .mem.offset 0, 0 + st8 [r2] = in6, 16 + .mem.offset 8, 0 + st8 [r3] = in7, 24 /* adjust for f9 */ mov loc4 = r10 /* preserve language specific register */ + ;; + } + { .mii + mov r18 = ar.unat /* save it in La_ia64_regs */ + mov loc7 = out3 /* save it for
_dl_call_pltexit */ mov loc5 = r11 /* preserve language specific register */ } { .mmi stf.spill [r2] = f8, 32 stf.spill [r3] = f9, 32 - mov out0 = r16 + mov out0 = r16 /* needed by _dl_profile_fixup */ ;; } + { .mii + mov ar.unat = r17 /* restore it for function call */ + mov loc8 = r16 /* save it for _dl_call_pltexit */ + nop.i 0x0 + } { .mmi stf.spill [r2] = f10, 32 stf.spill [r3] = f11, 32 @@ -173,57 +284,249 @@ ENTRY(_dl_runtime_profile) { .mmi stf.spill [r2] = f12, 32 stf.spill [r3] = f13, 32 + /* Relocation record is 24 byte. */ shladd out1 = r15, 3, out1 ;; } + { .mmi + stf.spill [r2] = f14, 32 + stf.spill [r3] = f15, 24 + mov loc9 = out1 /* save it for _dl_call_pltexit */ + ;; + } { .mmb - stf.spill [r2] = f14 - stf.spill [r3] = f15 - br.call.sptk.many b0 = profile_fixup + st8 [r2] = r18 /* store ar.unat */ + st8 [r3] = loc10 /* store sp */ + br.call.sptk.many b0 = _dl_profile_fixup } { .mii - ld8 r9 = [ret0], 8 - adds r2 = 16, r12 - adds r3 = 32, r12 + /* Skip the 16byte scratch area, 4 language specific GRs and + 8 incoming GRs to restore incoming fp registers.
*/ + adds r2 = (4*8 + 8*8 + 16), r12 + adds r3 = (4*8 + 8*8 + 32), r12 + mov b6 = ret0 ;; } { .mmi ldf.fill f8 = [r2], 32 ldf.fill f9 = [r3], 32 - mov b0 = loc1 + mov gp = ret1 ;; } { .mmi ldf.fill f10 = [r2], 32 ldf.fill f11 = [r3], 32 - mov b6 = r9 + mov r8 = loc2 /* restore struct value register */ ;; } { .mmi ldf.fill f12 = [r2], 32 ldf.fill f13 = [r3], 32 - mov ar.pfs = loc0 + mov r9 = loc3 /* restore language specific register */ ;; } { .mmi ldf.fill f14 = [r2], 32 ldf.fill f15 = [r3], 32 - .restore sp /* pop the unwind frame state */ - adds r12 = 160, r12 + mov r10 = loc4 /* restore language specific register */ ;; } { .mii - mov r9 = loc3 /* restore language specific register */ - mov r10 = loc4 /* restore language specific register */ + ld8 r15 = [loc10] /* load the new frame size */ mov r11 = loc5 /* restore language specific register */ + ;; + cmp.eq p6, p7 = -1, r15 + ;; } { .mii - ld8 gp = [ret0] - mov r8 = loc2 /* restore struct value register */ +(p7) cmp.eq p8, p9 = 0, r15 +(p6) mov b0 = loc1 +(p6) mov ar.lc = loc6 + } + { .mib + nop.m 0x0 +(p6) mov ar.pfs = loc0 +(p6) br.cond.dptk.many .Lresolved ;; } - /* An alloc is needed for the break system call to work. - We don't care about the old value of the pfs register. */ + + /* At this point, the stack looks like + + +psp free + +16 La_ia64_regs + sp scratch + + We need to keep the current stack and call the resolved + function by copying r15 bytes from sp + PLTENTER_FRAME_SIZE + + 16 (scratch area) to sp + 16 (scratch area). Since stack + has to be 16byte aligned, we round r15 up to 16byte. */ + + { .mbb +(p9) adds r15 = 15, r15 +(p8) br.cond.dptk.many .Lno_new_frame + nop.b 0x0 + ;; + } + { .mmi + and r15 = -16, r15 + ;; + /* We don't copy the 16byte scratch area. Prepare r16/r17 as + destination.
*/ + sub r16 = r12, r15 + sub r17 = r12, r15 + ;; + } + { .mii + adds r16 = 16, r16 + adds r17 = 24, r17 + sub r12 = r12, r15 /* Adjust stack */ + ;; + } + { .mii + nop.m 0x0 + shr r15 = r15, 4 + ;; + adds r15 = -1, r15 + ;; + } + { .mii + /* Skip the 16byte scratch area. Prepare r2/r3 as source. */ + adds r2 = 16, loc10 + adds r3 = 24, loc10 + mov ar.lc = r15 + ;; + } +.Lcopy: + { .mmi + ld8 r18 = [r2], 16 + ld8 r19 = [r3], 16 + nop.i 0x0 + ;; + } + { .mmb + st8 [r16] = r18, 16 + st8 [r17] = r19, 16 + br.cloop.sptk.few .Lcopy + } +.Lno_new_frame: + { .mii + mov out0 = in0 + mov out1 = in1 + mov out2 = in2 + } + { .mii + mov out3 = in3 + mov out4 = in4 + mov out5 = in5 + } + { .mib + mov out6 = in6 + mov out7 = in7 + /* Call the resolved function */ + br.call.sptk.many b0 = b6 + } + { .mii + /* Prepare stack for _dl_call_pltexit. Loc10 has the original + stack pointer. */ + adds r12 = -PLTEXIT_FRAME_SIZE, loc10 + adds r2 = -(PLTEXIT_FRAME_SIZE - 16), loc10 + adds r3 = -(PLTEXIT_FRAME_SIZE - 24), loc10 + ;; + } + { .mmi + /* Store all possible return values into buffer. */ + st8 [r2] = r8, 16 + st8 [r3] = r9, 16 + mov out0 = loc8 + ;; + } + { .mmi + st8 [r2] = r10, 16 + st8 [r3] = r11, 24 + mov out1 = loc9 + ;; + } + { .mmi + stf.spill [r2] = f8, 32 + stf.spill [r3] = f9, 32 + mov out2 = loc7 /* Pointer to La_ia64_regs */ + ;; + } + { .mmi + stf.spill [r2] = f10, 32 + stf.spill [r3] = f11, 32 + adds out3 = 16, r12 /* Pointer to La_ia64_retval */ + ;; + } + { .mmi + stf.spill [r2] = f12, 32 + stf.spill [r3] = f13, 32 + /* We need to restore gp for _dl_call_pltexit. */ + mov gp = loc11 + ;; + } + { .mmb + stf.spill [r2] = f14 + stf.spill [r3] = f15 + br.call.sptk.many b0 = _dl_call_pltexit + } + { .mmi + /* Load all the non-floating and floating return values. Skip + the 16byte scratch area.
*/ + adds r2 = 16, r12 + adds r3 = 24, r12 + nop.i 0x0 + ;; + } + { .mmi + ld8 r8 = [r2], 16 + ld8 r9 = [r3], 16 + nop.i 0x0 + ;; + } + { .mmi + ld8 r10 = [r2], 16 + ld8 r11 = [r3], 24 + nop.i 0x0 + ;; + } + { .mmi + ldf.fill f8 = [r2], 32 + ldf.fill f9 = [r3], 32 + mov ar.lc = loc6 + ;; + } + { .mmi + ldf.fill f10 = [r2], 32 + ldf.fill f11 = [r3], 32 + mov ar.pfs = loc0 + ;; + } + { .mmi + ldf.fill f12 = [r2], 32 + ldf.fill f13 = [r3], 32 + mov b0 = loc1 + ;; + } + { .mmi + ldf.fill f14 = [r2] + ldf.fill f15 = [r3] + /* We know that the previous stack pointer, loc10, isn't 0. + We use it to reload p7. */ + cmp.ne p7, p0 = 0, loc10 + ;; + } +.Lresolved: + { .mmb + .restore sp + mov r12 = loc10 +(p7) br.ret.sptk.many b0 + ;; + } + /* An alloc is needed for the break system call to work. We + don't care about the old value of the pfs register. After + this alloc, we can't use any rotating registers. Otherwise + assembler won't be happy. This has to be at the end. */ { .mmb .prologue .body @@ -231,4 +534,4 @@ ENTRY(_dl_runtime_profile) br.sptk.many b6 ;; } -END (_dl_runtime_profile) +END(_dl_runtime_profile)