Hi Sebastian,

Just a couple of questions/issues:

1. How did you arrive at the 70 doubles and 43 doubles length values?
   (A rough tally of what I could check from the quoted hunks follows
   at the end of this message.)

2. It appears that __SPE__ is not accounted for in setjmp.h.  (A sketch
   of what I mean also follows the quoted patch.)

-- Jeff J.

On Fri, Sep 23, 2022 at 7:37 AM Sebastian Huber
<sebastian.huber@embedded-brains.de> wrote:

> Use 64-bit store/load instructions to save/restore the general-purpose
> registers.
> ---
>  newlib/libc/include/machine/setjmp.h |  8 +++
>  newlib/libc/machine/powerpc/setjmp.S | 79 ++++++++++++++++++++++++----
>  2 files changed, 78 insertions(+), 9 deletions(-)
>
> diff --git a/newlib/libc/include/machine/setjmp.h
> b/newlib/libc/include/machine/setjmp.h
> index 0bb53238b..53878a03d 100644
> --- a/newlib/libc/include/machine/setjmp.h
> +++ b/newlib/libc/include/machine/setjmp.h
> @@ -173,11 +173,19 @@ _BEGIN_STD_C
>  #endif
>
>  #ifdef __PPC__
> +#ifdef __powerpc64__
> +#ifdef __ALTIVEC__
> +#define _JBLEN 70
> +#else
> +#define _JBLEN 43
> +#endif
> +#else
>  #ifdef __ALTIVEC__
>  #define _JBLEN 64
>  #else
>  #define _JBLEN 32
>  #endif
> +#endif
>  #define _JBTYPE double
>  #endif
>
> diff --git a/newlib/libc/machine/powerpc/setjmp.S
> b/newlib/libc/machine/powerpc/setjmp.S
> index f4ccd1bb5..f1f8ac28f 100644
> --- a/newlib/libc/machine/powerpc/setjmp.S
> +++ b/newlib/libc/machine/powerpc/setjmp.S
> @@ -1,6 +1,7 @@
>  /* This is a simple version of setjmp and longjmp for the PowerPC.
>     Ian Lance Taylor, Cygnus Support, 9 Feb 1994.
> -   Modified by Jeff Johnston, Red Hat Inc. 2 Oct 2001. */
> +   Modified by Jeff Johnston, Red Hat Inc. 2 Oct 2001.
> +   Modified by Sebastian Huber, embedded brains GmbH. 22 Sep 2022. */
>
>  #include "ppc-asm.h"
>
> @@ -40,6 +41,31 @@ FUNC_START(setjmp)
>            stored.  Note that we are not adding 168 because the next
>            store instruction uses an offset of 4.  */
>         addi 3,3,164
> +#elif __powerpc64__
> +       /* In the first store, add 16 to r3 so that the subsequent floating
> +          point stores are aligned on an 8 byte boundary and the Altivec
> +          stores are aligned on a 16 byte boundary.  */
> +       stdu 1,16(3)    # offset 16
> +       stdu 2,8(3)     # offset 24
> +       stdu 13,8(3)    # offset 32
> +       stdu 14,8(3)    # offset 40
> +       stdu 15,8(3)    # offset 48
> +       stdu 16,8(3)    # offset 56
> +       stdu 17,8(3)    # offset 64
> +       stdu 18,8(3)    # offset 72
> +       stdu 19,8(3)    # offset 80
> +       stdu 20,8(3)    # offset 88
> +       stdu 21,8(3)    # offset 96
> +       stdu 22,8(3)    # offset 104
> +       stdu 23,8(3)    # offset 112
> +       stdu 24,8(3)    # offset 120
> +       stdu 25,8(3)    # offset 128
> +       stdu 26,8(3)    # offset 136
> +       stdu 27,8(3)    # offset 144
> +       stdu 28,8(3)    # offset 152
> +       stdu 29,8(3)    # offset 160
> +       stdu 30,8(3)    # offset 168
> +       stdu 31,8(3)    # offset 176
>  #else
>         stw 1,0(3)      # offset 0
>         stwu 2,4(3)     # offset 4
> @@ -64,10 +90,15 @@ FUNC_START(setjmp)
>         stwu 31,4(3)    # offset 80
>  #endif
>
> -       /* From this point on until the end of this function, add 84
> -          to the offset shown if __SPE__.  This difference comes from
> -          the fact that we save 21 64-bit registers instead of 21
> -          32-bit registers above.  */
> +       /* If __SPE__, then add 84 to the offset shown from this point on until
> +          the end of this function.  This difference comes from the fact that
> +          we save 21 64-bit registers instead of 21 32-bit registers above.
> +
> +          If __powerpc64__, then add 96 to the offset shown from this point on until
> +          the end of this function.  This difference comes from the fact that
> +          we save 21 64-bit registers instead of 21 32-bit registers above and
> +          we take alignment requirements of floating point and Altivec stores
> +          into account.  */
>         mflr 4
>         stwu 4,4(3)     # offset 84
>         mfcr 4
> @@ -188,6 +219,31 @@ FUNC_START(longjmp)
>            loaded.  Note that we are not adding 168 because the next
>            load instruction uses an offset of 4.  */
>         addi 3,3,164
> +#elif __powerpc64__
> +       /* In the first load, add 16 to r3 so that the subsequent floating
> +          point loads are aligned on an 8 byte boundary and the Altivec
> +          loads are aligned on a 16 byte boundary.  */
> +       ldu 1,16(3)     # offset 16
> +       ldu 2,8(3)      # offset 24
> +       ldu 13,8(3)     # offset 32
> +       ldu 14,8(3)     # offset 40
> +       ldu 15,8(3)     # offset 48
> +       ldu 16,8(3)     # offset 56
> +       ldu 17,8(3)     # offset 64
> +       ldu 18,8(3)     # offset 72
> +       ldu 19,8(3)     # offset 80
> +       ldu 20,8(3)     # offset 88
> +       ldu 21,8(3)     # offset 96
> +       ldu 22,8(3)     # offset 104
> +       ldu 23,8(3)     # offset 112
> +       ldu 24,8(3)     # offset 120
> +       ldu 25,8(3)     # offset 128
> +       ldu 26,8(3)     # offset 136
> +       ldu 27,8(3)     # offset 144
> +       ldu 28,8(3)     # offset 152
> +       ldu 29,8(3)     # offset 160
> +       ldu 30,8(3)     # offset 168
> +       ldu 31,8(3)     # offset 176
>  #else
>         lwz 1,0(3)      # offset 0
>         lwzu 2,4(3)     # offset 4
> @@ -211,10 +267,15 @@ FUNC_START(longjmp)
>         lwzu 30,4(3)    # offset 76
>         lwzu 31,4(3)    # offset 80
>  #endif
> -       /* From this point on until the end of this function, add 84
> -          to the offset shown if __SPE__.  This difference comes from
> -          the fact that we restore 21 64-bit registers instead of 21
> -          32-bit registers above.  */
> +       /* If __SPE__, then add 84 to the offset shown from this point on until
> +          the end of this function.  This difference comes from the fact that
> +          we restore 21 64-bit registers instead of 21 32-bit registers above.
> +
> +          If __powerpc64__, then add 96 to the offset shown from this point on until
> +          the end of this function.  This difference comes from the fact that
> +          we restore 21 64-bit registers instead of 21 32-bit registers above and
> +          we take alignment requirements of floating point and Altivec loads
> +          into account.  */
>         lwzu 5,4(3)     # offset 84
>         mtlr 5
>         lwzu 5,4(3)     # offset 88
> --
> 2.35.3
>
>
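For item 1, this is the back-of-envelope tally I tried for the non-Altivec
64-bit case.  It only uses the offsets visible in the quoted hunks plus the
assumption (the FP save/restore section is outside the quoted context) that
f14-f31 are still saved as 18 8-byte values, so it may well not be the
arithmetic you used:

  /* Rough tally only; the f14-f31 assumption is mine, not from the patch.  */
  enum {
    gpr_bytes   = 16 + 21 * 8, /* alignment pad + r1, r2, r13-r31   = 184 */
    lr_cr_bytes = 2 * 4,       /* lr and cr, still stored with stwu =   8 */
    fpr_bytes   = 18 * 8,      /* f14-f31, assumed unchanged        = 144 */
    total_bytes = gpr_bytes + lr_cr_bytes + fpr_bytes      /* = 336 bytes */
  };

  /* 336 bytes would fit in the proposed 43 doubles (344 bytes) with one
     double to spare; I cannot check the Altivec figure of 70 from the
     quoted hunks alone.  */
  _Static_assert(total_bytes <= 43 * 8, "fits in 43 doubles");

If that is roughly the derivation, it would be worth capturing it in a
comment next to the _JBLEN definitions.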
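For item 2, the sketch below is only meant to make the question concrete; it
is not a proposal for actual values.  setjmp.S has a separate __SPE__ path
that stores 21 64-bit GPRs (84 extra bytes, per its own comments), yet
setjmp.h still selects _JBLEN purely from __ALTIVEC__ and, with this patch,
__powerpc64__:

  /* Hypothetical illustration only -- not part of the patch; the SPE value
     below is a placeholder, not a derived size.  */
  #ifdef __PPC__
  #ifdef __powerpc64__
  #ifdef __ALTIVEC__
  #define _JBLEN 70
  #else
  #define _JBLEN 43
  #endif
  #elif defined(__SPE__)
  /* The SPE path saves 21 64-bit GPRs, 84 bytes (11 doubles, rounded up)
     more than the default 32-bit path; whether the existing 32-double
     buffer leaves room for that is what a dedicated branch would make
     explicit.  */
  #define _JBLEN (32 + 11)
  #else
  #ifdef __ALTIVEC__
  #define _JBLEN 64
  #else
  #define _JBLEN 32
  #endif
  #endif
  #define _JBTYPE double
  #endif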