The following change fixes a number of problems with atomic loads and
stores on hppa.  Tested on hppa-unknown-linux-gnu and
hppa64-hp-hpux11.11.

The TARGET_SOFT_FLOAT case is not tested.  It's possible we need
additional atomic load support for this case, but I think that's
unnecessary since hppa requires strict alignment.

We could use an array of locks in sync-libfuncs.c to improve
performance, but I kept things simple for now.

A few illustrative sketches of the affected cases follow the patch.

Committed to trunk.

Dave
---

Fix support for atomic loads and stores on hppa.

This change updates the atomic libcall support to fix the following
issues:

1) An internal compiler error with -fno-sync-libcalls.
2) When sync libcalls are disabled, we don't generate libcalls for
   libatomic.
3) There is no sync libcall support for targets other than Linux.
   As a result, non-atomic stores are silently emitted for types
   smaller than or equal to the word size.

There are now a few atomic libcalls in the libgcc code, so we need
sync support on all targets.

2023-01-13  John David Anglin

gcc/ChangeLog:

        * config/pa/pa-linux.h (TARGET_SYNC_LIBCALL): Delete define.
        * config/pa/pa.cc (pa_init_libfuncs): Use MAX_SYNC_LIBFUNC_SIZE
        define.
        * config/pa/pa.h (TARGET_SYNC_LIBCALLS): Use flag_sync_libcalls.
        (MAX_SYNC_LIBFUNC_SIZE): Define.
        (TARGET_CPU_CPP_BUILTINS): Define __SOFTFP__ when soft float is
        enabled.
        * config/pa/pa.md (atomic_storeqi): Emit __atomic_exchange_1
        libcall when sync libcalls are disabled.
        (atomic_storehi, atomic_storesi, atomic_storedi): Likewise.
        (atomic_loaddi): Emit __atomic_load_8 libcall when sync libcalls
        are disabled on 32-bit target.
        * config/pa/pa.opt (matomic-libcalls): New option.
        * doc/invoke.texi (HPPA Options): Update.

libgcc/ChangeLog:

        * config.host (hppa*64*-*-linux*): Adjust tmake_file to use
        pa/t-pa64-linux.
        (hppa*64*-*-hpux11*): Adjust tmake_file to use pa/t-pa64-hpux
        instead of pa/t-hpux and pa/t-pa64.
        * config/pa/linux-atomic.c: Define u32 type.
        (ATOMIC_LOAD): Define new macro to implement atomic_load_1,
        atomic_load_2, atomic_load_4 and atomic_load_8.  Update sync
        defines to use atomic_load calls for type.
        (SYNC_LOCK_LOAD_2): New macro to implement __sync_lock_load_8.
        * config/pa/sync-libfuncs.c: New file.
        * config/pa/t-netbsd (LIB2ADD_ST): Define.
        * config/pa/t-openbsd (LIB2ADD_ST): Define.
        * config/pa/t-pa64-hpux: New file.
        * config/pa/t-pa64-linux: New file.

diff --git a/gcc/config/pa/pa-linux.h b/gcc/config/pa/pa-linux.h
index 5af11a1df80..1073f42bd6b 100644
--- a/gcc/config/pa/pa-linux.h
+++ b/gcc/config/pa/pa-linux.h
@@ -133,9 +133,6 @@ along with GCC; see the file COPYING3.  If not see
 #undef TARGET_GAS
 #define TARGET_GAS 1
 
-#undef TARGET_SYNC_LIBCALL
-#define TARGET_SYNC_LIBCALL 1
-
 /* The SYNC operations are implemented as library functions, not
    INSN patterns.  As a result, the HAVE defines for the patterns are
    not defined.  We need to define them to generate the corresponding
diff --git a/gcc/config/pa/pa.cc b/gcc/config/pa/pa.cc
index 9f43802075f..b43a91f2edb 100644
--- a/gcc/config/pa/pa.cc
+++ b/gcc/config/pa/pa.cc
@@ -5940,8 +5940,8 @@ pa_init_libfuncs (void)
                            "_U_Qfcnvxf_udbl_to_quad");
     }
 
-  if (TARGET_SYNC_LIBCALL)
-    init_sync_libfuncs (8);
+  if (TARGET_SYNC_LIBCALLS)
+    init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
 }
 
 /* HP's millicode routines mean something special to the assembler.
diff --git a/gcc/config/pa/pa.h b/gcc/config/pa/pa.h
index bafdf602138..93d6f53f97f 100644
--- a/gcc/config/pa/pa.h
+++ b/gcc/config/pa/pa.h
@@ -72,10 +72,12 @@ extern unsigned long total_code_bytes;
 #define HPUX_LONG_DOUBLE_LIBRARY 0
 #endif
 
-/* Linux kernel atomic operation support.  */
-#ifndef TARGET_SYNC_LIBCALL
-#define TARGET_SYNC_LIBCALL 0
-#endif
+/* Sync libcall support.  */
+#define TARGET_SYNC_LIBCALLS (flag_sync_libcalls)
+
+/* The maximum size of the sync library functions supported.  DImode
+   is supported on 32-bit targets using floating point loads and stores.  */
+#define MAX_SYNC_LIBFUNC_SIZE 8
 
 /* The following three defines are potential target switches.  The current
    defines are optimal given the current capabilities of GAS and GNU ld.  */
@@ -173,6 +175,8 @@ do { \
      builtin_define("_PA_RISC1_0"); \
    if (HPUX_LONG_DOUBLE_LIBRARY) \
      builtin_define("__SIZEOF_FLOAT128__=16"); \
+   if (TARGET_SOFT_FLOAT) \
+     builtin_define("__SOFTFP__"); \
 } while (0)
 
 /* An old set of OS defines for various BSD-like systems.  */
diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md
index 41382271e54..71f391f2bf7 100644
--- a/gcc/config/pa/pa.md
+++ b/gcc/config/pa/pa.md
@@ -10360,7 +10360,23 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
 ;; doubleword loads and stores are not guaranteed to be atomic
 ;; when referencing the I/O address space.
 
-;; These patterns are at the bottom so the non atomic versions are preferred.
+;; Atomic and sync libcalls use different lock sets.  Great care is
+;; needed if both are used in a single application.
+
+;; Atomic load and store libcalls are enabled by the -matomic-libcalls
+;; option.  This option is not enabled by default as the generated
+;; libcalls depend on libatomic which is not built until the end of
+;; the gcc build.  For loads, we only need an atomic libcall for DImode.
+;; Sync libcalls are not generated when atomic libcalls are enabled.
+
+;; Sync libcalls are enabled by default when supported.  They can be
+;; disabled by the -fno-sync-libcalls option.  Sync libcalls always
+;; use a single memory store in their implementation, even for DImode.
+;; DImode stores are done using either std or fstd.  Thus, we only
+;; need a sync load libcall for DImode when we don't have an atomic
+;; processor load available for the mode (TARGET_SOFT_FLOAT).
+
+;; Implement atomic QImode store using exchange.
 
 (define_expand "atomic_storeqi"
   [(match_operand:QI 0 "memory_operand")       ;; memory
@@ -10368,19 +10384,30 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
    (match_operand:SI 2 "const_int_operand")]   ;; model
   ""
 {
-  if (TARGET_SYNC_LIBCALL)
+  rtx addr, libfunc;
+
+  if (TARGET_SYNC_LIBCALLS)
     {
-      rtx libfunc = optab_libfunc (sync_lock_test_and_set_optab, QImode);
-      rtx addr = convert_memory_address (Pmode, XEXP (operands[0], 0));
+      addr = convert_memory_address (Pmode, XEXP (operands[0], 0));
+      libfunc = optab_libfunc (sync_lock_test_and_set_optab, QImode);
+      emit_library_call (libfunc, LCT_NORMAL, VOIDmode, addr, Pmode,
+                         operands[1], QImode);
+      DONE;
+    }
+
+  if (TARGET_ATOMIC_LIBCALLS)
+    {
+      addr = convert_memory_address (Pmode, XEXP (operands[0], 0));
+      libfunc = init_one_libfunc ("__atomic_exchange_1");
       emit_library_call (libfunc, LCT_NORMAL, VOIDmode, addr, Pmode,
                          operands[1], QImode);
       DONE;
     }
+
   FAIL;
 })
 
-;; Implement atomic HImode stores using exchange.
+;; Implement atomic HImode store using exchange.
(define_expand "atomic_storehi" [(match_operand:HI 0 "memory_operand") ;; memory @@ -10388,15 +10415,26 @@ add,l %2,%3,%3\;bv,n %%r0(%3)" (match_operand:SI 2 "const_int_operand")] ;; model "" { - if (TARGET_SYNC_LIBCALL) + rtx addr, libfunc; + + if (TARGET_SYNC_LIBCALLS) { - rtx libfunc = optab_libfunc (sync_lock_test_and_set_optab, HImode); - rtx addr = convert_memory_address (Pmode, XEXP (operands[0], 0)); + addr = convert_memory_address (Pmode, XEXP (operands[0], 0)); + libfunc = optab_libfunc (sync_lock_test_and_set_optab, HImode); + emit_library_call (libfunc, LCT_NORMAL, VOIDmode, addr, Pmode, + operands[1], HImode); + DONE; + } + if (TARGET_ATOMIC_LIBCALLS) + { + addr = convert_memory_address (Pmode, XEXP (operands[0], 0)); + libfunc = init_one_libfunc ("__atomic_exchange_2"); emit_library_call (libfunc, LCT_NORMAL, VOIDmode, addr, Pmode, operands[1], HImode); DONE; } + FAIL; }) @@ -10408,20 +10446,39 @@ add,l %2,%3,%3\;bv,n %%r0(%3)" (match_operand:SI 2 "const_int_operand")] ;; model "" { - if (TARGET_SYNC_LIBCALL) + rtx addr, libfunc; + + if (TARGET_SYNC_LIBCALLS) { - rtx libfunc = optab_libfunc (sync_lock_test_and_set_optab, SImode); - rtx addr = convert_memory_address (Pmode, XEXP (operands[0], 0)); + addr = convert_memory_address (Pmode, XEXP (operands[0], 0)); + libfunc = optab_libfunc (sync_lock_test_and_set_optab, SImode); + emit_library_call (libfunc, LCT_NORMAL, VOIDmode, addr, Pmode, + operands[1], SImode); + DONE; + } + if (TARGET_ATOMIC_LIBCALLS) + { + addr = convert_memory_address (Pmode, XEXP (operands[0], 0)); + libfunc = init_one_libfunc ("__atomic_exchange_4"); emit_library_call (libfunc, LCT_NORMAL, VOIDmode, addr, Pmode, operands[1], SImode); DONE; } + FAIL; }) ;; Implement atomic DImode load. +;; We need an atomic or sync libcall whenever the processor load or +;; store used for DImode is not atomic. The 32-bit libatomic +;; implementation uses a pair of stw instructions. They are not +;; atomic, so we need to call __atomic_load_8. The linux libgcc +;; sync implementation uses a std or fstd instruction. They are +;; atomic, so we only need to call __sync_load_8 when the load +;; operation would not be atomic (e.g., 32-bit TARGET_SOFT_FLOAT). + (define_expand "atomic_loaddi" [(match_operand:DI 0 "register_operand") ;; val out (match_operand:DI 1 "memory_operand") ;; memory @@ -10429,12 +10486,35 @@ add,l %2,%3,%3\;bv,n %%r0(%3)" "" { enum memmodel model; + rtx addr, libfunc; - if (TARGET_64BIT || TARGET_SOFT_FLOAT) + if (TARGET_64BIT) FAIL; + if (TARGET_SYNC_LIBCALLS && MAX_SYNC_LIBFUNC_SIZE >= 8 && TARGET_SOFT_FLOAT) + { + addr = convert_memory_address (Pmode, XEXP (operands[1], 0)); + libfunc = init_one_libfunc ("__sync_load_8"); + emit_library_call_value (libfunc, operands[0], LCT_NORMAL, DImode, + addr, Pmode); + DONE; + } + + if (TARGET_ATOMIC_LIBCALLS && TARGET_SOFT_FLOAT) + { + addr = convert_memory_address (Pmode, XEXP (operands[1], 0)); + libfunc = init_one_libfunc ("__atomic_load_8"); + emit_library_call_value (libfunc, operands[0], LCT_NORMAL, DImode, + addr, Pmode); + DONE; + } + + if (TARGET_SOFT_FLOAT) + FAIL; + + /* Fallback to processor load with barriers. 
   model = memmodel_from_int (INTVAL (operands[2]));
-  operands[1] = force_reg (SImode, XEXP (operands[1], 0));
+  operands[1] = force_reg (Pmode, XEXP (operands[1], 0));
   if (is_mm_seq_cst (model))
     expand_mem_thread_fence (model);
   emit_insn (gen_atomic_loaddi_1 (operands[0], operands[1]));
@@ -10460,12 +10540,21 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
   ""
 {
   enum memmodel model;
+  rtx addr, libfunc;
 
-  if (TARGET_SYNC_LIBCALL)
+  if (TARGET_SYNC_LIBCALLS && MAX_SYNC_LIBFUNC_SIZE >= 8)
     {
-      rtx libfunc = optab_libfunc (sync_lock_test_and_set_optab, DImode);
-      rtx addr = convert_memory_address (Pmode, XEXP (operands[0], 0));
+      addr = convert_memory_address (Pmode, XEXP (operands[0], 0));
+      libfunc = optab_libfunc (sync_lock_test_and_set_optab, DImode);
+      emit_library_call (libfunc, LCT_NORMAL, VOIDmode, addr, Pmode,
+                         operands[1], DImode);
+      DONE;
+    }
+
+  if (TARGET_ATOMIC_LIBCALLS)
+    {
+      addr = convert_memory_address (Pmode, XEXP (operands[0], 0));
+      libfunc = init_one_libfunc ("__atomic_exchange_8");
       emit_library_call (libfunc, LCT_NORMAL, VOIDmode, addr, Pmode,
                          operands[1], DImode);
       DONE;
@@ -10474,8 +10563,9 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
   if (TARGET_64BIT || TARGET_SOFT_FLOAT)
     FAIL;
 
+  /* Fallback to processor store with barriers.  */
   model = memmodel_from_int (INTVAL (operands[2]));
-  operands[0] = force_reg (SImode, XEXP (operands[0], 0));
+  operands[0] = force_reg (Pmode, XEXP (operands[0], 0));
   if (operands[1] != CONST0_RTX (DImode))
     operands[1] = force_reg (DImode, operands[1]);
   expand_mem_thread_fence (model);
diff --git a/gcc/config/pa/pa.opt b/gcc/config/pa/pa.opt
index 58b59f8f0e8..2d074f5fe88 100644
--- a/gcc/config/pa/pa.opt
+++ b/gcc/config/pa/pa.opt
@@ -37,6 +37,10 @@ march=2.0
 Target RejectNegative
 Generate PA2.0 code (requires binutils 2.10 or later).
 
+matomic-libcalls
+Target Var(TARGET_ATOMIC_LIBCALLS) Init(1)
+Generate libcalls for atomic loads and stores when sync libcalls are disabled.
+
 mbig-switch
 Target Ignore
 Does nothing.  Preserved for backward compatibility.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 701c228bd0a..474dcaed5dc 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -975,17 +975,18 @@ Objective-C and Objective-C++ Dialects}.
 @emph{HPPA Options}
 @gccoptlist{-march=@var{architecture-type} @gol
+-matomic-libcalls -mbig-switch @gol
 -mcaller-copies -mdisable-fpregs -mdisable-indexing @gol
--mfast-indirect-calls -mgas -mgnu-ld -mhp-ld @gol
+-mordered -mfast-indirect-calls -mgas -mgnu-ld -mhp-ld @gol
 -mfixed-range=@var{register-range} @gol
--mjump-in-delay -mlinker-opt -mlong-calls @gol
--mlong-load-store -mno-disable-fpregs @gol
+-mcoherent-ldcw -mjump-in-delay -mlinker-opt -mlong-calls @gol
+-mlong-load-store -mno-atomic-libcalls -mno-disable-fpregs @gol
 -mno-disable-indexing -mno-fast-indirect-calls -mno-gas @gol
 -mno-jump-in-delay -mno-long-load-store @gol
 -mno-portable-runtime -mno-soft-float @gol
 -mno-space-regs -msoft-float -mpa-risc-1-0 @gol
 -mpa-risc-1-1 -mpa-risc-2-0 -mportable-runtime @gol
--mschedule=@var{cpu-type} -mspace-regs -msio -mwsio @gol
+-mschedule=@var{cpu-type} -mspace-regs -msoft-mult -msio -mwsio @gol
 -munix=@var{unix-std} -nolibdld -static -threads}
 
 @emph{IA-64 Options}
@@ -24891,6 +24892,33 @@ other way around.
 @opindex mpa-risc-2-0
 Synonyms for @option{-march=1.0}, @option{-march=1.1}, and
 @option{-march=2.0} respectively.
 
+@item -matomic-libcalls
+@opindex matomic-libcalls
+@opindex mno-atomic-libcalls
+Generate libcalls for atomic loads and stores when sync libcalls are disabled.
+This option is enabled by default.  It only affects the generation of
+atomic libcalls by the HPPA backend.
+
+Both the sync and @file{libatomic} libcall implementations use locking.
+As a result, processor stores are not atomic with respect to other
+atomic operations.  Processor loads up to DImode are atomic with
+respect to other atomic operations provided they are implemented as
+a single access.
+
+The PA-RISC architecture does not support any atomic operations in
+hardware except for the @code{ldcw} instruction.  Thus, all atomic
+support is implemented using sync and atomic libcalls.  Sync libcall
+support is in @file{libgcc.a}.  Atomic libcall support is in
+@file{libatomic}.
+
+This option generates @code{__atomic_exchange} calls for atomic stores.
+It also provides special handling for atomic DImode accesses on 32-bit
+targets.
+
+@item -mbig-switch
+@opindex mbig-switch
+Does nothing.  Preserved for backward compatibility.
+
 @item -mcaller-copies
 @opindex mcaller-copies
 The caller copies function arguments passed by hidden reference.  This
@@ -24899,30 +24927,19 @@ option should be used with care as it is not compatible with the default
 passed by hidden reference and the option provides better compatibility
 with OpenMP.
 
-@item -mjump-in-delay
-@opindex mjump-in-delay
-This option is ignored and provided for compatibility purposes only.
+@item -mcoherent-ldcw
+@opindex mcoherent-ldcw
+Use ldcw/ldcd coherent cache-control hint.
 
 @item -mdisable-fpregs
 @opindex mdisable-fpregs
-Prevent floating-point registers from being used in any manner.  This is
-necessary for compiling kernels that perform lazy context switching of
-floating-point registers.  If you use this option and attempt to perform
-floating-point operations, the compiler aborts.
+Disable floating-point registers.  Equivalent to @code{-msoft-float}.
 
 @item -mdisable-indexing
 @opindex mdisable-indexing
 Prevent the compiler from using indexing address modes.  This avoids some
 rather obscure problems when compiling MIG generated code under MACH@.
 
-@item -mno-space-regs
-@opindex mno-space-regs
-@opindex mspace-regs
-Generate code that assumes the target has no space registers.  This allows
-GCC to generate faster indirect calls and use unscaled index address modes.
-
-Such code is suitable for level 0 PA systems and kernels.
-
 @item -mfast-indirect-calls
 @opindex mfast-indirect-calls
 Generate code that assumes calls never cross space boundaries.  This
@@ -24939,57 +24956,10 @@ useful when compiling kernel code.
 A register range is specified as two registers separated by a dash.
 Multiple register ranges can be specified separated by a comma.
 
-@item -mlong-load-store
-@opindex mlong-load-store
-Generate 3-instruction load and store sequences as sometimes required by
-the HP-UX 10 linker.  This is equivalent to the @samp{+k} option to
-the HP compilers.
-
-@item -mportable-runtime
-@opindex mportable-runtime
-Use the portable calling conventions proposed by HP for ELF systems.
-
 @item -mgas
 @opindex mgas
 Enable the use of assembler directives only GAS understands.
 
-@item -mschedule=@var{cpu-type}
-@opindex mschedule
-Schedule code according to the constraints for the machine type
-@var{cpu-type}.  The choices for @var{cpu-type} are @samp{700}
-@samp{7100}, @samp{7100LC}, @samp{7200}, @samp{7300} and @samp{8000}.  Refer
-to @file{/usr/lib/sched.models} on an HP-UX system to determine the
-proper scheduling option for your machine.  The default scheduling is
-@samp{8000}.
-
-@item -mlinker-opt
-@opindex mlinker-opt
-Enable the optimization pass in the HP-UX linker.  Note this makes symbolic
-debugging impossible.  It also triggers a bug in the HP-UX 8 and HP-UX 9
-linkers in which they give bogus error messages when linking some programs.
-
-@item -msoft-float
-@opindex msoft-float
-Generate output containing library calls for floating point.
-@strong{Warning:} the requisite libraries are not available for all HPPA
-targets.  Normally the facilities of the machine's usual C compiler are
-used, but this cannot be done directly in cross-compilation.  You must make
-your own arrangements to provide suitable library functions for
-cross-compilation.
-
-@option{-msoft-float} changes the calling convention in the output file;
-therefore, it is only useful if you compile @emph{all} of a program with
-this option.  In particular, you need to compile @file{libgcc.a}, the
-library that comes with GCC, with @option{-msoft-float} in order for
-this to work.
-
-@item -msio
-@opindex msio
-Generate the predefine, @code{_SIO}, for server IO@.  The default is
-@option{-mwsio}.  This generates the predefines, @code{__hp9000s700},
-@code{__hp9000s700__} and @code{_WSIO}, for workstation IO@.  These
-options are available under HP-UX and HI-UX@.
-
 @item -mgnu-ld
 @opindex mgnu-ld
 Use options specific to GNU @command{ld}.
@@ -25019,6 +24989,12 @@ configure option, GCC's program search path, and finally by the user's
 `gcc -print-prog-name=ld`}.  This option is only available on the 64-bit
 HP-UX GCC, i.e.@: configured with @samp{hppa*64*-*-hpux*}.
 
+@item -mlinker-opt
+@opindex mlinker-opt
+Enable the optimization pass in the HP-UX linker.  Note this makes symbolic
+debugging impossible.  It also triggers a bug in the HP-UX 8 and HP-UX 9
+linkers in which they give bogus error messages when linking some programs.
+
 @item -mlong-calls
 @opindex mno-long-calls
 @opindex mlong-calls
@@ -25047,6 +25023,69 @@ symbol-difference or pc-relative calls should be relatively small.
 However, an indirect call is used on 32-bit ELF systems in pic code and
 it is quite long.
 
+@item -mlong-load-store
+@opindex mlong-load-store
+Generate 3-instruction load and store sequences as sometimes required by
+the HP-UX 10 linker.  This is equivalent to the @samp{+k} option to
+the HP compilers.
+
+@item -mjump-in-delay
+@opindex mjump-in-delay
+This option is ignored and provided for compatibility purposes only.
+
+@item -mno-space-regs
+@opindex mno-space-regs
+@opindex mspace-regs
+Generate code that assumes the target has no space registers.  This allows
+GCC to generate faster indirect calls and use unscaled index address modes.
+
+Such code is suitable for level 0 PA systems and kernels.
+
+@item -mordered
+@opindex mordered
+Assume memory references are ordered and barriers are not needed.
+
+@item -mportable-runtime
+@opindex mportable-runtime
+Use the portable calling conventions proposed by HP for ELF systems.
+
+@item -mschedule=@var{cpu-type}
+@opindex mschedule
+Schedule code according to the constraints for the machine type
+@var{cpu-type}.  The choices for @var{cpu-type} are @samp{700}
+@samp{7100}, @samp{7100LC}, @samp{7200}, @samp{7300} and @samp{8000}.  Refer
+to @file{/usr/lib/sched.models} on an HP-UX system to determine the
+proper scheduling option for your machine.  The default scheduling is
+@samp{8000}.
+
+@item -msio
+@opindex msio
+Generate the predefine, @code{_SIO}, for server IO@.  The default is
+@option{-mwsio}.  This generates the predefines, @code{__hp9000s700},
+@code{__hp9000s700__} and @code{_WSIO}, for workstation IO@.  These
+options are available under HP-UX and HI-UX@.
+
+@item -msoft-float
+@opindex msoft-float
+Generate output containing library calls for floating point.
+@strong{Warning:} the requisite libraries are not available for all HPPA
+targets.  Normally the facilities of the machine's usual C compiler are
+used, but this cannot be done directly in cross-compilation.  You must make
+your own arrangements to provide suitable library functions for
+cross-compilation.
+
+@option{-msoft-float} changes the calling convention in the output file;
+therefore, it is only useful if you compile @emph{all} of a program with
+this option.  In particular, you need to compile @file{libgcc.a}, the
+library that comes with GCC, with @option{-msoft-float} in order for
+this to work.
+
+@item -msoft-mult
+@opindex msoft-mult
+Use software integer multiplication.
+
+This disables the use of the @code{xmpyu} instruction.
+
 @item -munix=@var{unix-std}
 @opindex march
 Generate compiler predefines and select a startfile for the specified
diff --git a/libgcc/config.host b/libgcc/config.host
index d2087654c40..3e2c9109ab1 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -633,7 +633,7 @@ h8300-*-linux*)
 	tm_file="$tm_file h8300/h8300-lib.h"
 	;;
 hppa*64*-*-linux*)
-	tmake_file="$tmake_file pa/t-linux64 pa/t-dimode"
+	tmake_file="$tmake_file pa/t-pa64-linux pa/t-dimode"
 	tmake_file="$tmake_file pa/t-softfp-sfdftf t-softfp"
 	extra_parts="crtbegin.o crtbeginS.o crtbeginT.o crtend.o crtendS.o"
 	;;
@@ -649,7 +649,7 @@ hppa*-*-linux*)
 	md_unwind_header=pa/linux-unwind.h
 	;;
 hppa*64*-*-hpux11*)
-	tmake_file="$tmake_file pa/t-hpux pa/t-pa64 pa/t-dimode"
+	tmake_file="$tmake_file pa/t-pa64-hpux pa/t-dimode"
 	tmake_file="$tmake_file pa/t-stublib t-libgcc-pic t-slibgcc"
 	# Set the libgcc version number
 	if test x$ac_cv_sjlj_exceptions = xyes; then
diff --git a/libgcc/config/pa/linux-atomic.c b/libgcc/config/pa/linux-atomic.c
index 10d7f4217f5..1978e681f77 100644
--- a/libgcc/config/pa/linux-atomic.c
+++ b/libgcc/config/pa/linux-atomic.c
@@ -32,6 +32,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 
 typedef unsigned char u8;
 typedef short unsigned int u16;
+typedef unsigned int u32;
 #ifdef __LP64__
 typedef long unsigned int u64;
 #else
@@ -115,6 +116,36 @@ __kernel_cmpxchg2 (volatile void *mem, const void *oldval, const void *newval,
 #define MASK_1 0xffu
 #define MASK_2 0xffffu
 
+/* Load value with an atomic processor load if possible.  */
+#define ATOMIC_LOAD(TYPE, WIDTH) \
+  static inline TYPE \
+  atomic_load_##WIDTH (volatile void *ptr) \
+  { \
+    return *(volatile TYPE *)ptr; \
+  }
+
+#if defined(__LP64__) || defined(__SOFTFP__)
+ATOMIC_LOAD (u64, 8)
+#else
+static inline u64
+atomic_load_8 (volatile void *ptr)
+{
+  u64 result;
+  double tmp;
+
+  asm volatile ("{fldds|fldd} 0(%2),%1\n\t"
+		"{fstds|fstd} %1,-16(%%sp)\n\t"
+		"{ldws|ldw} -16(%%sp),%0\n\t"
+		"{ldws|ldw} -12(%%sp),%R0"
+		: "=r" (result), "=f" (tmp) : "r" (ptr): "memory");
+  return result;
+}
+#endif
+
+ATOMIC_LOAD (u32, 4)
+ATOMIC_LOAD (u16, 2)
+ATOMIC_LOAD (u8, 1)
+
 #define FETCH_AND_OP_2(OP, PFX_OP, INF_OP, TYPE, WIDTH, INDEX) \
   TYPE HIDDEN \
   __sync_fetch_and_##OP##_##WIDTH (volatile void *ptr, TYPE val) \
@@ -123,7 +154,7 @@ __kernel_cmpxchg2 (volatile void *mem, const void *oldval, const void *newval,
     long failure; \
  \
     do { \
-      tmp = __atomic_load_n ((volatile TYPE *)ptr, __ATOMIC_RELAXED); \
+      tmp = atomic_load_##WIDTH ((volatile TYPE *)ptr); \
       newval = PFX_OP (tmp INF_OP val); \
       failure = __kernel_cmpxchg2 (ptr, &tmp, &newval, INDEX); \
     } while (failure != 0); \
@@ -160,7 +191,7 @@ FETCH_AND_OP_2 (nand, ~, &, u8, 1, 0)
     long failure; \
  \
     do { \
-      tmp = __atomic_load_n ((volatile TYPE *)ptr, __ATOMIC_RELAXED); \
+      tmp = atomic_load_##WIDTH ((volatile TYPE *)ptr); \
       newval = PFX_OP (tmp INF_OP val); \
       failure = __kernel_cmpxchg2 (ptr, &tmp, &newval, INDEX); \
     } while (failure != 0); \
@@ -197,8 +228,7 @@ OP_AND_FETCH_2 (nand, ~, &, u8, 1, 0)
     long failure; \
  \
     do { \
-      tmp = __atomic_load_n ((volatile unsigned int *)ptr, \
-			     __ATOMIC_RELAXED); \
+      tmp = atomic_load_4 ((volatile unsigned int *)ptr); \
       failure = __kernel_cmpxchg (ptr, tmp, PFX_OP (tmp INF_OP val)); \
     } while (failure != 0); \
  \
@@ -220,8 +250,7 @@ FETCH_AND_OP_WORD (nand, ~, &)
     long failure; \
  \
     do { \
-      tmp = __atomic_load_n ((volatile unsigned int *)ptr, \
-			     __ATOMIC_RELAXED); \
+      tmp = atomic_load_4 ((volatile unsigned int *)ptr); \
       failure = __kernel_cmpxchg (ptr, tmp, PFX_OP (tmp INF_OP val)); \
     } while (failure != 0); \
  \
@@ -247,8 +276,7 @@ typedef unsigned char bool;
  \
     while (1) \
       { \
-	actual_oldval = __atomic_load_n ((volatile TYPE *)ptr, \
-					 __ATOMIC_RELAXED); \
+	actual_oldval = atomic_load_##WIDTH ((volatile TYPE *)ptr); \
  \
 	if (__builtin_expect (oldval != actual_oldval, 0)) \
 	  return actual_oldval; \
@@ -281,8 +309,7 @@ __sync_val_compare_and_swap_4 (volatile void *ptr, unsigned int oldval,
 
   while (1)
     {
-      actual_oldval = __atomic_load_n ((volatile unsigned int *)ptr,
-				       __ATOMIC_RELAXED);
+      actual_oldval = atomic_load_4 ((volatile unsigned int *)ptr);
 
       if (__builtin_expect (oldval != actual_oldval, 0))
	return actual_oldval;
@@ -310,8 +337,7 @@ TYPE HIDDEN \
     long failure; \
  \
     do { \
-      oldval = __atomic_load_n ((volatile TYPE *)ptr, \
-				__ATOMIC_RELAXED); \
+      oldval = atomic_load_##WIDTH ((volatile TYPE *)ptr); \
       failure = __kernel_cmpxchg2 (ptr, &oldval, &val, INDEX); \
     } while (failure != 0); \
  \
@@ -322,14 +348,14 @@ SYNC_LOCK_TEST_AND_SET_2 (u64, 8, 3)
 SYNC_LOCK_TEST_AND_SET_2 (u16, 2, 1)
 SYNC_LOCK_TEST_AND_SET_2 (u8, 1, 0)
 
-unsigned int HIDDEN
+u32 HIDDEN
 __sync_lock_test_and_set_4 (volatile void *ptr, unsigned int val)
 {
   long failure;
   unsigned int oldval;
 
   do {
-    oldval = __atomic_load_n ((volatile unsigned int *)ptr, __ATOMIC_RELAXED);
+    oldval = atomic_load_4 ((volatile unsigned int *)ptr);
     failure = __kernel_cmpxchg (ptr, oldval, val);
   } while (failure != 0);
 
@@ -344,8 +370,7 @@ __sync_lock_test_and_set_4 (volatile void *ptr, unsigned int val)
     long failure; \
  \
     do { \
-      oldval = __atomic_load_n ((volatile TYPE *)ptr, \
-				__ATOMIC_RELAXED); \
+      oldval = atomic_load_##WIDTH ((volatile TYPE *)ptr); \
       failure = __kernel_cmpxchg2 (ptr, &oldval, &val, INDEX); \
     } while (failure != 0); \
   }
@@ -361,7 +386,27 @@ __sync_lock_release_4 (volatile void *ptr)
   unsigned int oldval;
 
   do {
-    oldval = __atomic_load_n ((volatile unsigned int *)ptr, __ATOMIC_RELAXED);
+    oldval = atomic_load_4 ((volatile unsigned int *)ptr);
     failure = __kernel_cmpxchg (ptr, oldval, 0);
   } while (failure != 0);
 }
+
+#ifndef __LP64__
+#define SYNC_LOCK_LOAD_2(TYPE, WIDTH, INDEX) \
+  TYPE __sync_lock_load_##WIDTH (volatile void *) HIDDEN; \
+  TYPE \
+  __sync_lock_load_##WIDTH (volatile void *ptr) \
+  { \
+    TYPE oldval; \
+    long failure; \
+ \
+    do { \
+      oldval = atomic_load_##WIDTH ((volatile TYPE *)ptr); \
+      failure = __kernel_cmpxchg2 (ptr, &oldval, &oldval, INDEX); \
+    } while (failure != 0); \
+ \
+    return oldval; \
+  }
+
+SYNC_LOCK_LOAD_2 (u64, 8, 3)
+#endif
diff --git a/libgcc/config/pa/sync-libfuncs.c b/libgcc/config/pa/sync-libfuncs.c
new file mode 100644
index 00000000000..c70be0fde73
--- /dev/null
+++ b/libgcc/config/pa/sync-libfuncs.c
@@ -0,0 +1,324 @@
+/* PA-RISC sync libfunc support.
+   Copyright (C) 2008-2023 Free Software Foundation, Inc.
+   Based on code contributed by CodeSourcery for ARM EABI Linux.
+   Modifications for PA Linux by Helge Deller
+   Revised for general use by John David Anglin
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+typedef unsigned char u8;
+typedef short unsigned int u16;
+typedef unsigned int u32;
+#ifdef __LP64__
+typedef long unsigned int u64;
+#else
+typedef long long unsigned int u64;
+#endif
+
+/* PA-RISC 2.0 supports out-of-order execution for loads and stores.
+   Thus, we need to synchronize memory accesses.  For more info, see:
+   "Advanced Performance Features of the 64-bit PA-8000" by Doug Hunt.  */
+
+typedef volatile int __attribute__((aligned (16))) ldcw_t;
+static ldcw_t __atomicity_lock = 1;
+
+/* We want default visibility for the sync routines.  */
+#undef VISIBILITY
+#if defined(__hpux__) && !defined(__LP64__)
+#define VISIBILITY
+#else
+#define VISIBILITY __attribute__ ((visibility ("default")))
+#endif
+
+/* Perform ldcw operation in cache when possible.  The ldcw instruction
+   is a full barrier.  */
+#ifndef _PA_LDCW_INSN
+# ifdef _PA_RISC2_0
+# define _PA_LDCW_INSN "ldcw,co"
+# else
+# define _PA_LDCW_INSN "ldcw"
+# endif
+#endif
+
+static inline void
+__sync_spin_lock (void)
+{
+  ldcw_t *lock = &__atomicity_lock;
+  int tmp;
+
+  __asm__ __volatile__ (_PA_LDCW_INSN " 0(%1),%0\n\t"
+			"cmpib,<>,n 0,%0,.+20\n\t"
+			"ldw,ma 0(%1),%0\n\t"
+			"cmpib,<> 0,%0,.-12\n\t"
+			"nop\n\t"
+			"b,n .-12"
+			: "=&r" (tmp)
+			: "r" (lock)
+			: "memory");
+}
+
+static inline void
+__sync_spin_unlock (void)
+{
+  ldcw_t *lock = &__atomicity_lock;
+  int tmp = 1;
+
+  /* Use ordered store for release.  */
+  __asm__ __volatile__ ("stw,ma %1,0(%0)"
+			: : "r" (lock), "r" (tmp) : "memory");
+}
+
+/* Load value with an atomic processor load if possible.  */
+#define ATOMIC_LOAD(TYPE, WIDTH) \
+  static inline TYPE \
+  atomic_load_##WIDTH (volatile void *ptr) \
+  { \
+    return *(volatile TYPE *)ptr; \
+  }
+
+#if defined(__LP64__) || defined(__SOFTFP__)
+ATOMIC_LOAD (u64, 8)
+#else
+static inline u64
+atomic_load_8 (volatile void *ptr)
+{
+  u64 result;
+  double tmp;
+
+  asm volatile ("{fldds|fldd} 0(%2),%1\n\t"
+		"{fstds|fstd} %1,-16(%%sp)\n\t"
+		"{ldws|ldw} -16(%%sp),%0\n\t"
+		"{ldws|ldw} -12(%%sp),%R0"
+		: "=r" (result), "=f" (tmp) : "r" (ptr): "memory");
+  return result;
+}
+#endif
+
+ATOMIC_LOAD (u32, 4)
+ATOMIC_LOAD (u16, 2)
+ATOMIC_LOAD (u8, 1)
+
+/* Store value with an atomic processor store if possible.  */
+#define ATOMIC_STORE(TYPE, WIDTH) \
+  static inline void \
+  atomic_store_##WIDTH (volatile void *ptr, TYPE value) \
+  { \
+    *(volatile TYPE *)ptr = value; \
+  }
+
+#if defined(__LP64__) || defined(__SOFTFP__)
+ATOMIC_STORE (u64, 8)
+#else
+static inline void
+atomic_store_8 (volatile void *ptr, u64 value)
+{
+  double tmp;
+
+  asm volatile ("{stws|stw} %2,-16(%%sp)\n\t"
+		"{stws|stw} %R2,-12(%%sp)\n\t"
+		"{fldds|fldd} -16(%%sp),%1\n\t"
+		"{fstds|fstd} %1,0(%0)"
+		: "=m" (ptr), "=&f" (tmp) : "r" (value): "memory");
+}
+#endif
+
+ATOMIC_STORE (u32, 4)
+ATOMIC_STORE (u16, 2)
+ATOMIC_STORE (u8, 1)
+
+#define FETCH_AND_OP(OP, PFX_OP, INF_OP, TYPE, WIDTH) \
+  TYPE VISIBILITY \
+  __sync_fetch_and_##OP##_##WIDTH (volatile void *ptr, TYPE val) \
+  { \
+    TYPE tmp, newval; \
+ \
+    __sync_spin_lock(); \
+    tmp = atomic_load_##WIDTH (ptr); \
+    newval = PFX_OP (tmp INF_OP val); \
+    atomic_store_##WIDTH (ptr, newval); \
+    __sync_spin_unlock(); \
+ \
+    return tmp; \
+  }
+
+FETCH_AND_OP (add, , +, u64, 8)
+FETCH_AND_OP (sub, , -, u64, 8)
+FETCH_AND_OP (or, , |, u64, 8)
+FETCH_AND_OP (and, , &, u64, 8)
+FETCH_AND_OP (xor, , ^, u64, 8)
+FETCH_AND_OP (nand, ~, &, u64, 8)
+
+FETCH_AND_OP (add, , +, u32, 4)
+FETCH_AND_OP (sub, , -, u32, 4)
+FETCH_AND_OP (or, , |, u32, 4)
+FETCH_AND_OP (and, , &, u32, 4)
+FETCH_AND_OP (xor, , ^, u32, 4)
+FETCH_AND_OP (nand, ~, &, u32, 4)
+
+FETCH_AND_OP (add, , +, u16, 2)
+FETCH_AND_OP (sub, , -, u16, 2)
+FETCH_AND_OP (or, , |, u16, 2)
+FETCH_AND_OP (and, , &, u16, 2)
+FETCH_AND_OP (xor, , ^, u16, 2)
+FETCH_AND_OP (nand, ~, &, u16, 2)
+
+FETCH_AND_OP (add, , +, u8, 1)
+FETCH_AND_OP (sub, , -, u8, 1)
+FETCH_AND_OP (or, , |, u8, 1)
+FETCH_AND_OP (and, , &, u8, 1)
+FETCH_AND_OP (xor, , ^, u8, 1)
+FETCH_AND_OP (nand, ~, &, u8, 1)
+
+#define OP_AND_FETCH(OP, PFX_OP, INF_OP, TYPE, WIDTH) \
+  TYPE VISIBILITY \
+  __sync_##OP##_and_fetch_##WIDTH (volatile void *ptr, TYPE val) \
+  { \
+    TYPE tmp, newval; \
+ \
+    __sync_spin_lock(); \
+    tmp = atomic_load_##WIDTH (ptr); \
+    newval = PFX_OP (tmp INF_OP val); \
+    atomic_store_##WIDTH (ptr, newval); \
+    __sync_spin_unlock(); \
+ \
+    return newval; \
+  }
+
+OP_AND_FETCH (add, , +, u64, 8)
+OP_AND_FETCH (sub, , -, u64, 8)
+OP_AND_FETCH (or, , |, u64, 8)
+OP_AND_FETCH (and, , &, u64, 8)
+OP_AND_FETCH (xor, , ^, u64, 8)
+OP_AND_FETCH (nand, ~, &, u64, 8)
+
+OP_AND_FETCH (add, , +, u32, 4)
+OP_AND_FETCH (sub, , -, u32, 4)
+OP_AND_FETCH (or, , |, u32, 4)
+OP_AND_FETCH (and, , &, u32, 4)
+OP_AND_FETCH (xor, , ^, u32, 4)
+OP_AND_FETCH (nand, ~, &, u32, 4)
+
+OP_AND_FETCH (add, , +, u16, 2)
+OP_AND_FETCH (sub, , -, u16, 2)
+OP_AND_FETCH (or, , |, u16, 2)
+OP_AND_FETCH (and, , &, u16, 2)
+OP_AND_FETCH (xor, , ^, u16, 2)
+OP_AND_FETCH (nand, ~, &, u16, 2)
+
+OP_AND_FETCH (add, , +, u8, 1)
+OP_AND_FETCH (sub, , -, u8, 1)
+OP_AND_FETCH (or, , |, u8, 1)
+OP_AND_FETCH (and, , &, u8, 1)
+OP_AND_FETCH (xor, , ^, u8, 1)
+OP_AND_FETCH (nand, ~, &, u8, 1)
+
+#define COMPARE_AND_SWAP(TYPE, WIDTH) \
+  TYPE VISIBILITY \
+  __sync_val_compare_and_swap_##WIDTH (volatile void *ptr, TYPE oldval, \
+				       TYPE newval) \
+  { \
+    TYPE actual_oldval; \
+ \
+    __sync_spin_lock(); \
+    actual_oldval = atomic_load_##WIDTH (ptr); \
+    if (actual_oldval == oldval) \
+      atomic_store_##WIDTH (ptr, newval); \
+    __sync_spin_unlock(); \
+ \
+    return actual_oldval; \
+  } \
+ \
+  _Bool VISIBILITY \
+  __sync_bool_compare_and_swap_##WIDTH (volatile void *ptr, \
+					TYPE oldval, TYPE newval) \
+  { \
+    TYPE actual_oldval; \
+    _Bool result; \
+ \
+    __sync_spin_lock(); \
+    actual_oldval = atomic_load_##WIDTH (ptr); \
+    result = (actual_oldval == oldval); \
+    if (result) \
+      atomic_store_##WIDTH (ptr, newval); \
+    __sync_spin_unlock(); \
+ \
+    return result; \
+  }
+
+COMPARE_AND_SWAP (u64, 8)
+COMPARE_AND_SWAP (u32, 4)
+COMPARE_AND_SWAP (u16, 2)
+COMPARE_AND_SWAP (u8, 1)
+
+#define SYNC_LOCK_TEST_AND_SET(TYPE, WIDTH) \
+TYPE VISIBILITY \
+  __sync_lock_test_and_set_##WIDTH (volatile void *ptr, TYPE val) \
+  { \
+    TYPE oldval; \
+ \
+    __sync_spin_lock(); \
+    oldval = atomic_load_##WIDTH (ptr); \
+    atomic_store_##WIDTH (ptr, val); \
+    __sync_spin_unlock(); \
+ \
+    return oldval; \
+  }
+
+SYNC_LOCK_TEST_AND_SET (u64, 8)
+SYNC_LOCK_TEST_AND_SET (u32, 4)
+SYNC_LOCK_TEST_AND_SET (u16, 2)
+SYNC_LOCK_TEST_AND_SET (u8, 1)
+
+#define SYNC_LOCK_RELEASE(TYPE, WIDTH) \
+  void VISIBILITY \
+  __sync_lock_release_##WIDTH (volatile void *ptr) \
+  { \
+    TYPE val = 0; \
+ \
+    __sync_spin_lock(); \
+    atomic_store_##WIDTH (ptr, val); \
+    __sync_spin_unlock(); \
+  }
+
+SYNC_LOCK_RELEASE (u64, 8)
+SYNC_LOCK_RELEASE (u32, 4)
+SYNC_LOCK_RELEASE (u16, 2)
+SYNC_LOCK_RELEASE (u8, 1)
+
+#define SYNC_LOCK_LOAD(TYPE, WIDTH) \
+TYPE VISIBILITY __sync_lock_load_##WIDTH (volatile void *); \
+TYPE VISIBILITY \
+  __sync_lock_load_##WIDTH (volatile void *ptr) \
+  { \
+    TYPE oldval; \
+ \
+    __sync_spin_lock(); \
+    oldval = atomic_load_##WIDTH (ptr); \
+    __sync_spin_unlock(); \
+ \
+    return oldval; \
+  }
+
+SYNC_LOCK_LOAD (u64, 8)
+SYNC_LOCK_LOAD (u32, 4)
+SYNC_LOCK_LOAD (u16, 2)
+SYNC_LOCK_LOAD (u8, 1)
diff --git a/libgcc/config/pa/t-netbsd b/libgcc/config/pa/t-netbsd
index 8b99068ce42..13943940a37 100644
--- a/libgcc/config/pa/t-netbsd
+++ b/libgcc/config/pa/t-netbsd
@@ -7,3 +7,4 @@ LIB1ASMFUNCS = _divI _divU _remI _remU _div_const _mulI _dyncall
 HOST_LIBGCC2_CFLAGS += -DELF=1 -DLINUX=1
 
 LIB2ADD = $(srcdir)/config/pa/fptr.c
+LIB2ADD_ST = $(srcdir)/config/pa/sync-libfuncs.c
diff --git a/libgcc/config/pa/t-openbsd b/libgcc/config/pa/t-openbsd
index 8b99068ce42..13943940a37 100644
--- a/libgcc/config/pa/t-openbsd
+++ b/libgcc/config/pa/t-openbsd
@@ -7,3 +7,4 @@ LIB1ASMFUNCS = _divI _divU _remI _remU _div_const _mulI _dyncall
 HOST_LIBGCC2_CFLAGS += -DELF=1 -DLINUX=1
 
 LIB2ADD = $(srcdir)/config/pa/fptr.c
+LIB2ADD_ST = $(srcdir)/config/pa/sync-libfuncs.c
diff --git a/libgcc/config/pa/t-pa64-hpux b/libgcc/config/pa/t-pa64-hpux
new file mode 100644
index 00000000000..55194e8f379
--- /dev/null
+++ b/libgcc/config/pa/t-pa64-hpux
@@ -0,0 +1,4 @@
+LIB2ADD = $(srcdir)/config/pa/quadlib.c
+LIB2ADD_ST = $(srcdir)/config/pa/sync-libfuncs.c
+
+HOST_LIBGCC2_CFLAGS += -frandom-seed=fixed-seed -Dpa64=1 -DELF=1 -mlong-calls
diff --git a/libgcc/config/pa/t-pa64-linux b/libgcc/config/pa/t-pa64-linux
new file mode 100644
index 00000000000..026b48b02e5
--- /dev/null
+++ b/libgcc/config/pa/t-pa64-linux
@@ -0,0 +1,8 @@
+# Plug millicode routines into libgcc.a  We want these on both native and
+# cross compiles.
+LIB1ASMSRC = pa/milli64.S
+LIB1ASMFUNCS = _divI _divU _remI _remU _div_const _mulI
+
+HOST_LIBGCC2_CFLAGS += -Dpa64=1 -DELF=1 -DLINUX=1
+
+LIB2ADD_ST = $(srcdir)/config/pa/linux-atomic.c
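
A few illustrative sketches follow.  They are not part of the patch;
the file names, commands and helper names in them are examples only.

First, the kind of user code affected by issues 1 and 3 above.
Building something like this with -fno-sync-libcalls could previously
ICE, and on non-Linux targets atomic stores could be silently emitted
as ordinary processor stores:

  /* store.c -- e.g. "gcc -O2 -fno-sync-libcalls store.c" (hypothetical
     invocation).  With this change, the SImode store below expands to a
     __sync_lock_test_and_set_4 libcall (or __atomic_exchange_4 when
     sync libcalls are disabled), and the DImode store to the _8
     variants.  */
  #include <stdint.h>

  uint32_t word;
  uint64_t dword;

  void
  store_both (void)
  {
    __atomic_store_n (&word, 1, __ATOMIC_SEQ_CST);
    __atomic_store_n (&dword, 1, __ATOMIC_SEQ_CST);
  }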
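
Second, the locking caveat documented under -matomic-libcalls.  Since
both libcall implementations use locking, a plain store that bypasses
the lock is not atomic with respect to concurrent atomic operations,
even though the store itself is a single instruction:

  #include <stdint.h>

  uint32_t counter;

  void
  reset (void)
  {
    counter = 0;  /* Plain stw; takes no lock.  */
  }

  uint32_t
  increment (void)
  {
    /* Lock-based read-modify-write (a __kernel_cmpxchg loop on Linux).
       The plain store in reset () can land in the middle of it, so the
       increment can be lost.  */
    return __sync_fetch_and_add (&counter, 1);
  }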
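
Finally, for readers tracing the macros in linux-atomic.c, this is
roughly what one instance, __sync_fetch_and_add_4 from
FETCH_AND_OP_WORD, expands to after this change (simplified;
__kernel_cmpxchg is the kernel-assisted helper defined earlier in that
file):

  unsigned int
  __sync_fetch_and_add_4 (volatile void *ptr, unsigned int val)
  {
    unsigned int tmp;
    long failure;

    do
      {
	/* atomic_load_4: a plain word load, atomic on hppa.  */
	tmp = *(volatile unsigned int *) ptr;
	/* Retry if another writer got in between the load and the
	   compare-and-swap.  */
	failure = __kernel_cmpxchg (ptr, tmp, tmp + val);
      }
    while (failure != 0);

    return tmp;  /* Old value, per the __sync_fetch_and_OP contract.  */
  }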