The following change fixes a number of problems with atomic loads and
stores on hppa.  Tested on hppa-unknown-linux-gnu and
hppa64-hp-hpux11.11.

The TARGET_SOFT_FLOAT case is not tested.  It's possible we need
additional atomic load support for this case, but I think that's
unnecessary since hppa requires strict alignment.

We could use an array of locks in sync-libfuncs.c to improve
performance, but I kept things simple for now.

A few illustrative sketches of the affected cases follow the patch.

Committed to trunk.

Dave
---

Fix support for atomic loads and stores on hppa.

This change updates the atomic libcall support to fix the following
issues:

1) An internal compiler error with -fno-sync-libcalls.
2) When sync libcalls are disabled, we don't generate libcalls for
   libatomic.
3) There is no sync libcall support for targets other than Linux.
   As a result, non-atomic stores are silently emitted for types
   smaller than or equal to the word size.

There are now a few atomic libcalls in the libgcc code, so we need
sync support on all targets.

2023-01-13  John David Anglin

gcc/ChangeLog:

        * config/pa/pa-linux.h (TARGET_SYNC_LIBCALL): Delete define.
        * config/pa/pa.cc (pa_init_libfuncs): Use MAX_SYNC_LIBFUNC_SIZE
        define.
        * config/pa/pa.h (TARGET_SYNC_LIBCALLS): Use flag_sync_libcalls.
        (MAX_SYNC_LIBFUNC_SIZE): Define.
        (TARGET_CPU_CPP_BUILTINS): Define __SOFTFP__ when soft float is
        enabled.
        * config/pa/pa.md (atomic_storeqi): Emit __atomic_exchange_1
        libcall when sync libcalls are disabled.
        (atomic_storehi, atomic_storesi, atomic_storedi): Likewise.
        (atomic_loaddi): Emit __atomic_load_8 libcall when sync libcalls
        are disabled on 32-bit target.
        * config/pa/pa.opt (matomic-libcalls): New option.
        * doc/invoke.texi (HPPA Options): Update.

libgcc/ChangeLog:

        * config.host (hppa*64*-*-linux*): Adjust tmake_file to use
        pa/t-pa64-linux.
        (hppa*64*-*-hpux11*): Adjust tmake_file to use pa/t-pa64-hpux
        instead of pa/t-hpux and pa/t-pa64.
        * config/pa/linux-atomic.c: Define u32 type.
        (ATOMIC_LOAD): Define new macro to implement atomic_load_1,
        atomic_load_2, atomic_load_4 and atomic_load_8.  Update sync
        defines to use atomic_load calls for type.
        (SYNC_LOCK_LOAD_2): New macro to implement __sync_lock_load_8.
        * config/pa/sync-libfuncs.c: New file.
        * config/pa/t-netbsd (LIB2ADD_ST): Define.
        * config/pa/t-openbsd (LIB2ADD_ST): Define.
        * config/pa/t-pa64-hpux: New file.
        * config/pa/t-pa64-linux: New file.

diff --git a/gcc/config/pa/pa-linux.h b/gcc/config/pa/pa-linux.h
index 5af11a1df80..1073f42bd6b 100644
--- a/gcc/config/pa/pa-linux.h
+++ b/gcc/config/pa/pa-linux.h
@@ -133,9 +133,6 @@ along with GCC; see the file COPYING3.  If not see
 #undef TARGET_GAS
 #define TARGET_GAS 1
 
-#undef TARGET_SYNC_LIBCALL
-#define TARGET_SYNC_LIBCALL 1
-
 /* The SYNC operations are implemented as library functions, not
    INSN patterns.  As a result, the HAVE defines for the patterns are
    not defined.  We need to define them to generate the corresponding
diff --git a/gcc/config/pa/pa.cc b/gcc/config/pa/pa.cc
index 9f43802075f..b43a91f2edb 100644
--- a/gcc/config/pa/pa.cc
+++ b/gcc/config/pa/pa.cc
@@ -5940,8 +5940,8 @@ pa_init_libfuncs (void)
                            "_U_Qfcnvxf_udbl_to_quad");
     }
 
-  if (TARGET_SYNC_LIBCALL)
-    init_sync_libfuncs (8);
+  if (TARGET_SYNC_LIBCALLS)
+    init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
 }
 
 /* HP's millicode routines mean something special to the assembler.
diff --git a/gcc/config/pa/pa.h b/gcc/config/pa/pa.h
index bafdf602138..93d6f53f97f 100644
--- a/gcc/config/pa/pa.h
+++ b/gcc/config/pa/pa.h
@@ -72,10 +72,12 @@ extern unsigned long total_code_bytes;
 #define HPUX_LONG_DOUBLE_LIBRARY 0
 #endif
 
-/* Linux kernel atomic operation support.  */
-#ifndef TARGET_SYNC_LIBCALL
-#define TARGET_SYNC_LIBCALL 0
-#endif
+/* Sync libcall support.  */
+#define TARGET_SYNC_LIBCALLS (flag_sync_libcalls)
+
+/* The maximum size of the sync library functions supported.  DImode
+   is supported on 32-bit targets using floating point loads and stores.  */
+#define MAX_SYNC_LIBFUNC_SIZE 8
 
 /* The following three defines are potential target switches.  The current
    defines are optimal given the current capabilities of GAS and GNU ld.  */
@@ -173,6 +175,8 @@ do { \
      builtin_define("_PA_RISC1_0"); \
    if (HPUX_LONG_DOUBLE_LIBRARY) \
      builtin_define("__SIZEOF_FLOAT128__=16"); \
+   if (TARGET_SOFT_FLOAT) \
+     builtin_define("__SOFTFP__"); \
 } while (0)
 
 /* An old set of OS defines for various BSD-like systems.  */
diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md
index 41382271e54..71f391f2bf7 100644
--- a/gcc/config/pa/pa.md
+++ b/gcc/config/pa/pa.md
@@ -10360,7 +10360,23 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
 ;; doubleword loads and stores are not guaranteed to be atomic
 ;; when referencing the I/O address space.
 
-;; These patterns are at the bottom so the non atomic versions are preferred.
+;; Atomic and sync libcalls use different lock sets.  Great care is
+;; needed if both are used in a single application.
+
+;; Atomic load and store libcalls are enabled by the -matomic-libcalls
+;; option.  This option is not enabled by default as the generated
+;; libcalls depend on libatomic which is not built until the end of
+;; the gcc build.  For loads, we only need an atomic libcall for DImode.
+;; Sync libcalls are not generated when atomic libcalls are enabled.
+
+;; Sync libcalls are enabled by default when supported.  They can be
+;; disabled by the -fno-sync-libcalls option.  Sync libcalls always
+;; use a single memory store in their implementation, even for DImode.
+;; DImode stores are done using either std or fstd.  Thus, we only
+;; need a sync load libcall for DImode when we don't have an atomic
+;; processor load available for the mode (TARGET_SOFT_FLOAT).
+
+;; Implement atomic QImode store using exchange.
 
 (define_expand "atomic_storeqi"
   [(match_operand:QI 0 "memory_operand")       ;; memory
@@ -10368,19 +10384,30 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
    (match_operand:SI 2 "const_int_operand")]   ;; model
   ""
 {
-  if (TARGET_SYNC_LIBCALL)
+  rtx addr, libfunc;
+
+  if (TARGET_SYNC_LIBCALLS)
     {
-      rtx libfunc = optab_libfunc (sync_lock_test_and_set_optab, QImode);
-      rtx addr = convert_memory_address (Pmode, XEXP (operands[0], 0));
+      addr = convert_memory_address (Pmode, XEXP (operands[0], 0));
+      libfunc = optab_libfunc (sync_lock_test_and_set_optab, QImode);
+      emit_library_call (libfunc, LCT_NORMAL, VOIDmode, addr, Pmode,
+                         operands[1], QImode);
+      DONE;
+    }
+
+  if (TARGET_ATOMIC_LIBCALLS)
+    {
+      addr = convert_memory_address (Pmode, XEXP (operands[0], 0));
+      libfunc = init_one_libfunc ("__atomic_exchange_1");
       emit_library_call (libfunc, LCT_NORMAL, VOIDmode, addr, Pmode,
                          operands[1], QImode);
       DONE;
     }
+
   FAIL;
 })
 
-;; Implement atomic HImode stores using exchange.
+;; Implement atomic HImode store using exchange.
(define_expand "atomic_storehi" [(match_operand:HI 0 "memory_operand") ;; memory @@ -10388,15 +10415,26 @@ add,l %2,%3,%3\;bv,n %%r0(%3)" (match_operand:SI 2 "const_int_operand")] ;; model "" { - if (TARGET_SYNC_LIBCALL) + rtx addr, libfunc; + + if (TARGET_SYNC_LIBCALLS) { - rtx libfunc = optab_libfunc (sync_lock_test_and_set_optab, HImode); - rtx addr = convert_memory_address (Pmode, XEXP (operands[0], 0)); + addr = convert_memory_address (Pmode, XEXP (operands[0], 0)); + libfunc = optab_libfunc (sync_lock_test_and_set_optab, HImode); + emit_library_call (libfunc, LCT_NORMAL, VOIDmode, addr, Pmode, + operands[1], HImode); + DONE; + } + if (TARGET_ATOMIC_LIBCALLS) + { + addr = convert_memory_address (Pmode, XEXP (operands[0], 0)); + libfunc = init_one_libfunc ("__atomic_exchange_2"); emit_library_call (libfunc, LCT_NORMAL, VOIDmode, addr, Pmode, operands[1], HImode); DONE; } + FAIL; }) @@ -10408,20 +10446,39 @@ add,l %2,%3,%3\;bv,n %%r0(%3)" (match_operand:SI 2 "const_int_operand")] ;; model "" { - if (TARGET_SYNC_LIBCALL) + rtx addr, libfunc; + + if (TARGET_SYNC_LIBCALLS) { - rtx libfunc = optab_libfunc (sync_lock_test_and_set_optab, SImode); - rtx addr = convert_memory_address (Pmode, XEXP (operands[0], 0)); + addr = convert_memory_address (Pmode, XEXP (operands[0], 0)); + libfunc = optab_libfunc (sync_lock_test_and_set_optab, SImode); + emit_library_call (libfunc, LCT_NORMAL, VOIDmode, addr, Pmode, + operands[1], SImode); + DONE; + } + if (TARGET_ATOMIC_LIBCALLS) + { + addr = convert_memory_address (Pmode, XEXP (operands[0], 0)); + libfunc = init_one_libfunc ("__atomic_exchange_4"); emit_library_call (libfunc, LCT_NORMAL, VOIDmode, addr, Pmode, operands[1], SImode); DONE; } + FAIL; }) ;; Implement atomic DImode load. +;; We need an atomic or sync libcall whenever the processor load or +;; store used for DImode is not atomic. The 32-bit libatomic +;; implementation uses a pair of stw instructions. They are not +;; atomic, so we need to call __atomic_load_8. The linux libgcc +;; sync implementation uses a std or fstd instruction. They are +;; atomic, so we only need to call __sync_load_8 when the load +;; operation would not be atomic (e.g., 32-bit TARGET_SOFT_FLOAT). + (define_expand "atomic_loaddi" [(match_operand:DI 0 "register_operand") ;; val out (match_operand:DI 1 "memory_operand") ;; memory @@ -10429,12 +10486,35 @@ add,l %2,%3,%3\;bv,n %%r0(%3)" "" { enum memmodel model; + rtx addr, libfunc; - if (TARGET_64BIT || TARGET_SOFT_FLOAT) + if (TARGET_64BIT) FAIL; + if (TARGET_SYNC_LIBCALLS && MAX_SYNC_LIBFUNC_SIZE >= 8 && TARGET_SOFT_FLOAT) + { + addr = convert_memory_address (Pmode, XEXP (operands[1], 0)); + libfunc = init_one_libfunc ("__sync_load_8"); + emit_library_call_value (libfunc, operands[0], LCT_NORMAL, DImode, + addr, Pmode); + DONE; + } + + if (TARGET_ATOMIC_LIBCALLS && TARGET_SOFT_FLOAT) + { + addr = convert_memory_address (Pmode, XEXP (operands[1], 0)); + libfunc = init_one_libfunc ("__atomic_load_8"); + emit_library_call_value (libfunc, operands[0], LCT_NORMAL, DImode, + addr, Pmode); + DONE; + } + + if (TARGET_SOFT_FLOAT) + FAIL; + + /* Fallback to processor load with barriers. 
   model = memmodel_from_int (INTVAL (operands[2]));
-  operands[1] = force_reg (SImode, XEXP (operands[1], 0));
+  operands[1] = force_reg (Pmode, XEXP (operands[1], 0));
   if (is_mm_seq_cst (model))
     expand_mem_thread_fence (model);
   emit_insn (gen_atomic_loaddi_1 (operands[0], operands[1]));
@@ -10460,12 +10540,21 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
   ""
 {
   enum memmodel model;
+  rtx addr, libfunc;
 
-  if (TARGET_SYNC_LIBCALL)
+  if (TARGET_SYNC_LIBCALLS && MAX_SYNC_LIBFUNC_SIZE >= 8)
     {
-      rtx libfunc = optab_libfunc (sync_lock_test_and_set_optab, DImode);
-      rtx addr = convert_memory_address (Pmode, XEXP (operands[0], 0));
+      addr = convert_memory_address (Pmode, XEXP (operands[0], 0));
+      libfunc = optab_libfunc (sync_lock_test_and_set_optab, DImode);
+      emit_library_call (libfunc, LCT_NORMAL, VOIDmode, addr, Pmode,
+                         operands[1], DImode);
+      DONE;
+    }
+
+  if (TARGET_ATOMIC_LIBCALLS)
+    {
+      addr = convert_memory_address (Pmode, XEXP (operands[0], 0));
+      libfunc = init_one_libfunc ("__atomic_exchange_8");
       emit_library_call (libfunc, LCT_NORMAL, VOIDmode, addr, Pmode,
                          operands[1], DImode);
       DONE;
@@ -10474,8 +10563,9 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
   if (TARGET_64BIT || TARGET_SOFT_FLOAT)
     FAIL;
 
+  /* Fallback to processor store with barriers.  */
   model = memmodel_from_int (INTVAL (operands[2]));
-  operands[0] = force_reg (SImode, XEXP (operands[0], 0));
+  operands[0] = force_reg (Pmode, XEXP (operands[0], 0));
   if (operands[1] != CONST0_RTX (DImode))
     operands[1] = force_reg (DImode, operands[1]);
   expand_mem_thread_fence (model);
diff --git a/gcc/config/pa/pa.opt b/gcc/config/pa/pa.opt
index 58b59f8f0e8..2d074f5fe88 100644
--- a/gcc/config/pa/pa.opt
+++ b/gcc/config/pa/pa.opt
@@ -37,6 +37,10 @@ march=2.0
 Target RejectNegative
 Generate PA2.0 code (requires binutils 2.10 or later).
 
+matomic-libcalls
+Target Var(TARGET_ATOMIC_LIBCALLS) Init(1)
+Generate libcalls for atomic loads and stores when sync libcalls are disabled.
+
 mbig-switch
 Target Ignore
 Does nothing.  Preserved for backward compatibility.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 701c228bd0a..474dcaed5dc 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -975,17 +975,18 @@ Objective-C and Objective-C++ Dialects}.
 @emph{HPPA Options}
 @gccoptlist{-march=@var{architecture-type} @gol
+-matomic-libcalls -mbig-switch @gol
 -mcaller-copies -mdisable-fpregs -mdisable-indexing @gol
--mfast-indirect-calls -mgas -mgnu-ld -mhp-ld @gol
+-mordered -mfast-indirect-calls -mgas -mgnu-ld -mhp-ld @gol
 -mfixed-range=@var{register-range} @gol
--mjump-in-delay -mlinker-opt -mlong-calls @gol
--mlong-load-store -mno-disable-fpregs @gol
+-mcoherent-ldcw -mjump-in-delay -mlinker-opt -mlong-calls @gol
+-mlong-load-store -mno-atomic-libcalls -mno-disable-fpregs @gol
 -mno-disable-indexing -mno-fast-indirect-calls -mno-gas @gol
 -mno-jump-in-delay -mno-long-load-store @gol
 -mno-portable-runtime -mno-soft-float @gol
 -mno-space-regs -msoft-float -mpa-risc-1-0 @gol
 -mpa-risc-1-1 -mpa-risc-2-0 -mportable-runtime @gol
--mschedule=@var{cpu-type} -mspace-regs -msio -mwsio @gol
+-mschedule=@var{cpu-type} -mspace-regs -msoft-mult -msio -mwsio @gol
 -munix=@var{unix-std} -nolibdld -static -threads}
 
 @emph{IA-64 Options}
@@ -24891,6 +24892,33 @@ other way around.
 @opindex mpa-risc-2-0
 Synonyms for @option{-march=1.0}, @option{-march=1.1}, and
 @option{-march=2.0} respectively.
 
+@item -matomic-libcalls
+@opindex matomic-libcalls
+@opindex mno-atomic-libcalls
+Generate libcalls for atomic loads and stores when sync libcalls are disabled.
+This option is enabled by default.  It only affects the generation of
+atomic libcalls by the HPPA backend.
+
+Both the sync and @file{libatomic} libcall implementations use locking.
+As a result, processor stores are not atomic with respect to other
+atomic operations.  Processor loads up to DImode are atomic with
+respect to other atomic operations provided they are implemented as
+a single access.
+
+The PA-RISC architecture does not support any atomic operations in
+hardware except for the @code{ldcw} instruction.  Thus, all atomic
+support is implemented using sync and atomic libcalls.  Sync libcall
+support is in @file{libgcc.a}.  Atomic libcall support is in
+@file{libatomic}.
+
+This option generates @code{__atomic_exchange} calls for atomic stores.
+It also provides special handling for atomic DImode accesses on 32-bit
+targets.
+
+@item -mbig-switch
+@opindex mbig-switch
+Does nothing.  Preserved for backward compatibility.
+
 @item -mcaller-copies
 @opindex mcaller-copies
 The caller copies function arguments passed by hidden reference.  This
@@ -24899,30 +24927,19 @@ option should be used with care as it is not compatible with the default
 passed by hidden reference and the option provides better compatibility
 with OpenMP.
 
-@item -mjump-in-delay
-@opindex mjump-in-delay
-This option is ignored and provided for compatibility purposes only.
+@item -mcoherent-ldcw
+@opindex mcoherent-ldcw
+Use ldcw/ldcd coherent cache-control hint.
 
 @item -mdisable-fpregs
 @opindex mdisable-fpregs
-Prevent floating-point registers from being used in any manner.  This is
-necessary for compiling kernels that perform lazy context switching of
-floating-point registers.  If you use this option and attempt to perform
-floating-point operations, the compiler aborts.
+Disable floating-point registers.  Equivalent to @code{-msoft-float}.
 
 @item -mdisable-indexing
 @opindex mdisable-indexing
 Prevent the compiler from using indexing address modes.  This avoids some
 rather obscure problems when compiling MIG generated code under MACH@.
 
-@item -mno-space-regs
-@opindex mno-space-regs
-@opindex mspace-regs
-Generate code that assumes the target has no space registers.  This allows
-GCC to generate faster indirect calls and use unscaled index address modes.
-
-Such code is suitable for level 0 PA systems and kernels.
-
 @item -mfast-indirect-calls
 @opindex mfast-indirect-calls
 Generate code that assumes calls never cross space boundaries.  This
@@ -24939,57 +24956,10 @@ useful when compiling kernel code.
 A register range is specified as two registers separated by a dash.
 Multiple register ranges can be specified separated by a comma.
 
-@item -mlong-load-store
-@opindex mlong-load-store
-Generate 3-instruction load and store sequences as sometimes required by
-the HP-UX 10 linker.  This is equivalent to the @samp{+k} option to
-the HP compilers.
-
-@item -mportable-runtime
-@opindex mportable-runtime
-Use the portable calling conventions proposed by HP for ELF systems.
-
 @item -mgas
 @opindex mgas
 Enable the use of assembler directives only GAS understands.
 
-@item -mschedule=@var{cpu-type}
-@opindex mschedule
-Schedule code according to the constraints for the machine type
-@var{cpu-type}.  The choices for @var{cpu-type} are @samp{700}
-@samp{7100}, @samp{7100LC}, @samp{7200}, @samp{7300} and @samp{8000}.  Refer
-to @file{/usr/lib/sched.models} on an HP-UX system to determine the
-proper scheduling option for your machine.  The default scheduling is
-@samp{8000}.
-
-@item -mlinker-opt
-@opindex mlinker-opt
-Enable the optimization pass in the HP-UX linker.  Note this makes symbolic
-debugging impossible.  It also triggers a bug in the HP-UX 8 and HP-UX 9
-linkers in which they give bogus error messages when linking some programs.
-
-@item -msoft-float
-@opindex msoft-float
-Generate output containing library calls for floating point.
-@strong{Warning:} the requisite libraries are not available for all HPPA
-targets.  Normally the facilities of the machine's usual C compiler are
-used, but this cannot be done directly in cross-compilation.  You must make
-your own arrangements to provide suitable library functions for
-cross-compilation.
-
-@option{-msoft-float} changes the calling convention in the output file;
-therefore, it is only useful if you compile @emph{all} of a program with
-this option.  In particular, you need to compile @file{libgcc.a}, the
-library that comes with GCC, with @option{-msoft-float} in order for
-this to work.
-
-@item -msio
-@opindex msio
-Generate the predefine, @code{_SIO}, for server IO@.  The default is
-@option{-mwsio}.  This generates the predefines, @code{__hp9000s700},
-@code{__hp9000s700__} and @code{_WSIO}, for workstation IO@.  These
-options are available under HP-UX and HI-UX@.
-
 @item -mgnu-ld
 @opindex mgnu-ld
 Use options specific to GNU @command{ld}.
@@ -25019,6 +24989,12 @@ configure option, GCC's program search path, and finally by the user's
 `gcc -print-prog-name=ld`}.  This option is only available on the 64-bit
 HP-UX GCC, i.e.@: configured with @samp{hppa*64*-*-hpux*}.
 
+@item -mlinker-opt
+@opindex mlinker-opt
+Enable the optimization pass in the HP-UX linker.  Note this makes symbolic
+debugging impossible.  It also triggers a bug in the HP-UX 8 and HP-UX 9
+linkers in which they give bogus error messages when linking some programs.
+
 @item -mlong-calls
 @opindex mno-long-calls
 @opindex mlong-calls
@@ -25047,6 +25023,69 @@ symbol-difference or pc-relative calls should be relatively small.
 However, an indirect call is used on 32-bit ELF systems in pic code and
 it is quite long.
 
+@item -mlong-load-store
+@opindex mlong-load-store
+Generate 3-instruction load and store sequences as sometimes required by
+the HP-UX 10 linker.  This is equivalent to the @samp{+k} option to
+the HP compilers.
+
+@item -mjump-in-delay
+@opindex mjump-in-delay
+This option is ignored and provided for compatibility purposes only.
+
+@item -mno-space-regs
+@opindex mno-space-regs
+@opindex mspace-regs
+Generate code that assumes the target has no space registers.  This allows
+GCC to generate faster indirect calls and use unscaled index address modes.
+
+Such code is suitable for level 0 PA systems and kernels.
+
+@item -mordered
+@opindex mordered
+Assume memory references are ordered and barriers are not needed.
+
+@item -mportable-runtime
+@opindex mportable-runtime
+Use the portable calling conventions proposed by HP for ELF systems.
+
+@item -mschedule=@var{cpu-type}
+@opindex mschedule
+Schedule code according to the constraints for the machine type
+@var{cpu-type}.  The choices for @var{cpu-type} are @samp{700}
+@samp{7100}, @samp{7100LC}, @samp{7200}, @samp{7300} and @samp{8000}.  Refer
+to @file{/usr/lib/sched.models} on an HP-UX system to determine the
+proper scheduling option for your machine.  The default scheduling is
+@samp{8000}.
+
+@item -msio
+@opindex msio
+Generate the predefine, @code{_SIO}, for server IO@.  The default is
+@option{-mwsio}.  This generates the predefines, @code{__hp9000s700},
+@code{__hp9000s700__} and @code{_WSIO}, for workstation IO@.  These
+options are available under HP-UX and HI-UX@.
+
+@item -msoft-float
+@opindex msoft-float
+Generate output containing library calls for floating point.
+@strong{Warning:} the requisite libraries are not available for all HPPA
+targets.  Normally the facilities of the machine's usual C compiler are
+used, but this cannot be done directly in cross-compilation.  You must make
+your own arrangements to provide suitable library functions for
+cross-compilation.
+
+@option{-msoft-float} changes the calling convention in the output file;
+therefore, it is only useful if you compile @emph{all} of a program with
+this option.  In particular, you need to compile @file{libgcc.a}, the
+library that comes with GCC, with @option{-msoft-float} in order for
+this to work.
+
+@item -msoft-mult
+@opindex msoft-mult
+Use software integer multiplication.
+
+This disables the use of the @code{xmpyu} instruction.
+
 @item -munix=@var{unix-std}
 @opindex march
 Generate compiler predefines and select a startfile for the specified
diff --git a/libgcc/config.host b/libgcc/config.host
index d2087654c40..3e2c9109ab1 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -633,7 +633,7 @@ h8300-*-linux*)
 	tm_file="$tm_file h8300/h8300-lib.h"
 	;;
 hppa*64*-*-linux*)
-	tmake_file="$tmake_file pa/t-linux64 pa/t-dimode"
+	tmake_file="$tmake_file pa/t-pa64-linux pa/t-dimode"
 	tmake_file="$tmake_file pa/t-softfp-sfdftf t-softfp"
 	extra_parts="crtbegin.o crtbeginS.o crtbeginT.o crtend.o crtendS.o"
 	;;
@@ -649,7 +649,7 @@ hppa*-*-linux*)
 	md_unwind_header=pa/linux-unwind.h
 	;;
 hppa*64*-*-hpux11*)
-	tmake_file="$tmake_file pa/t-hpux pa/t-pa64 pa/t-dimode"
+	tmake_file="$tmake_file pa/t-pa64-hpux pa/t-dimode"
 	tmake_file="$tmake_file pa/t-stublib t-libgcc-pic t-slibgcc"
 	# Set the libgcc version number
 	if test x$ac_cv_sjlj_exceptions = xyes; then
diff --git a/libgcc/config/pa/linux-atomic.c b/libgcc/config/pa/linux-atomic.c
index 10d7f4217f5..1978e681f77 100644
--- a/libgcc/config/pa/linux-atomic.c
+++ b/libgcc/config/pa/linux-atomic.c
@@ -32,6 +32,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 
 typedef unsigned char u8;
 typedef short unsigned int u16;
+typedef unsigned int u32;
 #ifdef __LP64__
 typedef long unsigned int u64;
 #else
@@ -115,6 +116,36 @@ __kernel_cmpxchg2 (volatile void *mem, const void *oldval, const void *newval,
 #define MASK_1 0xffu
 #define MASK_2 0xffffu
 
+/* Load value with an atomic processor load if possible.  */
+#define ATOMIC_LOAD(TYPE, WIDTH) \
+  static inline TYPE \
+  atomic_load_##WIDTH (volatile void *ptr) \
+  { \
+    return *(volatile TYPE *)ptr; \
+  }
+
+#if defined(__LP64__) || defined(__SOFTFP__)
+ATOMIC_LOAD (u64, 8)
+#else
+static inline u64
+atomic_load_8 (volatile void *ptr)
+{
+  u64 result;
+  double tmp;
+
+  asm volatile ("{fldds|fldd} 0(%2),%1\n\t"
+		"{fstds|fstd} %1,-16(%%sp)\n\t"
+		"{ldws|ldw} -16(%%sp),%0\n\t"
+		"{ldws|ldw} -12(%%sp),%R0"
+		: "=r" (result), "=f" (tmp) : "r" (ptr): "memory");
+  return result;
+}
+#endif
+
+ATOMIC_LOAD (u32, 4)
+ATOMIC_LOAD (u16, 2)
+ATOMIC_LOAD (u8, 1)
+
 #define FETCH_AND_OP_2(OP, PFX_OP, INF_OP, TYPE, WIDTH, INDEX) \
   TYPE HIDDEN \
   __sync_fetch_and_##OP##_##WIDTH (volatile void *ptr, TYPE val) \
@@ -123,7 +154,7 @@ __kernel_cmpxchg2 (volatile void *mem, const void *oldval, const void *newval,
     long failure; \
  \
     do { \
-      tmp = __atomic_load_n ((volatile TYPE *)ptr, __ATOMIC_RELAXED); \
+      tmp = atomic_load_##WIDTH ((volatile TYPE *)ptr); \
       newval = PFX_OP (tmp INF_OP val); \
       failure = __kernel_cmpxchg2 (ptr, &tmp, &newval, INDEX); \
     } while (failure != 0); \
@@ -160,7 +191,7 @@ FETCH_AND_OP_2 (nand, ~, &, u8, 1, 0)
     long failure; \
  \
     do { \
-      tmp = __atomic_load_n ((volatile TYPE *)ptr, __ATOMIC_RELAXED); \
+      tmp = atomic_load_##WIDTH ((volatile TYPE *)ptr); \
       newval = PFX_OP (tmp INF_OP val); \
       failure = __kernel_cmpxchg2 (ptr, &tmp, &newval, INDEX); \
     } while (failure != 0); \
@@ -197,8 +228,7 @@ OP_AND_FETCH_2 (nand, ~, &, u8, 1, 0)
     long failure; \
  \
     do { \
-      tmp = __atomic_load_n ((volatile unsigned int *)ptr, \
-			     __ATOMIC_RELAXED); \
+      tmp = atomic_load_4 ((volatile unsigned int *)ptr); \
       failure = __kernel_cmpxchg (ptr, tmp, PFX_OP (tmp INF_OP val)); \
     } while (failure != 0); \
  \
@@ -220,8 +250,7 @@ FETCH_AND_OP_WORD (nand, ~, &)
     long failure; \
  \
     do { \
-      tmp = __atomic_load_n ((volatile unsigned int *)ptr, \
-			     __ATOMIC_RELAXED); \
+      tmp = atomic_load_4 ((volatile unsigned int *)ptr); \
       failure = __kernel_cmpxchg (ptr, tmp, PFX_OP (tmp INF_OP val)); \
     } while (failure != 0); \
  \
@@ -247,8 +276,7 @@ typedef unsigned char bool;
  \
     while (1) \
       { \
-	actual_oldval = __atomic_load_n ((volatile TYPE *)ptr, \
-					 __ATOMIC_RELAXED); \
+	actual_oldval = atomic_load_##WIDTH ((volatile TYPE *)ptr); \
  \
 	if (__builtin_expect (oldval != actual_oldval, 0)) \
 	  return actual_oldval; \
@@ -281,8 +309,7 @@ __sync_val_compare_and_swap_4 (volatile void *ptr, unsigned int oldval,
 
   while (1)
     {
-      actual_oldval = __atomic_load_n ((volatile unsigned int *)ptr,
-				       __ATOMIC_RELAXED);
+      actual_oldval = atomic_load_4 ((volatile unsigned int *)ptr);
 
       if (__builtin_expect (oldval != actual_oldval, 0))
	return actual_oldval;
@@ -310,8 +337,7 @@ TYPE HIDDEN \
     long failure; \
  \
     do { \
-      oldval = __atomic_load_n ((volatile TYPE *)ptr, \
-				__ATOMIC_RELAXED); \
+      oldval = atomic_load_##WIDTH ((volatile TYPE *)ptr); \
       failure = __kernel_cmpxchg2 (ptr, &oldval, &val, INDEX); \
     } while (failure != 0); \
  \
@@ -322,14 +348,14 @@ SYNC_LOCK_TEST_AND_SET_2 (u64, 8, 3)
 SYNC_LOCK_TEST_AND_SET_2 (u16, 2, 1)
 SYNC_LOCK_TEST_AND_SET_2 (u8, 1, 0)
 
-unsigned int HIDDEN
+u32 HIDDEN
 __sync_lock_test_and_set_4 (volatile void *ptr, unsigned int val)
 {
   long failure;
   unsigned int oldval;
 
   do {
-    oldval = __atomic_load_n ((volatile unsigned int *)ptr, __ATOMIC_RELAXED);
+    oldval = atomic_load_4 ((volatile unsigned int *)ptr);
     failure = __kernel_cmpxchg (ptr, oldval, val);
   } while (failure != 0);
 
@@ -344,8 +370,7 @@ __sync_lock_test_and_set_4 (volatile void *ptr, unsigned int val)
     long failure; \
  \
     do { \
-      oldval = __atomic_load_n ((volatile TYPE *)ptr, \
-				__ATOMIC_RELAXED); \
+      oldval = atomic_load_##WIDTH ((volatile TYPE *)ptr); \
       failure = __kernel_cmpxchg2 (ptr, &oldval, &val, INDEX); \
     } while (failure != 0); \
   }
@@ -361,7 +386,27 @@ __sync_lock_release_4 (volatile void *ptr)
   unsigned int oldval;
 
   do {
-    oldval = __atomic_load_n ((volatile unsigned int *)ptr, __ATOMIC_RELAXED);
+    oldval = atomic_load_4 ((volatile unsigned int *)ptr);
     failure = __kernel_cmpxchg (ptr, oldval, 0);
   } while (failure != 0);
 }
+
+#ifndef __LP64__
+#define SYNC_LOCK_LOAD_2(TYPE, WIDTH, INDEX) \
+  TYPE __sync_lock_load_##WIDTH (volatile void *) HIDDEN; \
+  TYPE \
+  __sync_lock_load_##WIDTH (volatile void *ptr) \
+  { \
+    TYPE oldval; \
+    long failure; \
+ \
+    do { \
+      oldval = atomic_load_##WIDTH ((volatile TYPE *)ptr); \
+      failure = __kernel_cmpxchg2 (ptr, &oldval, &oldval, INDEX); \
+    } while (failure != 0); \
+ \
+    return oldval; \
+  }
+
+SYNC_LOCK_LOAD_2 (u64, 8, 3)
+#endif
diff --git a/libgcc/config/pa/sync-libfuncs.c b/libgcc/config/pa/sync-libfuncs.c
new file mode 100644
index 00000000000..c70be0fde73
--- /dev/null
+++ b/libgcc/config/pa/sync-libfuncs.c
@@ -0,0 +1,324 @@
+/* PA-RISC sync libfunc support.
+   Copyright (C) 2008-2023 Free Software Foundation, Inc.
+   Based on code contributed by CodeSourcery for ARM EABI Linux.
+   Modifications for PA Linux by Helge Deller
+   Revised for general use by John David Anglin
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+typedef unsigned char u8;
+typedef short unsigned int u16;
+typedef unsigned int u32;
+#ifdef __LP64__
+typedef long unsigned int u64;
+#else
+typedef long long unsigned int u64;
+#endif
+
+/* PA-RISC 2.0 supports out-of-order execution for loads and stores.
+   Thus, we need to synchronize memory accesses.  For more info, see:
+   "Advanced Performance Features of the 64-bit PA-8000" by Doug Hunt.  */
+
+typedef volatile int __attribute__((aligned (16))) ldcw_t;
+static ldcw_t __atomicity_lock = 1;
+
+/* We want default visibility for the sync routines.  */
+#undef VISIBILITY
+#if defined(__hpux__) && !defined(__LP64__)
+#define VISIBILITY
+#else
+#define VISIBILITY __attribute__ ((visibility ("default")))
+#endif
+
+/* Perform ldcw operation in cache when possible.  The ldcw instruction
+   is a full barrier.  */
+#ifndef _PA_LDCW_INSN
+# ifdef _PA_RISC2_0
+# define _PA_LDCW_INSN "ldcw,co"
+# else
+# define _PA_LDCW_INSN "ldcw"
+# endif
+#endif
+
+static inline void
+__sync_spin_lock (void)
+{
+  ldcw_t *lock = &__atomicity_lock;
+  int tmp;
+
+  __asm__ __volatile__ (_PA_LDCW_INSN " 0(%1),%0\n\t"
+			"cmpib,<>,n 0,%0,.+20\n\t"
+			"ldw,ma 0(%1),%0\n\t"
+			"cmpib,<> 0,%0,.-12\n\t"
+			"nop\n\t"
+			"b,n .-12"
+			: "=&r" (tmp)
+			: "r" (lock)
+			: "memory");
+}
+
+static inline void
+__sync_spin_unlock (void)
+{
+  ldcw_t *lock = &__atomicity_lock;
+  int tmp = 1;
+
+  /* Use ordered store for release.  */
+  __asm__ __volatile__ ("stw,ma %1,0(%0)"
+			: : "r" (lock), "r" (tmp) : "memory");
+}
+
+/* Load value with an atomic processor load if possible.  */
+#define ATOMIC_LOAD(TYPE, WIDTH) \
+  static inline TYPE \
+  atomic_load_##WIDTH (volatile void *ptr) \
+  { \
+    return *(volatile TYPE *)ptr; \
+  }
+
+#if defined(__LP64__) || defined(__SOFTFP__)
+ATOMIC_LOAD (u64, 8)
+#else
+static inline u64
+atomic_load_8 (volatile void *ptr)
+{
+  u64 result;
+  double tmp;
+
+  asm volatile ("{fldds|fldd} 0(%2),%1\n\t"
+		"{fstds|fstd} %1,-16(%%sp)\n\t"
+		"{ldws|ldw} -16(%%sp),%0\n\t"
+		"{ldws|ldw} -12(%%sp),%R0"
+		: "=r" (result), "=f" (tmp) : "r" (ptr): "memory");
+  return result;
+}
+#endif
+
+ATOMIC_LOAD (u32, 4)
+ATOMIC_LOAD (u16, 2)
+ATOMIC_LOAD (u8, 1)
+
+/* Store value with an atomic processor store if possible.  */
+#define ATOMIC_STORE(TYPE, WIDTH) \
+  static inline void \
+  atomic_store_##WIDTH (volatile void *ptr, TYPE value) \
+  { \
+    *(volatile TYPE *)ptr = value; \
+  }
+
+#if defined(__LP64__) || defined(__SOFTFP__)
+ATOMIC_STORE (u64, 8)
+#else
+static inline void
+atomic_store_8 (volatile void *ptr, u64 value)
+{
+  double tmp;
+
+  asm volatile ("{stws|stw} %2,-16(%%sp)\n\t"
+		"{stws|stw} %R2,-12(%%sp)\n\t"
+		"{fldds|fldd} -16(%%sp),%1\n\t"
+		"{fstds|fstd} %1,0(%0)"
+		: "=m" (ptr), "=&f" (tmp) : "r" (value): "memory");
+}
+#endif
+
+ATOMIC_STORE (u32, 4)
+ATOMIC_STORE (u16, 2)
+ATOMIC_STORE (u8, 1)
+
+#define FETCH_AND_OP(OP, PFX_OP, INF_OP, TYPE, WIDTH) \
+  TYPE VISIBILITY \
+  __sync_fetch_and_##OP##_##WIDTH (volatile void *ptr, TYPE val) \
+  { \
+    TYPE tmp, newval; \
+ \
+    __sync_spin_lock(); \
+    tmp = atomic_load_##WIDTH (ptr); \
+    newval = PFX_OP (tmp INF_OP val); \
+    atomic_store_##WIDTH (ptr, newval); \
+    __sync_spin_unlock(); \
+ \
+    return tmp; \
+  }
+
+FETCH_AND_OP (add, , +, u64, 8)
+FETCH_AND_OP (sub, , -, u64, 8)
+FETCH_AND_OP (or, , |, u64, 8)
+FETCH_AND_OP (and, , &, u64, 8)
+FETCH_AND_OP (xor, , ^, u64, 8)
+FETCH_AND_OP (nand, ~, &, u64, 8)
+
+FETCH_AND_OP (add, , +, u32, 4)
+FETCH_AND_OP (sub, , -, u32, 4)
+FETCH_AND_OP (or, , |, u32, 4)
+FETCH_AND_OP (and, , &, u32, 4)
+FETCH_AND_OP (xor, , ^, u32, 4)
+FETCH_AND_OP (nand, ~, &, u32, 4)
+
+FETCH_AND_OP (add, , +, u16, 2)
+FETCH_AND_OP (sub, , -, u16, 2)
+FETCH_AND_OP (or, , |, u16, 2)
+FETCH_AND_OP (and, , &, u16, 2)
+FETCH_AND_OP (xor, , ^, u16, 2)
+FETCH_AND_OP (nand, ~, &, u16, 2)
+
+FETCH_AND_OP (add, , +, u8, 1)
+FETCH_AND_OP (sub, , -, u8, 1)
+FETCH_AND_OP (or, , |, u8, 1)
+FETCH_AND_OP (and, , &, u8, 1)
+FETCH_AND_OP (xor, , ^, u8, 1)
+FETCH_AND_OP (nand, ~, &, u8, 1)
+
+#define OP_AND_FETCH(OP, PFX_OP, INF_OP, TYPE, WIDTH) \
+  TYPE VISIBILITY \
+  __sync_##OP##_and_fetch_##WIDTH (volatile void *ptr, TYPE val) \
+  { \
+    TYPE tmp, newval; \
+ \
+    __sync_spin_lock(); \
+    tmp = atomic_load_##WIDTH (ptr); \
+    newval = PFX_OP (tmp INF_OP val); \
+    atomic_store_##WIDTH (ptr, newval); \
+    __sync_spin_unlock(); \
+ \
+    return newval; \
+  }
+
+OP_AND_FETCH (add, , +, u64, 8)
+OP_AND_FETCH (sub, , -, u64, 8)
+OP_AND_FETCH (or, , |, u64, 8)
+OP_AND_FETCH (and, , &, u64, 8)
+OP_AND_FETCH (xor, , ^, u64, 8)
+OP_AND_FETCH (nand, ~, &, u64, 8)
+
+OP_AND_FETCH (add, , +, u32, 4)
+OP_AND_FETCH (sub, , -, u32, 4)
+OP_AND_FETCH (or, , |, u32, 4)
+OP_AND_FETCH (and, , &, u32, 4)
+OP_AND_FETCH (xor, , ^, u32, 4)
+OP_AND_FETCH (nand, ~, &, u32, 4)
+
+OP_AND_FETCH (add, , +, u16, 2)
+OP_AND_FETCH (sub, , -, u16, 2)
+OP_AND_FETCH (or, , |, u16, 2)
+OP_AND_FETCH (and, , &, u16, 2)
+OP_AND_FETCH (xor, , ^, u16, 2)
+OP_AND_FETCH (nand, ~, &, u16, 2)
+
+OP_AND_FETCH (add, , +, u8, 1)
+OP_AND_FETCH (sub, , -, u8, 1)
+OP_AND_FETCH (or, , |, u8, 1)
+OP_AND_FETCH (and, , &, u8, 1)
+OP_AND_FETCH (xor, , ^, u8, 1)
+OP_AND_FETCH (nand, ~, &, u8, 1)
+
+#define COMPARE_AND_SWAP(TYPE, WIDTH) \
+  TYPE VISIBILITY \
+  __sync_val_compare_and_swap_##WIDTH (volatile void *ptr, TYPE oldval, \
+				       TYPE newval) \
+  { \
+    TYPE actual_oldval; \
+ \
+    __sync_spin_lock(); \
+    actual_oldval = atomic_load_##WIDTH (ptr); \
+    if (actual_oldval == oldval) \
+      atomic_store_##WIDTH (ptr, newval); \
+    __sync_spin_unlock(); \
+ \
+    return actual_oldval; \
+  } \
+ \
+  _Bool VISIBILITY \
+  __sync_bool_compare_and_swap_##WIDTH (volatile void *ptr, \
+					TYPE oldval, TYPE newval) \
+  { \
+    TYPE actual_oldval; \
+    _Bool result; \
+ \
+    __sync_spin_lock(); \
+    actual_oldval = atomic_load_##WIDTH (ptr); \
+    result = (actual_oldval == oldval); \
+    if (result) \
+      atomic_store_##WIDTH (ptr, newval); \
+    __sync_spin_unlock(); \
+ \
+    return result; \
+  }
+
+COMPARE_AND_SWAP (u64, 8)
+COMPARE_AND_SWAP (u32, 4)
+COMPARE_AND_SWAP (u16, 2)
+COMPARE_AND_SWAP (u8, 1)
+
+#define SYNC_LOCK_TEST_AND_SET(TYPE, WIDTH) \
+TYPE VISIBILITY \
+  __sync_lock_test_and_set_##WIDTH (volatile void *ptr, TYPE val) \
+  { \
+    TYPE oldval; \
+ \
+    __sync_spin_lock(); \
+    oldval = atomic_load_##WIDTH (ptr); \
+    atomic_store_##WIDTH (ptr, val); \
+    __sync_spin_unlock(); \
+ \
+    return oldval; \
+  }
+
+SYNC_LOCK_TEST_AND_SET (u64, 8)
+SYNC_LOCK_TEST_AND_SET (u32, 4)
+SYNC_LOCK_TEST_AND_SET (u16, 2)
+SYNC_LOCK_TEST_AND_SET (u8, 1)
+
+#define SYNC_LOCK_RELEASE(TYPE, WIDTH) \
+  void VISIBILITY \
+  __sync_lock_release_##WIDTH (volatile void *ptr) \
+  { \
+    TYPE val = 0; \
+ \
+    __sync_spin_lock(); \
+    atomic_store_##WIDTH (ptr, val); \
+    __sync_spin_unlock(); \
+  }
+
+SYNC_LOCK_RELEASE (u64, 8)
+SYNC_LOCK_RELEASE (u32, 4)
+SYNC_LOCK_RELEASE (u16, 2)
+SYNC_LOCK_RELEASE (u8, 1)
+
+#define SYNC_LOCK_LOAD(TYPE, WIDTH) \
+TYPE VISIBILITY __sync_lock_load_##WIDTH (volatile void *); \
+TYPE VISIBILITY \
+  __sync_lock_load_##WIDTH (volatile void *ptr) \
+  { \
+    TYPE oldval; \
+ \
+    __sync_spin_lock(); \
+    oldval = atomic_load_##WIDTH (ptr); \
+    __sync_spin_unlock(); \
+ \
+    return oldval; \
+  }
+
+SYNC_LOCK_LOAD (u64, 8)
+SYNC_LOCK_LOAD (u32, 4)
+SYNC_LOCK_LOAD (u16, 2)
+SYNC_LOCK_LOAD (u8, 1)
diff --git a/libgcc/config/pa/t-netbsd b/libgcc/config/pa/t-netbsd
index 8b99068ce42..13943940a37 100644
--- a/libgcc/config/pa/t-netbsd
+++ b/libgcc/config/pa/t-netbsd
@@ -7,3 +7,4 @@ LIB1ASMFUNCS = _divI _divU _remI _remU _div_const _mulI _dyncall
 HOST_LIBGCC2_CFLAGS += -DELF=1 -DLINUX=1
 
 LIB2ADD = $(srcdir)/config/pa/fptr.c
+LIB2ADD_ST = $(srcdir)/config/pa/sync-libfuncs.c
diff --git a/libgcc/config/pa/t-openbsd b/libgcc/config/pa/t-openbsd
index 8b99068ce42..13943940a37 100644
--- a/libgcc/config/pa/t-openbsd
+++ b/libgcc/config/pa/t-openbsd
@@ -7,3 +7,4 @@ LIB1ASMFUNCS = _divI _divU _remI _remU _div_const _mulI _dyncall
 HOST_LIBGCC2_CFLAGS += -DELF=1 -DLINUX=1
 
 LIB2ADD = $(srcdir)/config/pa/fptr.c
+LIB2ADD_ST = $(srcdir)/config/pa/sync-libfuncs.c
diff --git a/libgcc/config/pa/t-pa64-hpux b/libgcc/config/pa/t-pa64-hpux
new file mode 100644
index 00000000000..55194e8f379
--- /dev/null
+++ b/libgcc/config/pa/t-pa64-hpux
@@ -0,0 +1,4 @@
+LIB2ADD = $(srcdir)/config/pa/quadlib.c
+LIB2ADD_ST = $(srcdir)/config/pa/sync-libfuncs.c
+
+HOST_LIBGCC2_CFLAGS += -frandom-seed=fixed-seed -Dpa64=1 -DELF=1 -mlong-calls
diff --git a/libgcc/config/pa/t-pa64-linux b/libgcc/config/pa/t-pa64-linux
new file mode 100644
index 00000000000..026b48b02e5
--- /dev/null
+++ b/libgcc/config/pa/t-pa64-linux
@@ -0,0 +1,8 @@
+# Plug millicode routines into libgcc.a  We want these on both native and
+# cross compiles.
+LIB1ASMSRC = pa/milli64.S
+LIB1ASMFUNCS = _divI _divU _remI _remU _div_const _mulI
+
+HOST_LIBGCC2_CFLAGS += -Dpa64=1 -DELF=1 -DLINUX=1
+
+LIB2ADD_ST = $(srcdir)/config/pa/linux-atomic.c
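
A few illustrative sketches follow.  They are not part of the patch;
the file names, commands and helper names in them are examples only.

First, the kind of user code affected by issues 1 and 3 above.
Building something like this with -fno-sync-libcalls could previously
ICE, and on non-Linux targets atomic stores could be silently emitted
as ordinary processor stores:

  /* store.c -- e.g. "gcc -O2 -fno-sync-libcalls store.c" (hypothetical
     invocation).  With this change, the SImode store below expands to a
     __sync_lock_test_and_set_4 libcall (or __atomic_exchange_4 when
     sync libcalls are disabled), and the DImode store to the _8
     variants.  */
  #include <stdint.h>

  uint32_t word;
  uint64_t dword;

  void
  store_both (void)
  {
    __atomic_store_n (&word, 1, __ATOMIC_SEQ_CST);
    __atomic_store_n (&dword, 1, __ATOMIC_SEQ_CST);
  }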
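
Second, the locking caveat documented under -matomic-libcalls.  Since
both libcall implementations use locking, a plain store that bypasses
the lock is not atomic with respect to concurrent atomic operations,
even though the store itself is a single instruction:

  #include <stdint.h>

  uint32_t counter;

  void
  reset (void)
  {
    counter = 0;  /* Plain stw; takes no lock.  */
  }

  uint32_t
  increment (void)
  {
    /* Lock-based read-modify-write (a __kernel_cmpxchg loop on Linux).
       The plain store in reset () can land in the middle of it, so the
       increment can be lost.  */
    return __sync_fetch_and_add (&counter, 1);
  }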
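
Finally, for readers tracing the macros in linux-atomic.c, this is
roughly what one instance, __sync_fetch_and_add_4 from
FETCH_AND_OP_WORD, expands to after this change (simplified;
__kernel_cmpxchg is the kernel-assisted helper defined earlier in that
file):

  unsigned int
  __sync_fetch_and_add_4 (volatile void *ptr, unsigned int val)
  {
    unsigned int tmp;
    long failure;

    do
      {
	/* atomic_load_4: a plain word load, atomic on hppa.  */
	tmp = *(volatile unsigned int *) ptr;
	/* Retry if another writer got in between the load and the
	   compare-and-swap.  */
	failure = __kernel_cmpxchg (ptr, tmp, tmp + val);
      }
    while (failure != 0);

    return tmp;  /* Old value, per the __sync_fetch_and_OP contract.  */
  }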