From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1665) id BB5863858C60; Sun, 19 Dec 2021 23:49:53 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org BB5863858C60 MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: =?utf-8?q?Fran=C3=A7ois-Xavier_Coudert?= To: gcc-cvs@gcc.gnu.org Subject: [gcc r12-6069] Fortran: add support for IEEE intrinsics on aarch64 non-glibc targets X-Act-Checkin: gcc X-Git-Author: Francois-Xavier Coudert X-Git-Refname: refs/heads/master X-Git-Oldrev: 78fe0f23c73bf3b5d49dd2992d445bcc5db5a55c X-Git-Newrev: 220b9bdfe8faebdd2aea0ab7cea81c162d42d8e0 Message-Id: <20211219234953.BB5863858C60@sourceware.org> Date: Sun, 19 Dec 2021 23:49:53 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 19 Dec 2021 23:49:53 -0000 https://gcc.gnu.org/g:220b9bdfe8faebdd2aea0ab7cea81c162d42d8e0 commit r12-6069-g220b9bdfe8faebdd2aea0ab7cea81c162d42d8e0 Author: Francois-Xavier Coudert Date: Mon Dec 20 00:45:31 2021 +0100 Fortran: add support for IEEE intrinsics on aarch64 non-glibc targets This enables IEEE support on the upcoming aarch64-apple-darwin target, and has been tested for some time in an external port. libgfortran/ChangeLog: * configure.host: Add aarch64-apple-darwin support. * config/fpu-aarch64.h: New file. Diff: --- libgfortran/config/fpu-aarch64.h | 331 +++++++++++++++++++++++++++++++++++++++ libgfortran/configure.host | 18 ++- 2 files changed, 346 insertions(+), 3 deletions(-) diff --git a/libgfortran/config/fpu-aarch64.h b/libgfortran/config/fpu-aarch64.h new file mode 100644 index 00000000000..0746f42938a --- /dev/null +++ b/libgfortran/config/fpu-aarch64.h @@ -0,0 +1,331 @@ +/* FPU-related code for aarch64. + Copyright (C) 2020 Free Software Foundation, Inc. 
+ Contributed by Francois-Xavier Coudert + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + +/* Rounding mask and modes */ + +#define FPCR_RM_MASK 0x0c00000 +#define FE_TONEAREST 0x0000000 +#define FE_UPWARD 0x0400000 +#define FE_DOWNWARD 0x0800000 +#define FE_TOWARDZERO 0x0c00000 +#define FE_MAP_FZ 0x1000000 + +/* Exceptions */ + +#define FE_INVALID 1 +#define FE_DIVBYZERO 2 +#define FE_OVERFLOW 4 +#define FE_UNDERFLOW 8 +#define FE_INEXACT 16 + +#define FE_ALL_EXCEPT (FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW | FE_INEXACT) +#define FE_EXCEPT_SHIFT 8 + + + +/* This structure holds the FPU state saved by get_fpu_state: + the contents of the FPCR and FPSR registers. */ +struct fenv +{ + unsigned int __fpcr; + unsigned int __fpsr; +}; + +/* Check we can actually store the FPU state in the allocated size. 
*/ +_Static_assert (sizeof(struct fenv) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE, + "GFC_FPE_STATE_BUFFER_SIZE is too small"); + + + +void +set_fpu (void) +{ + if (options.fpe & GFC_FPE_DENORMAL) + estr_write ("Fortran runtime warning: Floating point 'denormal operand' " + "exception not supported.\n"); + + set_fpu_trap_exceptions (options.fpe, 0); +} + + +int +get_fpu_trap_exceptions (void) +{ + unsigned int fpcr, exceptions; + int res = 0; + + fpcr = __builtin_aarch64_get_fpcr(); + exceptions = (fpcr >> FE_EXCEPT_SHIFT) & FE_ALL_EXCEPT; + + if (exceptions & FE_INVALID) res |= GFC_FPE_INVALID; + if (exceptions & FE_DIVBYZERO) res |= GFC_FPE_ZERO; + if (exceptions & FE_OVERFLOW) res |= GFC_FPE_OVERFLOW; + if (exceptions & FE_UNDERFLOW) res |= GFC_FPE_UNDERFLOW; + if (exceptions & FE_INEXACT) res |= GFC_FPE_INEXACT; + + return res; +} + + +void set_fpu_trap_exceptions (int trap, int notrap) +{ + unsigned int mode_set = 0, mode_clr = 0; + unsigned int fpsr, fpsr_new; + unsigned int fpcr, fpcr_new; + + if (trap & GFC_FPE_INVALID) + mode_set |= FE_INVALID; + if (notrap & GFC_FPE_INVALID) + mode_clr |= FE_INVALID; + + if (trap & GFC_FPE_ZERO) + mode_set |= FE_DIVBYZERO; + if (notrap & GFC_FPE_ZERO) + mode_clr |= FE_DIVBYZERO; + + if (trap & GFC_FPE_OVERFLOW) + mode_set |= FE_OVERFLOW; + if (notrap & GFC_FPE_OVERFLOW) + mode_clr |= FE_OVERFLOW; + + if (trap & GFC_FPE_UNDERFLOW) + mode_set |= FE_UNDERFLOW; + if (notrap & GFC_FPE_UNDERFLOW) + mode_clr |= FE_UNDERFLOW; + + if (trap & GFC_FPE_INEXACT) + mode_set |= FE_INEXACT; + if (notrap & GFC_FPE_INEXACT) + mode_clr |= FE_INEXACT; + + /* Clear stalled exception flags. 
*/ + fpsr = __builtin_aarch64_get_fpsr(); + fpsr_new = fpsr & ~FE_ALL_EXCEPT; + if (fpsr_new != fpsr) + __builtin_aarch64_set_fpsr(fpsr_new); + + fpcr_new = fpcr = __builtin_aarch64_get_fpcr(); + fpcr_new |= (mode_set << FE_EXCEPT_SHIFT); + fpcr_new &= ~(mode_clr << FE_EXCEPT_SHIFT); + + if (fpcr_new != fpcr) + __builtin_aarch64_set_fpcr(fpcr_new); +} + + +int +support_fpu_flag (int flag) +{ + if (flag & GFC_FPE_DENORMAL) + return 0; + + return 1; +} + + +int +support_fpu_trap (int flag) +{ + if (flag & GFC_FPE_DENORMAL) + return 0; + + return 1; +} + + +int +get_fpu_except_flags (void) +{ + int result; + unsigned int fpsr; + + result = 0; + fpsr = __builtin_aarch64_get_fpsr() & FE_ALL_EXCEPT; + + if (fpsr & FE_INVALID) + result |= GFC_FPE_INVALID; + if (fpsr & FE_DIVBYZERO) + result |= GFC_FPE_ZERO; + if (fpsr & FE_OVERFLOW) + result |= GFC_FPE_OVERFLOW; + if (fpsr & FE_UNDERFLOW) + result |= GFC_FPE_UNDERFLOW; + if (fpsr & FE_INEXACT) + result |= GFC_FPE_INEXACT; + + return result; +} + + +void +set_fpu_except_flags (int set, int clear) +{ + unsigned int exc_set = 0, exc_clr = 0; + unsigned int fpsr, fpsr_new; + + if (set & GFC_FPE_INVALID) + exc_set |= FE_INVALID; + else if (clear & GFC_FPE_INVALID) + exc_clr |= FE_INVALID; + + if (set & GFC_FPE_ZERO) + exc_set |= FE_DIVBYZERO; + else if (clear & GFC_FPE_ZERO) + exc_clr |= FE_DIVBYZERO; + + if (set & GFC_FPE_OVERFLOW) + exc_set |= FE_OVERFLOW; + else if (clear & GFC_FPE_OVERFLOW) + exc_clr |= FE_OVERFLOW; + + if (set & GFC_FPE_UNDERFLOW) + exc_set |= FE_UNDERFLOW; + else if (clear & GFC_FPE_UNDERFLOW) + exc_clr |= FE_UNDERFLOW; + + if (set & GFC_FPE_INEXACT) + exc_set |= FE_INEXACT; + else if (clear & GFC_FPE_INEXACT) + exc_clr |= FE_INEXACT; + + fpsr_new = fpsr = __builtin_aarch64_get_fpsr(); + fpsr_new &= ~exc_clr; + fpsr_new |= exc_set; + + if (fpsr_new != fpsr) + __builtin_aarch64_set_fpsr(fpsr_new); +} + + +void +get_fpu_state (void *state) +{ + struct fenv *envp = state; + envp->__fpcr = 
__builtin_aarch64_get_fpcr(); + envp->__fpsr = __builtin_aarch64_get_fpsr(); +} + + +void +set_fpu_state (void *state) +{ + struct fenv *envp = state; + __builtin_aarch64_set_fpcr(envp->__fpcr); + __builtin_aarch64_set_fpsr(envp->__fpsr); +} + + +int +get_fpu_rounding_mode (void) +{ + unsigned int fpcr = __builtin_aarch64_get_fpcr(); + fpcr &= FPCR_RM_MASK; + + switch (fpcr) + { + case FE_TONEAREST: + return GFC_FPE_TONEAREST; + case FE_UPWARD: + return GFC_FPE_UPWARD; + case FE_DOWNWARD: + return GFC_FPE_DOWNWARD; + case FE_TOWARDZERO: + return GFC_FPE_TOWARDZERO; + default: + return 0; /* Should be unreachable. */ + } +} + + +void +set_fpu_rounding_mode (int round) +{ + unsigned int fpcr, round_mode; + + switch (round) + { + case GFC_FPE_TONEAREST: + round_mode = FE_TONEAREST; + break; + case GFC_FPE_UPWARD: + round_mode = FE_UPWARD; + break; + case GFC_FPE_DOWNWARD: + round_mode = FE_DOWNWARD; + break; + case GFC_FPE_TOWARDZERO: + round_mode = FE_TOWARDZERO; + break; + default: + return; /* Should be unreachable. */ + } + + fpcr = __builtin_aarch64_get_fpcr(); + + /* Only set FPCR if requested mode is different from current. */ + round_mode = (fpcr ^ round_mode) & FPCR_RM_MASK; + if (round_mode != 0) + __builtin_aarch64_set_fpcr(fpcr ^ round_mode); +} + + +int +support_fpu_rounding_mode (int mode __attribute__((unused))) +{ + return 1; +} + + +int +support_fpu_underflow_control (int kind __attribute__((unused))) +{ + /* Not supported for binary128. */ + return (kind == 4 || kind == 8) ? 1 : 0; +} + + +int +get_fpu_underflow_mode (void) +{ + unsigned int fpcr = __builtin_aarch64_get_fpcr(); + + /* Return 0 for abrupt underflow (flush to zero), 1 for gradual underflow. */ + return (fpcr & FE_MAP_FZ) ? 
0 : 1; +} + + +void +set_fpu_underflow_mode (int gradual __attribute__((unused))) +{ + unsigned int fpcr = __builtin_aarch64_get_fpcr(); + + if (gradual) + fpcr &= ~FE_MAP_FZ; + else + fpcr |= FE_MAP_FZ; + + __builtin_aarch64_set_fpcr(fpcr); +} diff --git a/libgfortran/configure.host b/libgfortran/configure.host index e9d92c9d34d..3d6c2db7772 100644 --- a/libgfortran/configure.host +++ b/libgfortran/configure.host @@ -39,17 +39,29 @@ if test "x${have_feenableexcept}" = "xyes"; then ieee_support='yes' fi -# x86 asm should be used instead of glibc, since glibc doesn't support -# the x86 denormal exception. case "${host_cpu}" in + + # x86 asm should be used instead of glibc, since glibc doesn't support + # the x86 denormal exception. i?86 | x86_64) if test "x${have_soft_float}" = "xyes"; then fpu_host='fpu-generic' + ieee_support='no' else fpu_host='fpu-387' + ieee_support='yes' fi - ieee_support='yes' ;; + + # use asm on aarch64-darwin + aarch64) + case "${host_os}" in + darwin*) + fpu_host='fpu-aarch64' + ieee_support='yes' + ;; + esac + esac # Some targets require additional compiler options for NaN/Inf.