From: Richard Sandiford <richard.sandiford@arm.com>
To: Tamar Christina
Cc: gcc-patches@gcc.gnu.org, nd@arm.com, Richard.Earnshaw@arm.com,
	Marcus.Shawcroft@arm.com, Kyrylo.Tkachov@arm.com
Subject: Re: [PATCH 4/8]AArch64 aarch64: Implement widening reduction patterns
Date: Tue, 01 Nov 2022 14:41:58 +0000
In-Reply-To: (Tamar Christina's message of "Mon, 31 Oct 2022 11:58:09 +0000")

Tamar Christina writes:
> Hi All,
>
> This implements the new widening reduction optab in the backend.
> Instead of introducing a duplicate definition for the same thing I have
> renamed the intrinsics definitions to use the same optab.
>
> Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> 	* config/aarch64/aarch64-simd-builtins.def (saddlv, uaddlv): Rename to
> 	reduc_splus_widen_scal_ and reduc_uplus_widen_scal_ respectively.
> 	* config/aarch64/aarch64-simd.md (aarch64_addlv): Renamed to ...
> 	(reduc_plus_widen_scal_): ... This.
> 	* config/aarch64/arm_neon.h (vaddlv_s8, vaddlv_s16, vaddlv_u8,
> 	vaddlv_u16, vaddlvq_s8, vaddlvq_s16, vaddlvq_s32, vaddlvq_u8,
> 	vaddlvq_u16, vaddlvq_u32, vaddlv_s32, vaddlv_u32): Use it.

OK, thanks.

Richard

> --- inline copy of patch --
> diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
> index cf46b31627b84476a25762ffc708fd84a4086e43..a4b21e1495c5699d8557a4bcb9e73ef98ae60b35 100644
> --- a/gcc/config/aarch64/aarch64-simd-builtins.def
> +++ b/gcc/config/aarch64/aarch64-simd-builtins.def
> @@ -190,9 +190,9 @@
>    BUILTIN_VDQV_L (UNOP, saddlp, 0, NONE)
>    BUILTIN_VDQV_L (UNOPU, uaddlp, 0, NONE)
>
> -  /* Implemented by aarch64_addlv.  */
> -  BUILTIN_VDQV_L (UNOP, saddlv, 0, NONE)
> -  BUILTIN_VDQV_L (UNOPU, uaddlv, 0, NONE)
> +  /* Implemented by reduc_plus_widen_scal_.  */
> +  BUILTIN_VDQV_L (UNOP, reduc_splus_widen_scal_, 10, NONE)
> +  BUILTIN_VDQV_L (UNOPU, reduc_uplus_widen_scal_, 10, NONE)
>
>    /* Implemented by aarch64_abd.  */
>    BUILTIN_VDQ_BHSI (BINOP, sabd, 0, NONE)
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index cf8c094bd4b76981cef2dd5dd7b8e6be0d56101f..25aed74f8cf939562ed65a578fe32ca76605b58a 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -3455,7 +3455,7 @@ (define_expand "reduc_plus_scal_v4sf"
>    DONE;
> })
>
> -(define_insn "aarch64_addlv"
> +(define_insn "reduc_plus_widen_scal_"
>   [(set (match_operand: 0 "register_operand" "=w")
> 	(unspec: [(match_operand:VDQV_L 1 "register_operand" "w")]
> 		   USADDLV))]
> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
> index cf6af728ca99dae1cb6ab647466cfec32f7e913e..7b2c4c016191bcd6c3e075d27810faedb23854b7 100644
> --- a/gcc/config/aarch64/arm_neon.h
> +++ b/gcc/config/aarch64/arm_neon.h
> @@ -3664,70 +3664,70 @@ __extension__ extern __inline int16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vaddlv_s8 (int8x8_t __a)
>  {
> -  return __builtin_aarch64_saddlvv8qi (__a);
> +  return __builtin_aarch64_reduc_splus_widen_scal_v8qi (__a);
>  }
>
>  __extension__ extern __inline int32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vaddlv_s16 (int16x4_t __a)
>  {
> -  return __builtin_aarch64_saddlvv4hi (__a);
> +  return __builtin_aarch64_reduc_splus_widen_scal_v4hi (__a);
>  }
>
>  __extension__ extern __inline uint16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vaddlv_u8 (uint8x8_t __a)
>  {
> -  return __builtin_aarch64_uaddlvv8qi_uu (__a);
> +  return __builtin_aarch64_reduc_uplus_widen_scal_v8qi_uu (__a);
>  }
>
>  __extension__ extern __inline uint32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vaddlv_u16 (uint16x4_t __a)
>  {
> -  return __builtin_aarch64_uaddlvv4hi_uu (__a);
> +  return __builtin_aarch64_reduc_uplus_widen_scal_v4hi_uu (__a);
>  }
>
>  __extension__ extern __inline int16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vaddlvq_s8 (int8x16_t __a)
>  {
> -  return __builtin_aarch64_saddlvv16qi (__a);
> +  return __builtin_aarch64_reduc_splus_widen_scal_v16qi (__a);
>  }
>
>  __extension__ extern __inline int32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vaddlvq_s16 (int16x8_t __a)
>  {
> -  return __builtin_aarch64_saddlvv8hi (__a);
> +  return __builtin_aarch64_reduc_splus_widen_scal_v8hi (__a);
>  }
>
>  __extension__ extern __inline int64_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vaddlvq_s32 (int32x4_t __a)
>  {
> -  return __builtin_aarch64_saddlvv4si (__a);
> +  return __builtin_aarch64_reduc_splus_widen_scal_v4si (__a);
>  }
>
>  __extension__ extern __inline uint16_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vaddlvq_u8 (uint8x16_t __a)
>  {
> -  return __builtin_aarch64_uaddlvv16qi_uu (__a);
> +  return __builtin_aarch64_reduc_uplus_widen_scal_v16qi_uu (__a);
>  }
>
>  __extension__ extern __inline uint32_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vaddlvq_u16 (uint16x8_t __a)
>  {
> -  return __builtin_aarch64_uaddlvv8hi_uu (__a);
> +  return __builtin_aarch64_reduc_uplus_widen_scal_v8hi_uu (__a);
>  }
>
>  __extension__ extern __inline uint64_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vaddlvq_u32 (uint32x4_t __a)
>  {
> -  return __builtin_aarch64_uaddlvv4si_uu (__a);
> +  return __builtin_aarch64_reduc_uplus_widen_scal_v4si_uu (__a);
>  }
>
>  __extension__ extern __inline float32x2_t
> @@ -6461,14 +6461,14 @@ __extension__ extern __inline int64_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vaddlv_s32 (int32x2_t __a)
>  {
> -  return __builtin_aarch64_saddlvv2si (__a);
> +  return __builtin_aarch64_reduc_splus_widen_scal_v2si (__a);
>  }
>
>  __extension__ extern __inline uint64_t
>  __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>  vaddlv_u32 (uint32x2_t __a)
>  {
> -  return __builtin_aarch64_uaddlvv2si_uu (__a);
> +  return __builtin_aarch64_reduc_uplus_widen_scal_v2si_uu (__a);
>  }
>
>  __extension__ extern __inline int16x4_t
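
For anyone reading along, here is a minimal sketch (not part of the patch) of
what these widening add reductions look like at the source level.  The
function names sum_widen_scalar and sum_widen_intrinsic are made up for
illustration; only the mapping from vaddlv_s8 to
__builtin_aarch64_reduc_splus_widen_scal_v8qi comes from the patch itself.

#include <arm_neon.h>
#include <stdint.h>

/* A widening add reduction written as a plain scalar loop: eight 8-bit
   inputs accumulated into a 16-bit sum, so the result type is already wide
   enough that the reduction cannot overflow.  This is the kind of loop the
   new reduc_plus_widen_scal_ optab is intended to expose to the
   vectoriser.  */
int16_t
sum_widen_scalar (const int8_t *p)
{
  int16_t sum = 0;
  for (int i = 0; i < 8; i++)
    sum += p[i];
  return sum;
}

/* The same reduction spelled with the ACLE intrinsic.  After this patch
   vaddlv_s8 expands through __builtin_aarch64_reduc_splus_widen_scal_v8qi,
   i.e. the same pattern the optab entry now names, and is implemented with
   a single SADDLV instruction.  */
int16_t
sum_widen_intrinsic (const int8_t *p)
{
  return vaddlv_s8 (vld1_s8 (p));
}

In both cases the scalar result is already the widened type (int16_t for an
8-bit input vector), which is what distinguishes this optab from an ordinary
reduc_plus reduction.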