public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] aarch64: Relax flags of saturation builtins
@ 2021-01-19 17:31 Kyrylo Tkachov
  0 siblings, 0 replies; only message in thread
From: Kyrylo Tkachov @ 2021-01-19 17:31 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1002 bytes --]

Hi all,

This patch relaxes the flags of the saturating arithmetic builtins from ALL to NONE, which allows more optimisation.
Bootstrapped and tested on aarch64-none-linux-gnu.

Pushing to trunk.
Thanks,
Kyrill

gcc/ChangeLog

	* config/aarch64/aarch64-simd-builtins.def (sqshl, uqshl, sqrshl, uqrshl,
	sqadd, uqadd, sqsub, uqsub, suqadd, usqadd, sqmovn, uqmovn, sqxtn2,
	uqxtn2, sqabs, sqneg, sqdmlal, sqdmlsl, sqdmlal_lane, sqdmlsl_lane,
	sqdmlal_laneq, sqdmlsl_laneq, sqdmlal_n, sqdmlsl_n, sqdmlal2,
	sqdmlsl2, sqdmlal2_lane, sqdmlsl2_lane, sqdmlal2_laneq, sqdmlsl2_laneq,
	sqdmlal2_n, sqdmlsl2_n, sqdmull, sqdmull_lane, sqdmull_laneq, sqdmull_n,
	sqdmull2, sqdmull2_lane, sqdmull2_laneq, sqdmull2_n, sqdmulh, sqrdmulh,
	sqdmulh_lane, sqdmulh_laneq, sqrdmulh_lane, sqrdmulh_laneq, sqshrun_n,
	sqrshrun_n, sqshrn_n, uqshrn_n, sqrshrn_n, uqrshrn_n, sqshlu_n, sqshl_n,
	uqshl_n, sqrdmlah, sqrdmlsh, sqrdmlah_lane, sqrdmlsh_lane, sqrdmlah_laneq,
	sqrdmlsh_laneq, sqmovun): Use NONE flags.

[-- Attachment #2: sat-flags.patch --]
[-- Type: application/octet-stream, Size: 8774 bytes --]

diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 73a24d59745ab03fbed213b01eb3134d053295e1..168e47da95f03d48d280fa2b2d4e6b7895357984 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -56,18 +56,18 @@
   BUILTIN_VB (UNOP, popcount, 2, ALL)
 
   /* Implemented by aarch64_<sur>q<r>shl<mode>.  */
-  BUILTIN_VSDQ_I (BINOP, sqshl, 0, ALL)
-  BUILTIN_VSDQ_I (BINOP_UUS, uqshl, 0, ALL)
-  BUILTIN_VSDQ_I (BINOP, sqrshl, 0, ALL)
-  BUILTIN_VSDQ_I (BINOP_UUS, uqrshl, 0, ALL)
+  BUILTIN_VSDQ_I (BINOP, sqshl, 0, NONE)
+  BUILTIN_VSDQ_I (BINOP_UUS, uqshl, 0, NONE)
+  BUILTIN_VSDQ_I (BINOP, sqrshl, 0, NONE)
+  BUILTIN_VSDQ_I (BINOP_UUS, uqrshl, 0, NONE)
   /* Implemented by aarch64_<su_optab><optab><mode>.  */
-  BUILTIN_VSDQ_I (BINOP, sqadd, 0, ALL)
-  BUILTIN_VSDQ_I (BINOPU, uqadd, 0, ALL)
-  BUILTIN_VSDQ_I (BINOP, sqsub, 0, ALL)
-  BUILTIN_VSDQ_I (BINOPU, uqsub, 0, ALL)
+  BUILTIN_VSDQ_I (BINOP, sqadd, 0, NONE)
+  BUILTIN_VSDQ_I (BINOPU, uqadd, 0, NONE)
+  BUILTIN_VSDQ_I (BINOP, sqsub, 0, NONE)
+  BUILTIN_VSDQ_I (BINOPU, uqsub, 0, NONE)
   /* Implemented by aarch64_<sur>qadd<mode>.  */
-  BUILTIN_VSDQ_I (BINOP_SSU, suqadd, 0, ALL)
-  BUILTIN_VSDQ_I (BINOP_UUS, usqadd, 0, ALL)
+  BUILTIN_VSDQ_I (BINOP_SSU, suqadd, 0, NONE)
+  BUILTIN_VSDQ_I (BINOP_UUS, usqadd, 0, NONE)
 
   /* Implemented by aarch64_get_dreg<VSTRUCT:mode><VDC:mode>.  */
   BUILTIN_VDC (GETREG, get_dregoi, 0, AUTO_FP)
@@ -186,40 +186,40 @@
   BUILTIN_VQW (TERNOP, smlsl_hi, 0, NONE)
   BUILTIN_VQW (TERNOPU, umlsl_hi, 0, NONE)
 
-  BUILTIN_VSQN_HSDI (UNOPUS, sqmovun, 0, ALL)
+  BUILTIN_VSQN_HSDI (UNOPUS, sqmovun, 0, NONE)
   /* Implemented by aarch64_<sur>qmovn<mode>.  */
-  BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0, ALL)
-  BUILTIN_VSQN_HSDI (UNOP, uqmovn, 0, ALL)
+  BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0, NONE)
+  BUILTIN_VSQN_HSDI (UNOP, uqmovn, 0, NONE)
 
   /* Implemented by aarch64_<su>qxtn2<mode>.  */
-  BUILTIN_VQN (BINOP, sqxtn2, 0, ALL)
-  BUILTIN_VQN (BINOPU, uqxtn2, 0, ALL)
+  BUILTIN_VQN (BINOP, sqxtn2, 0, NONE)
+  BUILTIN_VQN (BINOPU, uqxtn2, 0, NONE)
 
   /* Implemented by aarch64_s<optab><mode>.  */
-  BUILTIN_VSDQ_I (UNOP, sqabs, 0, ALL)
-  BUILTIN_VSDQ_I (UNOP, sqneg, 0, ALL)
+  BUILTIN_VSDQ_I (UNOP, sqabs, 0, NONE)
+  BUILTIN_VSDQ_I (UNOP, sqneg, 0, NONE)
 
   /* Implemented by aarch64_sqdml<SBINQOPS:as>l<mode>.  */
-  BUILTIN_VSD_HSI (TERNOP, sqdmlal, 0, ALL)
-  BUILTIN_VSD_HSI (TERNOP, sqdmlsl, 0, ALL)
+  BUILTIN_VSD_HSI (TERNOP, sqdmlal, 0, NONE)
+  BUILTIN_VSD_HSI (TERNOP, sqdmlsl, 0, NONE)
   /* Implemented by aarch64_sqdml<SBINQOPS:as>l_lane<mode>.  */
-  BUILTIN_VSD_HSI (QUADOP_LANE, sqdmlal_lane, 0, ALL)
-  BUILTIN_VSD_HSI (QUADOP_LANE, sqdmlsl_lane, 0, ALL)
+  BUILTIN_VSD_HSI (QUADOP_LANE, sqdmlal_lane, 0, NONE)
+  BUILTIN_VSD_HSI (QUADOP_LANE, sqdmlsl_lane, 0, NONE)
   /* Implemented by aarch64_sqdml<SBINQOPS:as>l_laneq<mode>.  */
-  BUILTIN_VSD_HSI (QUADOP_LANE, sqdmlal_laneq, 0, ALL)
-  BUILTIN_VSD_HSI (QUADOP_LANE, sqdmlsl_laneq, 0, ALL)
+  BUILTIN_VSD_HSI (QUADOP_LANE, sqdmlal_laneq, 0, NONE)
+  BUILTIN_VSD_HSI (QUADOP_LANE, sqdmlsl_laneq, 0, NONE)
   /* Implemented by aarch64_sqdml<SBINQOPS:as>l_n<mode>.  */
-  BUILTIN_VD_HSI (TERNOP, sqdmlal_n, 0, ALL)
-  BUILTIN_VD_HSI (TERNOP, sqdmlsl_n, 0, ALL)
-
-  BUILTIN_VQ_HSI (TERNOP, sqdmlal2, 0, ALL)
-  BUILTIN_VQ_HSI (TERNOP, sqdmlsl2, 0, ALL)
-  BUILTIN_VQ_HSI (QUADOP_LANE, sqdmlal2_lane, 0, ALL)
-  BUILTIN_VQ_HSI (QUADOP_LANE, sqdmlsl2_lane, 0, ALL)
-  BUILTIN_VQ_HSI (QUADOP_LANE, sqdmlal2_laneq, 0, ALL)
-  BUILTIN_VQ_HSI (QUADOP_LANE, sqdmlsl2_laneq, 0, ALL)
-  BUILTIN_VQ_HSI (TERNOP, sqdmlal2_n, 0, ALL)
-  BUILTIN_VQ_HSI (TERNOP, sqdmlsl2_n, 0, ALL)
+  BUILTIN_VD_HSI (TERNOP, sqdmlal_n, 0, NONE)
+  BUILTIN_VD_HSI (TERNOP, sqdmlsl_n, 0, NONE)
+
+  BUILTIN_VQ_HSI (TERNOP, sqdmlal2, 0, NONE)
+  BUILTIN_VQ_HSI (TERNOP, sqdmlsl2, 0, NONE)
+  BUILTIN_VQ_HSI (QUADOP_LANE, sqdmlal2_lane, 0, NONE)
+  BUILTIN_VQ_HSI (QUADOP_LANE, sqdmlsl2_lane, 0, NONE)
+  BUILTIN_VQ_HSI (QUADOP_LANE, sqdmlal2_laneq, 0, NONE)
+  BUILTIN_VQ_HSI (QUADOP_LANE, sqdmlsl2_laneq, 0, NONE)
+  BUILTIN_VQ_HSI (TERNOP, sqdmlal2_n, 0, NONE)
+  BUILTIN_VQ_HSI (TERNOP, sqdmlsl2_n, 0, NONE)
 
   BUILTIN_VD_BHSI (BINOP, intrinsic_vec_smult_lo_, 0, NONE)
   BUILTIN_VD_BHSI (BINOPU, intrinsic_vec_umult_lo_, 0, NONE)
@@ -236,22 +236,22 @@
   BUILTIN_VD_HSI (TERNOPU_LANE, vec_umult_laneq_, 0, ALL)
   BUILTIN_VD_HSI (QUADOPU_LANE, vec_umlal_laneq_, 0, ALL)
 
-  BUILTIN_VSD_HSI (BINOP, sqdmull, 0, ALL)
-  BUILTIN_VSD_HSI (TERNOP_LANE, sqdmull_lane, 0, ALL)
-  BUILTIN_VSD_HSI (TERNOP_LANE, sqdmull_laneq, 0, ALL)
-  BUILTIN_VD_HSI (BINOP, sqdmull_n, 0, ALL)
-  BUILTIN_VQ_HSI (BINOP, sqdmull2, 0, ALL)
-  BUILTIN_VQ_HSI (TERNOP_LANE, sqdmull2_lane, 0, ALL)
-  BUILTIN_VQ_HSI (TERNOP_LANE, sqdmull2_laneq, 0, ALL)
-  BUILTIN_VQ_HSI (BINOP, sqdmull2_n, 0, ALL)
+  BUILTIN_VSD_HSI (BINOP, sqdmull, 0, NONE)
+  BUILTIN_VSD_HSI (TERNOP_LANE, sqdmull_lane, 0, NONE)
+  BUILTIN_VSD_HSI (TERNOP_LANE, sqdmull_laneq, 0, NONE)
+  BUILTIN_VD_HSI (BINOP, sqdmull_n, 0, NONE)
+  BUILTIN_VQ_HSI (BINOP, sqdmull2, 0, NONE)
+  BUILTIN_VQ_HSI (TERNOP_LANE, sqdmull2_lane, 0, NONE)
+  BUILTIN_VQ_HSI (TERNOP_LANE, sqdmull2_laneq, 0, NONE)
+  BUILTIN_VQ_HSI (BINOP, sqdmull2_n, 0, NONE)
   /* Implemented by aarch64_sq<r>dmulh<mode>.  */
-  BUILTIN_VSDQ_HSI (BINOP, sqdmulh, 0, ALL)
-  BUILTIN_VSDQ_HSI (BINOP, sqrdmulh, 0, ALL)
+  BUILTIN_VSDQ_HSI (BINOP, sqdmulh, 0, NONE)
+  BUILTIN_VSDQ_HSI (BINOP, sqrdmulh, 0, NONE)
   /* Implemented by aarch64_sq<r>dmulh_lane<q><mode>.  */
-  BUILTIN_VSDQ_HSI (TERNOP_LANE, sqdmulh_lane, 0, ALL)
-  BUILTIN_VSDQ_HSI (TERNOP_LANE, sqdmulh_laneq, 0, ALL)
-  BUILTIN_VSDQ_HSI (TERNOP_LANE, sqrdmulh_lane, 0, ALL)
-  BUILTIN_VSDQ_HSI (TERNOP_LANE, sqrdmulh_laneq, 0, ALL)
+  BUILTIN_VSDQ_HSI (TERNOP_LANE, sqdmulh_lane, 0, NONE)
+  BUILTIN_VSDQ_HSI (TERNOP_LANE, sqdmulh_laneq, 0, NONE)
+  BUILTIN_VSDQ_HSI (TERNOP_LANE, sqrdmulh_lane, 0, NONE)
+  BUILTIN_VSDQ_HSI (TERNOP_LANE, sqrdmulh_laneq, 0, NONE)
 
   BUILTIN_VSDQ_I_DI (BINOP, ashl, 3, ALL)
   /* Implemented by aarch64_<sur>shl<mode>.  */
@@ -311,12 +311,12 @@
   BUILTIN_VQW (SHIFTIMM, sshll2_n, 0, ALL)
   BUILTIN_VQW (SHIFTIMM, ushll2_n, 0, ALL)
   /* Implemented by aarch64_<sur>q<r>shr<u>n_n<mode>.  */
-  BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrun_n, 0, ALL)
-  BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrun_n, 0, ALL)
-  BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrn_n, 0, ALL)
-  BUILTIN_VSQN_HSDI (USHIFTIMM, uqshrn_n, 0, ALL)
-  BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n, 0, ALL)
-  BUILTIN_VSQN_HSDI (USHIFTIMM, uqrshrn_n, 0, ALL)
+  BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrun_n, 0, NONE)
+  BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrun_n, 0, NONE)
+  BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrn_n, 0, NONE)
+  BUILTIN_VSQN_HSDI (USHIFTIMM, uqshrn_n, 0, NONE)
+  BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n, 0, NONE)
+  BUILTIN_VSQN_HSDI (USHIFTIMM, uqrshrn_n, 0, NONE)
   /* Implemented by aarch64_<sur>q<r>shr<u>n2_n<mode>.  */
   BUILTIN_VQN (SHIFT2IMM_UUSS, sqshrun2_n, 0, NONE)
   BUILTIN_VQN (SHIFT2IMM_UUSS, sqrshrun2_n, 0, NONE)
@@ -331,9 +331,9 @@
   VAR2 (SHIFTINSERTP, ssli_n, 0, ALL, di, v2di)
   BUILTIN_VSDQ_I_DI (USHIFTACC, usli_n, 0, ALL)
   /* Implemented by aarch64_<sur>qshl<u>_n<mode>.  */
-  BUILTIN_VSDQ_I (SHIFTIMM_USS, sqshlu_n, 0, ALL)
-  BUILTIN_VSDQ_I (SHIFTIMM, sqshl_n, 0, ALL)
-  BUILTIN_VSDQ_I (USHIFTIMM, uqshl_n, 0, ALL)
+  BUILTIN_VSDQ_I (SHIFTIMM_USS, sqshlu_n, 0, NONE)
+  BUILTIN_VSDQ_I (SHIFTIMM, sqshl_n, 0, NONE)
+  BUILTIN_VSDQ_I (USHIFTIMM, uqshl_n, 0, NONE)
 
   /* Implemented by aarch64_xtn2<mode>.  */
   BUILTIN_VQN (UNOP, xtn2, 0, NONE)
@@ -606,16 +606,16 @@
   /* Builtins for ARMv8.1-A Adv.SIMD instructions.  */
 
   /* Implemented by aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>.  */
-  BUILTIN_VSDQ_HSI (TERNOP, sqrdmlah, 0, ALL)
-  BUILTIN_VSDQ_HSI (TERNOP, sqrdmlsh, 0, ALL)
+  BUILTIN_VSDQ_HSI (TERNOP, sqrdmlah, 0, NONE)
+  BUILTIN_VSDQ_HSI (TERNOP, sqrdmlsh, 0, NONE)
 
   /* Implemented by aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>.  */
-  BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlah_lane, 0, ALL)
-  BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlsh_lane, 0, ALL)
+  BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlah_lane, 0, NONE)
+  BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlsh_lane, 0, NONE)
 
   /* Implemented by aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>.  */
-  BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlah_laneq, 0, ALL)
-  BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlsh_laneq, 0, ALL)
+  BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlah_laneq, 0, NONE)
+  BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlsh_laneq, 0, NONE)
 
   /* Implemented by <FCVT_F2FIXED/FIXED2F:fcvt_fixed_insn><*><*>3.  */
   BUILTIN_VSDQ_HSDI (SHIFTIMM, scvtf, 3, ALL)

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-01-19 17:31 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-01-19 17:31 [PATCH] aarch64: Relax flags of saturation builtins Kyrylo Tkachov

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).