From: Joel Hutton <Joel.Hutton@arm.com>
To: GCC Patches <gcc-patches@gcc.gnu.org>
Cc: nd <nd@arm.com>
Subject: [AArch64] Use scvtf fbits option where appropriate
Date: Thu, 13 Jun 2019 16:04:00 -0000 [thread overview]
Message-ID: <DB6PR0801MB205410FFC9CB7FD4B643E765F5EF0@DB6PR0801MB2054.eurprd08.prod.outlook.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 2247 bytes --]
Hi all,
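There was previously no backend pattern to utilise the fbits (fractional bits) operand of scvtf/ucvtf. Where an integer is converted to floating point and the result is then divided by a power of 2 (or, equivalently, multiplied by the reciprocal of a power of 2), the two operations can be combined into a single scvtf/ucvtf with an fbits operand, which treats the integer input as a fixed-point value. This patch adds patterns to combine these instructions, together with a helper function, a predicate and a constraint.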
For the following test case:
float f(int a) { return ((float) a) / 65536.0; }
double g(int a) { return ((double) a) / 4096.0; }
the output generated is currently:
f:
scvtf s1, w0 // 6 [c=8 l=4] floatsisf2/1
mov w1, 931135488 // 17 [c=4 l=4] *movsi_aarch64/3
fmov s0, w1 // 18 [c=4 l=4] *movsf_aarch64/1
fmul s0, s1, s0 // 13 [c=8 l=4] mulsf3
ret // 24 [c=0 l=4] *do_return
g:
scvtf d1, w0 // 6 [c=8 l=4] floatsidf2
mov x1, 4553139223271571456 // 17 [c=4 l=4] *movdi_aarch64/3
fmov d0, x1 // 18 [c=4 l=4] *movdf_aarch64/1
fmul d0, d1, d0 // 13 [c=8 l=4] muldf3
ret // 24 [c=0 l=4] *do_return
The output with this patch applied is:
f:
scvtf s0, w0, #16 // 13 [c=24 l=4] *combine_scvtf_SI_sf3/1
ret // 22 [c=0 l=4] *do_return
g:
scvtf d0, w0, #12 // 13 [c=24 l=4] *combine_scvtf_SI_df3
ret // 22 [c=0 l=4] *do_return
gcc/ChangeLog:
2019-06-12 Joel Hutton <Joel.Hutton@arm.com>
* config/aarch64/aarch64-protos.h (aarch64_fpconst_pow2_recip): New prototype.
* config/aarch64/aarch64.c (aarch64_fpconst_pow2_recip): New function.
* config/aarch64/aarch64.md (*aarch64_<su_optab>cvtf_<fcvt_target>_<GPF:mode>2_mult): New pattern.
(*aarch64_<su_optab>cvtf_<fcvt_iesize>_<GPF:mode>2_mult): New pattern.
* config/aarch64/constraints.md (Dt): New constraint.
* config/aarch64/predicates.md (aarch64_fp_pow2_recip): New predicate.
gcc/testsuite/ChangeLog:
2019-06-12 Joel Hutton <Joel.Hutton@arm.com>
* gcc.target/aarch64/fmul_scvtf.c: New test.
Bootstrapped and regression tested on aarch64-linux-none target.
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-SCVTF-fbits.patch --]
[-- Type: text/x-patch; name="0001-SCVTF-fbits.patch", Size: 10298 bytes --]
From 6aac0d56dc7d34e7a6fcabc1e8b0c7c291c0d51a Mon Sep 17 00:00:00 2001
From: Joel Hutton <Joel.Hutton@arm.com>
Date: Thu, 13 Jun 2019 11:08:56 +0100
Subject: [PATCH] SCVTF fbits
---
gcc/config/aarch64/aarch64-protos.h | 1 +
gcc/config/aarch64/aarch64.c | 30 ++++
gcc/config/aarch64/aarch64.md | 34 +++++
gcc/config/aarch64/constraints.md | 7 +
gcc/config/aarch64/predicates.md | 4 +
gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c | 140 ++++++++++++++++++
6 files changed, 216 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 1e3b1c91db1..ad1ba458a3f 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -494,6 +494,7 @@ enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx);
enum reg_class aarch64_regno_regclass (unsigned);
int aarch64_asm_preferred_eh_data_format (int, int);
int aarch64_fpconst_pow_of_2 (rtx);
+int aarch64_fpconst_pow2_recip (rtx);
machine_mode aarch64_hard_regno_caller_save_mode (unsigned, unsigned,
machine_mode);
int aarch64_uxt_size (int, HOST_WIDE_INT);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 9a035dd9ed8..49a7d9256cf 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -18707,6 +18707,36 @@ aarch64_fpconst_pow_of_2 (rtx x)
return exact_log2 (real_to_integer (r));
}
+/* If X is a positive CONST_DOUBLE whose value is the reciprocal of a power
+ of 2 (i.e. 1/2^n with n in the range 1..31), return n, the number of
+ fractional bits. Otherwise (including for X == 1.0, i.e. n == 0) return 0. */
+int
+aarch64_fpconst_pow2_recip (rtx x)
+{
+ REAL_VALUE_TYPE r0;
+
+ if (!CONST_DOUBLE_P (x))
+ return 0;
+
+ r0 = *CONST_DOUBLE_REAL_VALUE (x); /* Work on a copy; X itself is not modified. */
+ if (exact_real_inverse (DFmode, &r0) /* NOTE(review): relies on R0 being replaced by 1/X here. */
+ && !REAL_VALUE_NEGATIVE (r0))
+ {
+ if (exact_real_truncate (DFmode, &r0))
+ {
+ HOST_WIDE_INT value = real_to_integer (&r0);
+ value = value & 0xffffffff; /* Only the low 32 bits are considered. */
+ if ((value != 0) && ( (value & (value - 1)) == 0)) /* Exactly one bit set. */
+ {
+ int ret = exact_log2 (value);
+ gcc_assert (IN_RANGE (ret, 0, 31));
+ return ret;
+ }
+ }
+ }
+ return 0;
+}
+
/* If X is a vector of equal CONST_DOUBLE values and that value is
Y, return the aarch64_fpconst_pow_of_2 of Y. Otherwise return -1. */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 526c7fb0dab..60bcf1bc8d9 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -6016,6 +6016,40 @@
[(set_attr "type" "f_cvtf2i")]
)
+(define_insn "*aarch64_<su_optab>cvtf_<fcvt_target>_<GPF:mode>2_mult" ;; int->FP convert fused with a multiply by 1/2^n
+ [(set (match_operand:GPF 0 "register_operand" "=w,w")
+ (mult:GPF (FLOATUORS:GPF
+ (match_operand:<FCVT_TARGET> 1 "register_operand" "w,?r"))
+ (match_operand:GPF 2 "aarch64_fp_pow2_recip" "Dt,Dt")))]
+ "TARGET_FLOAT"
+ {
+ operands[2] = GEN_INT (aarch64_fpconst_pow2_recip (operands[2])); /* fbits immediate */
+ switch (which_alternative)
+ {
+ case 0: /* Integer source held in a SIMD/FP register. */
+ return "<su_optab>cvtf\t%<GPF:s>0, %<s>1, #%2";
+ case 1: /* Integer source held in a general register. */
+ return "<su_optab>cvtf\t%<GPF:s>0, %<w1>1, #%2";
+ }
+ return "";
+ }
+ [(set_attr "type" "neon_int_to_fp_<Vetype>,f_cvti2f")
+ (set_attr "arch" "simd,fp")]
+)
+
+(define_insn "*aarch64_<su_optab>cvtf_<fcvt_iesize>_<GPF:mode>2_mult" ;; as above, for the <FCVT_IESIZE> integer source mode
+ [(set (match_operand:GPF 0 "register_operand" "=w")
+ (mult:GPF (FLOATUORS:GPF
+ (match_operand:<FCVT_IESIZE> 1 "register_operand" "r"))
+ (match_operand:GPF 2 "aarch64_fp_pow2_recip" "Dt")))]
+ "TARGET_FLOAT"
+ {
+ operands[2] = GEN_INT (aarch64_fpconst_pow2_recip (operands[2])); /* fbits immediate */
+ return "<su_optab>cvtf\t%<GPF:s>0, %<w2>1, #%2";
+ }
+ [(set_attr "type" "f_cvti2f")]
+)
+
(define_insn "<optab><fcvt_target><GPF:mode>2"
[(set (match_operand:GPF 0 "register_operand" "=w,w")
(FLOATUORS:GPF (match_operand:<FCVT_TARGET> 1 "register_operand" "w,?r")))]
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 21f9549e660..a7731a033ea 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -329,6 +329,13 @@
(match_test "aarch64_simd_scalar_immediate_valid_for_move (op,
QImode)")))
+(define_constraint "Dt"
+ "@internal
+ A const_double that is the reciprocal of an exact power of two and can
+ therefore be used as the fractional-bits operand of an scvtf/ucvtf."
+ (and (match_code "const_double")
+ (match_test "aarch64_fpconst_pow2_recip (op)")))
+
(define_constraint "Dl"
"@internal
A constraint that matches vector of immediates for left shifts."
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 10100ca830a..993289014f0 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -98,6 +98,10 @@
(and (match_code "const_double")
(match_test "aarch64_fpconst_pow_of_2 (op) > 0")))
+(define_predicate "aarch64_fp_pow2_recip" ;; const_double accepted by aarch64_fpconst_pow2_recip
+ (and (match_code "const_double")
+ (match_test "aarch64_fpconst_pow2_recip (op)"))) ;; nonzero result means a usable 1/2^n constant
+
(define_predicate "aarch64_fp_vec_pow2"
(match_test "aarch64_vec_fpconst_pow_of_2 (op) > 0"))
diff --git a/gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c b/gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c
new file mode 100644
index 00000000000..e8d1de6279b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c
@@ -0,0 +1,140 @@
+/* { dg-do run } */
+/* { dg-options "-save-temps -O2 -fno-inline" } */
+
+#define FUNC_DEFS(__a) /* Define four float helpers computing x / 2^__a. */ \
+ float \
+fsfoo##__a (int x) /* signed 32-bit input */ \
+{ \
+ return ((float) x)/(1u << __a); \
+} \
+float \
+fusfoo##__a (unsigned int x) /* unsigned 32-bit input */ \
+{ \
+ return ((float) x)/(1u << __a); \
+} \
+float \
+fslfoo##__a (long x) /* signed long input */ \
+{ \
+ return ((float) x)/(1u << __a); \
+} \
+float \
+fulfoo##__a (unsigned long x) /* unsigned long input */ \
+{ \
+ return ((float) x)/(1u << __a); \
+} \
+
+#define FUNC_DEFD(__a) /* Define four double helpers computing x / 2^__a. */ \
+double \
+dsfoo##__a (int x) /* signed 32-bit input */ \
+{ \
+ return ((double) x)/(1u << __a);\
+} \
+double \
+dusfoo##__a (unsigned int x) /* unsigned 32-bit input */ \
+{ \
+ return ((double) x)/(1u << __a);\
+} \
+double \
+dslfoo##__a (long x) /* signed long input */ \
+{ \
+ return ((double) x)/(1u << __a);\
+} \
+double \
+dulfoo##__a (unsigned long x) /* unsigned long input */ \
+{ \
+ return ((double) x)/(1u << __a);\
+}
+
+FUNC_DEFS (4)
+ /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#4" 1 } } */
+ /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#4" 1 } } */
+ /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#4" 1 } } */
+ /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#4" 1 } } */
+
+FUNC_DEFD (4)
+ /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#4" 1 } } */
+ /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#4" 1 } } */
+ /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#4" 1 } } */
+ /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#4" 1 } } */
+
+FUNC_DEFS (8)
+ /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#8" 1 } } */
+ /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#8" 1 } } */
+ /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#8" 1 } } */
+ /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#8" 1 } } */
+
+FUNC_DEFD (8)
+ /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#8" 1 } } */
+ /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#8" 1 } } */
+ /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#8" 1 } } */
+ /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#8" 1 } } */
+
+FUNC_DEFS (16)
+ /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#16" 1 } } */
+ /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#16" 1 } } */
+ /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#16" 1 } } */
+ /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#16" 1 } } */
+
+FUNC_DEFD (16)
+ /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#16" 1 } } */
+ /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#16" 1 } } */
+ /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#16" 1 } } */
+ /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#16" 1 } } */
+
+FUNC_DEFS (31)
+ /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#31" 1 } } */
+ /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#31" 1 } } */
+ /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#31" 1 } } */
+ /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#31" 1 } } */
+
+FUNC_DEFD (31)
+ /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#31" 1 } } */
+ /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#31" 1 } } */
+ /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#31" 1 } } */
+ /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#31" 1 } } */
+
+#define FUNC_TESTS(__a, __b) /* Abort unless each float helper for 2^__a maps __b to __b / 2^__a. */ \
+do \
+{ \
+ if (fsfoo##__a (__b) != ((int) (__b)) * (1.0f/(1u << __a)) ) \
+ __builtin_abort (); \
+ if (fusfoo##__a (__b) != ((int) (__b)) * (1.0f/(1u << __a)) ) \
+ __builtin_abort (); \
+ if (fslfoo##__a (__b) != ((int) (__b)) * (1.0f/(1u << __a)) ) \
+ __builtin_abort (); \
+ if (fulfoo##__a (__b) != ((int) (__b)) * (1.0f/(1u << __a)) ) \
+ __builtin_abort (); \
+} while (0)
+
+#define FUNC_TESTD(__a, __b) /* Abort unless each double helper for 2^__a maps __b to __b / 2^__a. */ \
+do \
+{ \
+ if (dsfoo##__a (__b) != ((int) (__b)) * (1.0/(1u << __a)) ) \
+ __builtin_abort (); \
+ if (dusfoo##__a (__b) != ((int) (__b)) * (1.0/(1u << __a)) ) \
+ __builtin_abort (); \
+ if (dslfoo##__a (__b) != ((int) (__b)) * (1.0/(1u << __a)) ) \
+ __builtin_abort (); \
+ if (dulfoo##__a (__b) != ((int) (__b)) * (1.0/(1u << __a)) ) \
+ __builtin_abort (); \
+} while (0)
+
+ int
+main (void)
+{
+ int i;
+
+ for (i = 0; i < 32; i ++) /* Run every check on the inputs 0..31. */
+ {
+ FUNC_TESTS (4, i); /* float helpers, one call per tested fbits value */
+ FUNC_TESTS (8, i);
+ FUNC_TESTS (16, i);
+ FUNC_TESTS (31, i);
+
+ FUNC_TESTD (4, i); /* same fbits values through the FUNC_TESTD checks */
+ FUNC_TESTD (8, i);
+ FUNC_TESTD (16, i);
+ FUNC_TESTD (31, i);
+ }
+ return 0; /* Reached only if no check called __builtin_abort. */
+}
--
2.17.1
next reply other threads:[~2019-06-13 16:04 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-06-13 16:04 Joel Hutton [this message]
2019-06-13 17:26 Wilco Dijkstra
2019-06-18 9:11 ` Joel Hutton
2019-06-18 10:37 ` Richard Earnshaw (lists)
2019-06-18 11:12 ` Wilco Dijkstra
2019-06-18 12:30 ` Richard Sandiford
2019-06-18 15:34 ` Joel Hutton
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=DB6PR0801MB205410FFC9CB7FD4B643E765F5EF0@DB6PR0801MB2054.eurprd08.prod.outlook.com \
--to=joel.hutton@arm.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=nd@arm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).