[AArch64] Use scvtf fbits option where appropriate

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [AArch64] Use scvtf fbits option where appropriate
@ 2019-06-13 16:04 Joel Hutton
  0 siblings, 0 replies; 7+ messages in thread
From: Joel Hutton @ 2019-06-13 16:04 UTC (permalink / raw)
  To: GCC Patches; +Cc: nd

[-- Attachment #1: Type: text/plain, Size: 2247 bytes --]

Hi all,

There was previously no backend pattern to utilise the scvtf fbits option. Where a fixed-point value is converted to floating point and then divided by a power of 2 (or multiplied by the reciprocal of a power of 2), the two operations can be combined into a single scvtf with an fbits operand. This patch adds patterns to combine these instructions, and adds a helper function.

For the following test case:

float f(int a) { return ((float) a) / 65536.0; }
double g(int a) { return ((double) a) / 4096.0; }

the output generated is currently:

f:
        scvtf   s1, w0  // 6    [c=8 l=4]  floatsisf2/1
        mov     w1, 931135488   // 17   [c=4 l=4]  *movsi_aarch64/3
        fmov    s0, w1  // 18   [c=4 l=4]  *movsf_aarch64/1
        fmul    s0, s1, s0      // 13   [c=8 l=4]  mulsf3
        ret             // 24   [c=0 l=4]  *do_return
g:
        scvtf   d1, w0  // 6    [c=8 l=4]  floatsidf2
        mov     x1, 4553139223271571456 // 17   [c=4 l=4] *movdi_aarch64/3
        fmov    d0, x1  // 18   [c=4 l=4]  *movdf_aarch64/1
        fmul    d0, d1, d0      // 13   [c=8 l=4]  muldf3
        ret             // 24   [c=0 l=4]  *do_return
        
The output with this patch applied is:

f:
        scvtf   s0, w0, #16     // 13   [c=24 l=4]  *combine_scvtf_SI_sf3/1
        ret             // 22   [c=0 l=4]  *do_return
g:
        scvtf   d0, w0, #12     // 13   [c=24 l=4]  *combine_scvtf_SI_df3
        ret             // 22   [c=0 l=4]  *do_return
        
gcc/ChangeLog:

2019-06-12  Joel Hutton  <Joel.Hutton@arm.com>

        * config/aarch64/aarch64-protos.h (aarch64_fpconst_pow2_recip): New prototype.
        * config/aarch64/aarch64.c (aarch64_fpconst_pow2_recip): New function.
        * config/aarch64/aarch64.md (*aarch64_<su_optab>cvtf_<fcvt_target>_<GPF:mode>2_mult): New pattern.
        (*aarch64_<su_optab>cvtf_<fcvt_iesize>_<GPF:mode>2_mult): New pattern.
        * config/aarch64/constraints.md (Dt): New constraint.
        * config/aarch64/predicates.md (aarch64_fp_pow2_recip): New predicate.

gcc/testsuite/ChangeLog:

2019-06-12  Joel Hutton  <Joel.Hutton@arm.com>

        * gcc.target/aarch64/fmul_scvtf.c: New test.

Bootstrapped and regression tested on aarch64-linux-none target.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-SCVTF-fbits.patch --]
[-- Type: text/x-patch; name="0001-SCVTF-fbits.patch", Size: 10298 bytes --]

From 6aac0d56dc7d34e7a6fcabc1e8b0c7c291c0d51a Mon Sep 17 00:00:00 2001
From: Joel Hutton <Joel.Hutton@arm.com>
Date: Thu, 13 Jun 2019 11:08:56 +0100
Subject: [PATCH] SCVTF fbits

---
 gcc/config/aarch64/aarch64-protos.h           |   1 +
 gcc/config/aarch64/aarch64.c                  |  30 ++++
 gcc/config/aarch64/aarch64.md                 |  34 +++++
 gcc/config/aarch64/constraints.md             |   7 +
 gcc/config/aarch64/predicates.md              |   4 +
 gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c | 140 ++++++++++++++++++
 6 files changed, 216 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 1e3b1c91db1..ad1ba458a3f 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -494,6 +494,7 @@ enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx);
 enum reg_class aarch64_regno_regclass (unsigned);
 int aarch64_asm_preferred_eh_data_format (int, int);
 int aarch64_fpconst_pow_of_2 (rtx);
+int aarch64_fpconst_pow2_recip (rtx);
 machine_mode aarch64_hard_regno_caller_save_mode (unsigned, unsigned,
 						       machine_mode);
 int aarch64_uxt_size (int, HOST_WIDE_INT);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 9a035dd9ed8..49a7d9256cf 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -18707,6 +18707,36 @@ aarch64_fpconst_pow_of_2 (rtx x)
   return exact_log2 (real_to_integer (r));
 }
 
+/* If X is a positive CONST_DOUBLE with a value that is the reciprocal of a
+   power of 2 (i.e. 1/2^n), return the number of fractional bits n, e.g. for
+   x == 1/2^16 return 16.  Otherwise return 0.  */
+int
+aarch64_fpconst_pow2_recip (rtx x)
+{
+  REAL_VALUE_TYPE r0;
+
+  if (!CONST_DOUBLE_P (x))
+    return 0;
+
+  r0 = *CONST_DOUBLE_REAL_VALUE (x);
+  if (exact_real_inverse (DFmode, &r0)
+      && !REAL_VALUE_NEGATIVE (r0))
+    {
+      if (exact_real_truncate (DFmode, &r0))
+	{
+	  HOST_WIDE_INT value = real_to_integer (&r0);
+	  value = value & 0xffffffff;
+	  if ((value != 0) && ( (value & (value - 1)) == 0))
+	    {
+	      int ret = exact_log2 (value);
+	      gcc_assert (IN_RANGE (ret, 0, 31));
+	      return ret;
+	    }
+	}
+    }
+  return 0;
+}
+
 /* If X is a vector of equal CONST_DOUBLE values and that value is
    Y, return the aarch64_fpconst_pow_of_2 of Y.  Otherwise return -1.  */
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 526c7fb0dab..60bcf1bc8d9 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -6016,6 +6016,40 @@
   [(set_attr "type" "f_cvtf2i")]
 )
 
+(define_insn "*aarch64_<su_optab>cvtf_<fcvt_target>_<GPF:mode>2_mult"
+  [(set (match_operand:GPF 0 "register_operand" "=w,w")
+	(mult:GPF (FLOATUORS:GPF
+		   (match_operand:<FCVT_TARGET> 1 "register_operand" "w,?r"))
+		   (match_operand 2 "aarch64_fp_pow2_recip""Dt,Dt")))]
+  "TARGET_FLOAT"
+  {
+    operands[2] = GEN_INT (aarch64_fpconst_pow2_recip (operands[2]));
+    switch (which_alternative)
+    {
+      case 0:
+	return "<su_optab>cvtf\t%<GPF:s>0, %<s>1, #%2";
+      case 1:
+	return "<su_optab>cvtf\t%<GPF:s>0, %<w1>1, #%2";
+    }
+    return "";
+  }
+  [(set_attr "type" "neon_int_to_fp_<Vetype>,f_cvti2f")
+   (set_attr "arch" "simd,fp")]
+)
+
+(define_insn "*aarch64_<su_optab>cvtf_<fcvt_iesize>_<GPF:mode>2_mult"
+  [(set (match_operand:GPF 0 "register_operand" "=w")
+	(mult:GPF (FLOATUORS:GPF
+		   (match_operand:<FCVT_IESIZE> 1 "register_operand" "r"))
+		   (match_operand 2 "aarch64_fp_pow2_recip" "Dt")))]
+  "TARGET_FLOAT"
+  {
+    operands[2] = GEN_INT (aarch64_fpconst_pow2_recip (operands[2]));
+    return "<su_optab>cvtf\t%<GPF:s>0, %<w2>1, #%2";
+  }
+  [(set_attr "type" "f_cvti2f")]
+)
+
 (define_insn "<optab><fcvt_target><GPF:mode>2"
   [(set (match_operand:GPF 0 "register_operand" "=w,w")
         (FLOATUORS:GPF (match_operand:<FCVT_TARGET> 1 "register_operand" "w,?r")))]
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 21f9549e660..a7731a033ea 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -329,6 +329,13 @@
       (match_test "aarch64_simd_scalar_immediate_valid_for_move (op,
 						 QImode)")))
 
+(define_constraint "Dt"
+  "@internal
+ A const_double which is the reciprocal of an exact power of two, can be
+ used in an scvtf with fract bits operation"
+ (and (match_code "const_double")
+      (match_test "aarch64_fpconst_pow2_recip (op)")))
+
 (define_constraint "Dl"
   "@internal
  A constraint that matches vector of immediates for left shifts."
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 10100ca830a..993289014f0 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -98,6 +98,10 @@
   (and (match_code "const_double")
 	(match_test "aarch64_fpconst_pow_of_2 (op) > 0")))
 
+(define_predicate "aarch64_fp_pow2_recip"
+  (and (match_code "const_double")
+       (match_test "aarch64_fpconst_pow2_recip (op)")))
+
 (define_predicate "aarch64_fp_vec_pow2"
   (match_test "aarch64_vec_fpconst_pow_of_2 (op) > 0"))
 
diff --git a/gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c b/gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c
new file mode 100644
index 00000000000..e8d1de6279b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c
@@ -0,0 +1,140 @@
+/* { dg-do run } */
+/* { dg-options "-save-temps -O2 -fno-inline" } */
+
+#define FUNC_DEFS(__a)			\
+	float				\
+fsfoo##__a (int x)			\
+{					\
+	return ((float) x)/(1u << __a);	\
+}					\
+float					\
+fusfoo##__a (unsigned int x)		\
+{					\
+	return ((float) x)/(1u << __a);	\
+}					\
+float					\
+fslfoo##__a (long x)			\
+{					\
+	return ((float) x)/(1u << __a);	\
+}					\
+float					\
+fulfoo##__a (unsigned long x)		\
+{					\
+	return ((float) x)/(1u << __a);	\
+}					\
+
+#define FUNC_DEFD(__a)			\
+double					\
+dsfoo##__a (int x)			\
+{					\
+	return ((double) x)/(1u << __a);\
+}					\
+double					\
+dusfoo##__a (unsigned int x)		\
+{					\
+	return ((double) x)/(1u << __a);\
+}					\
+double					\
+dslfoo##__a (long x)			\
+{					\
+	return ((double) x)/(1u << __a);\
+}					\
+double					\
+dulfoo##__a (unsigned long x)		\
+{					\
+	return ((double) x)/(1u << __a);\
+}
+
+FUNC_DEFS (4)
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#4" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#4" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#4" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#4" 1 } } */
+
+FUNC_DEFD (4)
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#4" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#4" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#4" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#4" 1 } } */
+
+FUNC_DEFS (8)
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#8" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#8" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#8" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#8" 1 } } */
+
+FUNC_DEFD (8)
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#8" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#8" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#8" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#8" 1 } } */
+
+FUNC_DEFS (16)
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#16" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#16" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#16" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#16" 1 } } */
+
+FUNC_DEFD (16)
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#16" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#16" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#16" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#16" 1 } } */
+
+FUNC_DEFS (31)
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#31" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#31" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#31" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#31" 1 } } */
+
+FUNC_DEFD (31)
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#31" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#31" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#31" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#31" 1 } } */
+
+#define FUNC_TESTS(__a, __b)						\
+do									\
+{									\
+	if (fsfoo##__a (__b) !=  ((int) i) * (1.0f/(1u << __a)) )	\
+	__builtin_abort ();						\
+	if (fusfoo##__a (__b) != ((int) i) * (1.0f/(1u << __a)) )	\
+	__builtin_abort ();						\
+	if (fslfoo##__a (__b) != ((int) i) * (1.0f/(1u << __a)) )	\
+	__builtin_abort ();						\
+	if (fulfoo##__a (__b) != ((int) i) * (1.0f/(1u << __a)) )	\
+	__builtin_abort ();						\
+} while (0)
+
+#define FUNC_TESTD(__a, __b)						\
+do	/* Each d*foo##__a (__b) must equal __b * 2^-__a, else abort.  */	\
+{									\
+	if (dsfoo##__a (__b) !=  ((int) (__b)) * (1.0/(1u << __a)) )	\
+	__builtin_abort ();						\
+	if (dusfoo##__a (__b) != ((int) (__b)) * (1.0/(1u << __a)) )	\
+	__builtin_abort ();						\
+	if (dslfoo##__a (__b) != ((int) (__b)) * (1.0/(1u << __a)) )	\
+	__builtin_abort ();						\
+	if (dulfoo##__a (__b) != ((int) (__b)) * (1.0/(1u << __a)) )	\
+	__builtin_abort ();						\
+} while (0)
+
+	int
+main (void)
+{
+	int i;
+
+	for (i = 0; i < 32; i ++)
+	{
+		FUNC_TESTS (4, i);
+		FUNC_TESTS (8, i);
+		FUNC_TESTS (16, i);
+		FUNC_TESTS (31, i);
+
+		FUNC_TESTD (4, i);
+		FUNC_TESTD (8, i);
+		FUNC_TESTD (16, i);
+		FUNC_TESTD (31, i);
+	}
+	return 0;
+}
-- 
2.17.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [AArch64] Use scvtf fbits option where appropriate
  2019-06-18 12:30       ` Richard Sandiford
@ 2019-06-18 15:34         ` Joel Hutton
  0 siblings, 0 replies; 7+ messages in thread
From: Joel Hutton @ 2019-06-18 15:34 UTC (permalink / raw)
  To: Richard Sandiford; +Cc: Wilco Dijkstra, Richard Earnshaw, nd, GCC Patches

[-- Attachment #1: Type: text/plain, Size: 1431 bytes --]


On 18/06/2019 11:37, Richard Earnshaw (lists) wrote:
> Start sentences with a capital letter.  End them with a full stop.
> "inequal" isn't a word: you probably mean "unequal".

I've fixed this; the iterator is, however, defined as 'fcvt_iesize'
and described in the adjacent comment in iterators.md as 'inequal'.
I've addressed your other comments.

On 18/06/2019 13:30, Richard Sandiford wrote:
> Wilco Dijkstra <Wilco.Dijkstra@arm.com> writes:
>>   > +/* If X is a positive CONST_DOUBLE with a value that is the 
>> reciprocal of a
>>   > +   power of 2 (i.e 1/2^n) return the number of float bits. e.g. 
>> for x==(1/2^n)
>>   > +   return n. Otherwise return -1.  */
>>   > +int
>>   > +aarch64_fpconst_pow2_recip (rtx x)
>>   > +{
>>   > +  REAL_VALUE_TYPE r0;
>>   > +
>>   > +  if (!CONST_DOUBLE_P (x))
>>   > +    return -1;
>>> CONST_DOUBLE can be used for things other than floating point.  You
>>> should really check that the mode on the double in is in class 
>>> MODE_FLOAT.
>>   Several other functions (eg aarch64_fpconst_pow_of_2) do the same 
>> since
>> this function is only called with HF/SF/DF mode. We could add an 
>> assert for
>> SCALAR_FLOAT_MODE_P (but then aarch64_fpconst_pow_of_2 should do
>> the same).
> IMO we should leave it as-is.  aarch64.h has:
I've gone with the majority and left it as-is, but I don't have strong 
feelings on it.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-SCVTF-fbits.patch --]
[-- Type: text/x-patch; name="0001-SCVTF-fbits.patch", Size: 10833 bytes --]

From 1e44ef7e999527a0b03316cf0ea002f8d4437052 Mon Sep 17 00:00:00 2001
From: Joel Hutton <Joel.Hutton@arm.com>
Date: Thu, 13 Jun 2019 11:08:56 +0100
Subject: [PATCH] SCVTF fbits

---
 gcc/config/aarch64/aarch64-protos.h           |   1 +
 gcc/config/aarch64/aarch64.c                  |  23 +++
 gcc/config/aarch64/aarch64.md                 |  39 +++++
 gcc/config/aarch64/constraints.md             |   7 +
 gcc/config/aarch64/predicates.md              |   4 +
 gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c | 140 ++++++++++++++++++
 6 files changed, 214 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 1e3b1c91db1..ad1ba458a3f 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -494,6 +494,7 @@ enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx);
 enum reg_class aarch64_regno_regclass (unsigned);
 int aarch64_asm_preferred_eh_data_format (int, int);
 int aarch64_fpconst_pow_of_2 (rtx);
+int aarch64_fpconst_pow2_recip (rtx);
 machine_mode aarch64_hard_regno_caller_save_mode (unsigned, unsigned,
 						       machine_mode);
 int aarch64_uxt_size (int, HOST_WIDE_INT);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 9a035dd9ed8..028da32174d 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -18707,6 +18707,29 @@ aarch64_fpconst_pow_of_2 (rtx x)
   return exact_log2 (real_to_integer (r));
 }
 
+/* If X is a positive CONST_DOUBLE whose value is the reciprocal of a power
+   of 2 (i.e. 1/2^n with 1 <= n <= 31), return the number of fractional bits
+   n.  Otherwise return -1.  */
+
+int
+aarch64_fpconst_pow2_recip (rtx x)
+{
+  REAL_VALUE_TYPE r0;
+
+  if (!CONST_DOUBLE_P (x))
+    return -1;
+
+  r0 = *CONST_DOUBLE_REAL_VALUE (x);
+  if (exact_real_inverse (DFmode, &r0)
+      && !REAL_VALUE_NEGATIVE (r0))
+    {
+      int ret = exact_log2 (real_to_integer (&r0));
+      if (ret >= 1 && ret <= 31)
+	return ret;
+    }
+  return -1;
+}
+
 /* If X is a vector of equal CONST_DOUBLE values and that value is
    Y, return the aarch64_fpconst_pow_of_2 of Y.  Otherwise return -1.  */
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 526c7fb0dab..c7c6a18b0ff 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -6016,6 +6016,44 @@
   [(set_attr "type" "f_cvtf2i")]
 )
 
+;; Equal width integer to fp combine.
+(define_insn "*aarch64_<su_optab>cvtf_<fcvt_target>_<GPF:mode>2_mult"
+  [(set (match_operand:GPF 0 "register_operand" "=w,w")
+	(mult:GPF (FLOATUORS:GPF
+		   (match_operand:<FCVT_TARGET> 1 "register_operand" "w,?r"))
+		   (match_operand:GPF 2 "aarch64_fp_pow2_recip" "Dt,Dt")))]
+  "TARGET_FLOAT"
+  {
+    operands[2] = GEN_INT (aarch64_fpconst_pow2_recip (operands[2]));
+    switch (which_alternative)
+    {
+      case 0:
+	return "<su_optab>cvtf\t%<GPF:s>0, %<s>1, #%2";
+      case 1:
+	return "<su_optab>cvtf\t%<GPF:s>0, %<w1>1, #%2";
+      default:
+	gcc_unreachable ();
+    }
+  }
+  [(set_attr "type" "neon_int_to_fp_<Vetype>,f_cvti2f")
+   (set_attr "arch" "simd,fp")]
+)
+
+;; Unequal width integer to fp combine.
+(define_insn "*aarch64_<su_optab>cvtf_<fcvt_iesize>_<GPF:mode>2_mult"
+  [(set (match_operand:GPF 0 "register_operand" "=w")
+	(mult:GPF (FLOATUORS:GPF
+		   (match_operand:<FCVT_IESIZE> 1 "register_operand" "r"))
+		   (match_operand:GPF 2 "aarch64_fp_pow2_recip" "Dt")))]
+  "TARGET_FLOAT"
+  {
+    operands[2] = GEN_INT (aarch64_fpconst_pow2_recip (operands[2]));
+    return "<su_optab>cvtf\t%<GPF:s>0, %<w2>1, #%2";
+  }
+  [(set_attr "type" "f_cvti2f")]
+)
+
+;; Equal width integer to fp conversion.
 (define_insn "<optab><fcvt_target><GPF:mode>2"
   [(set (match_operand:GPF 0 "register_operand" "=w,w")
         (FLOATUORS:GPF (match_operand:<FCVT_TARGET> 1 "register_operand" "w,?r")))]
@@ -6027,6 +6065,7 @@
    (set_attr "arch" "simd,fp")]
 )
 
+;; Unequal width integer to fp conversions.
 (define_insn "<optab><fcvt_iesize><GPF:mode>2"
   [(set (match_operand:GPF 0 "register_operand" "=w")
         (FLOATUORS:GPF (match_operand:<FCVT_IESIZE> 1 "register_operand" "r")))]
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 21f9549e660..b0caa13b435 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -329,6 +329,13 @@
       (match_test "aarch64_simd_scalar_immediate_valid_for_move (op,
 						 QImode)")))
 
+(define_constraint "Dt"
+  "@internal
+ A const_double which is the reciprocal of an exact power of two, can be
+ used in an scvtf with fract bits operation"
+ (and (match_code "const_double")
+      (match_test "aarch64_fpconst_pow2_recip (op) > 0")))
+
 (define_constraint "Dl"
   "@internal
  A constraint that matches vector of immediates for left shifts."
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 10100ca830a..da295981286 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -98,6 +98,10 @@
   (and (match_code "const_double")
 	(match_test "aarch64_fpconst_pow_of_2 (op) > 0")))
 
+(define_predicate "aarch64_fp_pow2_recip"
+  (and (match_code "const_double")
+       (match_test "aarch64_fpconst_pow2_recip (op) > 0")))
+
 (define_predicate "aarch64_fp_vec_pow2"
   (match_test "aarch64_vec_fpconst_pow_of_2 (op) > 0"))
 
diff --git a/gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c b/gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c
new file mode 100644
index 00000000000..e8d1de6279b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c
@@ -0,0 +1,140 @@
+/* { dg-do run } */
+/* { dg-options "-save-temps -O2 -fno-inline" } */
+
+#define FUNC_DEFS(__a)			\
+	float				\
+fsfoo##__a (int x)			\
+{					\
+	return ((float) x)/(1u << __a);	\
+}					\
+float					\
+fusfoo##__a (unsigned int x)		\
+{					\
+	return ((float) x)/(1u << __a);	\
+}					\
+float					\
+fslfoo##__a (long x)			\
+{					\
+	return ((float) x)/(1u << __a);	\
+}					\
+float					\
+fulfoo##__a (unsigned long x)		\
+{					\
+	return ((float) x)/(1u << __a);	\
+}					\
+
+#define FUNC_DEFD(__a)			\
+double					\
+dsfoo##__a (int x)			\
+{					\
+	return ((double) x)/(1u << __a);\
+}					\
+double					\
+dusfoo##__a (unsigned int x)		\
+{					\
+	return ((double) x)/(1u << __a);\
+}					\
+double					\
+dslfoo##__a (long x)			\
+{					\
+	return ((double) x)/(1u << __a);\
+}					\
+double					\
+dulfoo##__a (unsigned long x)		\
+{					\
+	return ((double) x)/(1u << __a);\
+}
+
+FUNC_DEFS (4)
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#4" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#4" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#4" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#4" 1 } } */
+
+FUNC_DEFD (4)
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#4" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#4" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#4" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#4" 1 } } */
+
+FUNC_DEFS (8)
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#8" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#8" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#8" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#8" 1 } } */
+
+FUNC_DEFD (8)
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#8" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#8" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#8" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#8" 1 } } */
+
+FUNC_DEFS (16)
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#16" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#16" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#16" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#16" 1 } } */
+
+FUNC_DEFD (16)
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#16" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#16" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#16" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#16" 1 } } */
+
+FUNC_DEFS (31)
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#31" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#31" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#31" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#31" 1 } } */
+
+FUNC_DEFD (31)
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#31" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#31" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#31" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#31" 1 } } */
+
+#define FUNC_TESTS(__a, __b)						\
+do									\
+{									\
+	if (fsfoo##__a (__b) !=  ((int) i) * (1.0f/(1u << __a)) )	\
+	__builtin_abort ();						\
+	if (fusfoo##__a (__b) != ((int) i) * (1.0f/(1u << __a)) )	\
+	__builtin_abort ();						\
+	if (fslfoo##__a (__b) != ((int) i) * (1.0f/(1u << __a)) )	\
+	__builtin_abort ();						\
+	if (fulfoo##__a (__b) != ((int) i) * (1.0f/(1u << __a)) )	\
+	__builtin_abort ();						\
+} while (0)
+
+#define FUNC_TESTD(__a, __b)						\
+do	/* Each d*foo##__a (__b) must equal __b * 2^-__a, else abort.  */	\
+{									\
+	if (dsfoo##__a (__b) !=  ((int) (__b)) * (1.0/(1u << __a)) )	\
+	__builtin_abort ();						\
+	if (dusfoo##__a (__b) != ((int) (__b)) * (1.0/(1u << __a)) )	\
+	__builtin_abort ();						\
+	if (dslfoo##__a (__b) != ((int) (__b)) * (1.0/(1u << __a)) )	\
+	__builtin_abort ();						\
+	if (dulfoo##__a (__b) != ((int) (__b)) * (1.0/(1u << __a)) )	\
+	__builtin_abort ();						\
+} while (0)
+
+	int
+main (void)
+{
+	int i;
+
+	for (i = 0; i < 32; i ++)
+	{
+		FUNC_TESTS (4, i);
+		FUNC_TESTS (8, i);
+		FUNC_TESTS (16, i);
+		FUNC_TESTS (31, i);
+
+		FUNC_TESTD (4, i);
+		FUNC_TESTD (8, i);
+		FUNC_TESTD (16, i);
+		FUNC_TESTD (31, i);
+	}
+	return 0;
+}
-- 
2.17.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [AArch64] Use scvtf fbits option where appropriate
  2019-06-18 11:12     ` Wilco Dijkstra
@ 2019-06-18 12:30       ` Richard Sandiford
  2019-06-18 15:34         ` Joel Hutton
  0 siblings, 1 reply; 7+ messages in thread
From: Richard Sandiford @ 2019-06-18 12:30 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: Richard Earnshaw, Joel Hutton, nd, GCC Patches

Wilco Dijkstra <Wilco.Dijkstra@arm.com> writes:
>  > +/* If X is a positive CONST_DOUBLE with a value that is the reciprocal of a
>  > +   power of 2 (i.e 1/2^n) return the number of float bits. e.g. for x==(1/2^n)
>  > +   return n. Otherwise return -1.  */
>  > +int
>  > +aarch64_fpconst_pow2_recip (rtx x)
>  > +{
>  > +  REAL_VALUE_TYPE r0;
>  > +
>  > +  if (!CONST_DOUBLE_P (x))
>  > +    return -1;
>  
>> CONST_DOUBLE can be used for things other than floating point.  You
>> should really check that the mode on the double in is in class MODE_FLOAT.
>  
> Several other functions (eg aarch64_fpconst_pow_of_2) do the same since
> this function is only called with HF/SF/DF mode. We could add an assert for
> SCALAR_FLOAT_MODE_P (but then aarch64_fpconst_pow_of_2 should do
> the same).

IMO we should leave it as-is.  aarch64.h has:

#define TARGET_SUPPORTS_WIDE_INT 1

which makes it invalid to use CONST_DOUBLE for anything other than
floating-point constants.  The handling of CONST_DOUBLEs with integer
modes is effectively compiled out in key places so it would be very hard
to create one accidentally.  And even if somehow we did, it would fail
noisily in other ways.

So I think it would be redundant to assert that CONST_DOUBLE has a float
mode here, much like we (rightly) don't assert that CONST_VECTORs have
vector modes.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [AArch64] Use scvtf fbits option where appropriate
  2019-06-18 10:37   ` Richard Earnshaw (lists)
@ 2019-06-18 11:12     ` Wilco Dijkstra
  2019-06-18 12:30       ` Richard Sandiford
  0 siblings, 1 reply; 7+ messages in thread
From: Wilco Dijkstra @ 2019-06-18 11:12 UTC (permalink / raw)
  To: Richard Earnshaw, Joel Hutton; +Cc: nd, GCC Patches

Hi,

And a few more comments:

 > +/* If X is a positive CONST_DOUBLE with a value that is the reciprocal of a
 > +   power of 2 (i.e 1/2^n) return the number of float bits. e.g. for x==(1/2^n)
 > +   return n. Otherwise return -1.  */
 > +int
 > +aarch64_fpconst_pow2_recip (rtx x)
 > +{
 > +  REAL_VALUE_TYPE r0;
 > +
 > +  if (!CONST_DOUBLE_P (x))
 > +    return -1;
 
> CONST_DOUBLE can be used for things other than floating point.  You
> should really check that the mode on the double in is in class MODE_FLOAT.
 
Several other functions (eg aarch64_fpconst_pow_of_2) do the same since
this function is only called with HF/SF/DF mode. We could add an assert for
SCALAR_FLOAT_MODE_P (but then aarch64_fpconst_pow_of_2 should do
the same).

 > +
 > +  r0 = *CONST_DOUBLE_REAL_VALUE (x);
 > +  if (exact_real_inverse (DFmode, &r0)
 > +      && !REAL_VALUE_NEGATIVE (r0))
 > +    {
 > +     int ret = exact_log2 (real_to_integer (&r0));
 > +     if (ret >= 1 && ret <= 31)
 > +       {
 > +         return ret;
 > +       }

Redundant braces

 > +     else
 > +       {
 > +         return -1;
 > +       }

The else is redundant because...

 > +    }
 > +  return -1;

... of this.

 > +}
 > +
 >  /* If X is a vector of equal CONST_DOUBLE values and that value is
 >     Y, return the aarch64_fpconst_pow_of_2 of Y.  Otherwise return -1.  */
 >  
 > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
 > index 526c7fb0dab..d9812aa238e 100644
 > --- a/gcc/config/aarch64/aarch64.md
 > +++ b/gcc/config/aarch64/aarch64.md
 > @@ -6016,6 +6016,44 @@
 >    [(set_attr "type" "f_cvtf2i")]
 >  )
 >  
 > +;; equal width integer to fp combine
 > +(define_insn "*aarch64_<su_optab>cvtf_<fcvt_target>_<GPF:mode>2_mult"
 > +  [(set (match_operand:GPF 0 "register_operand" "=w,w")
 > +     (mult:GPF (FLOATUORS:GPF
 > +                (match_operand:<FCVT_TARGET> 1 "register_operand" "w,?r"))
 > +                (match_operand 2 "aarch64_fp_pow2_recip""Dt,Dt")))]
 
 > Missing mode on operand 2.  Missing white space between constraint and
 > predicate.

Yes, operand 2 should use GPF as well (odd this doesn't give a warning at least).

Also the indentation is off - the multiply operands should be indented to the
same level - match operand 1 should be indented more to the right.

Wilco

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [AArch64] Use scvtf fbits option where appropriate
  2019-06-18  9:11 ` Joel Hutton
@ 2019-06-18 10:37   ` Richard Earnshaw (lists)
  2019-06-18 11:12     ` Wilco Dijkstra
  0 siblings, 1 reply; 7+ messages in thread
From: Richard Earnshaw (lists) @ 2019-06-18 10:37 UTC (permalink / raw)
  To: Joel Hutton, Wilco Dijkstra; +Cc: nd, GCC Patches

On 18/06/2019 10:11, Joel Hutton wrote:
> Hi,
> 
> On 13/06/2019 18:26, Wilco Dijkstra wrote:
>> Wouldn't it be easier to just do exact_log2 (real_to_integer (&r0))
>> and then check the range is in 1..31?
> I've revised this section.
>> --- a/gcc/config/aarch64/aarch64.md
>> +++ b/gcc/config/aarch64/aarch64.md
>> @@ -6016,6 +6016,40 @@
>>     [(set_attr "type" "f_cvtf2i")]
>>   )
>>   
>> +(define_insn "*aarch64_<su_optab>cvtf_<fcvt_target>_<GPF:mode>2_mult"
>> +  [(set (match_operand:GPF 0 "register_operand" "=w,w")
>> +	(mult:GPF (FLOATUORS:GPF
>> +		   (match_operand:<FCVT_TARGET> 1 "register_operand" "w,?r"))
>> +		   (match_operand 2 "aarch64_fp_pow2_recip""Dt,Dt")))]
>>
>> We should add a comment before both define_insn similar to the other
>> conversions, explaining what they do and why there are 2 separate patterns
>> (the default versions of the conversions appear to be missing a comment too).
> I've added comments to the new and existing patterns
> 
> 
> 0001-SCVTF-fbits.patch
> 
> From 5a9dfa6c6eb1c5b9c8c464780b7098058989d472 Mon Sep 17 00:00:00 2001
> From: Joel Hutton <Joel.Hutton@arm.com>
> Date: Thu, 13 Jun 2019 11:08:56 +0100
> Subject: [PATCH] SCVTF fbits
> 
> ---
>  gcc/config/aarch64/aarch64-protos.h           |   1 +
>  gcc/config/aarch64/aarch64.c                  |  28 ++++
>  gcc/config/aarch64/aarch64.md                 |  39 +++++
>  gcc/config/aarch64/constraints.md             |   7 +
>  gcc/config/aarch64/predicates.md              |   4 +
>  gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c | 140 ++++++++++++++++++
>  6 files changed, 219 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c
> 
> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index 1e3b1c91db1..ad1ba458a3f 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -494,6 +494,7 @@ enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx);
>  enum reg_class aarch64_regno_regclass (unsigned);
>  int aarch64_asm_preferred_eh_data_format (int, int);
>  int aarch64_fpconst_pow_of_2 (rtx);
> +int aarch64_fpconst_pow2_recip (rtx);
>  machine_mode aarch64_hard_regno_caller_save_mode (unsigned, unsigned,
>  						       machine_mode);
>  int aarch64_uxt_size (int, HOST_WIDE_INT);
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 9a035dd9ed8..424ca6c9932 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -18707,6 +18707,34 @@ aarch64_fpconst_pow_of_2 (rtx x)
>    return exact_log2 (real_to_integer (r));
>  }
>  
> +/* If X is a positive CONST_DOUBLE with a value that is the reciprocal of a
> +   power of 2 (i.e 1/2^n) return the number of float bits. e.g. for x==(1/2^n)
> +   return n. Otherwise return -1.  */
> +int
> +aarch64_fpconst_pow2_recip (rtx x)
> +{
> +  REAL_VALUE_TYPE r0;
> +
> +  if (!CONST_DOUBLE_P (x))
> +    return -1;

CONST_DOUBLE can be used for things other than floating point.  You
should really check that the mode of the CONST_DOUBLE is in class MODE_FLOAT.

> +
> +  r0 = *CONST_DOUBLE_REAL_VALUE (x);
> +  if (exact_real_inverse (DFmode, &r0)
> +      && !REAL_VALUE_NEGATIVE (r0))
> +    {
> +	int ret = exact_log2 (real_to_integer (&r0));
> +	if (ret >= 1 && ret <= 31)
> +	  {
> +	    return ret;
> +	  }
> +	else
> +	  {
> +	    return -1;
> +	  }
> +    }
> +  return -1;
> +}
> +
>  /* If X is a vector of equal CONST_DOUBLE values and that value is
>     Y, return the aarch64_fpconst_pow_of_2 of Y.  Otherwise return -1.  */
>  
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 526c7fb0dab..d9812aa238e 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -6016,6 +6016,44 @@
>    [(set_attr "type" "f_cvtf2i")]
>  )
>  
> +;; equal width integer to fp combine
> +(define_insn "*aarch64_<su_optab>cvtf_<fcvt_target>_<GPF:mode>2_mult"
> +  [(set (match_operand:GPF 0 "register_operand" "=w,w")
> +	(mult:GPF (FLOATUORS:GPF
> +		   (match_operand:<FCVT_TARGET> 1 "register_operand" "w,?r"))
> +		   (match_operand 2 "aarch64_fp_pow2_recip""Dt,Dt")))]

Missing mode on operand 2.  Missing white space between the predicate and
the constraint string.

> +  "TARGET_FLOAT"
> +  {
> +    operands[2] = GEN_INT (aarch64_fpconst_pow2_recip (operands[2]));
> +    switch (which_alternative)
> +    {
> +      case 0:
> +	return "<su_optab>cvtf\t%<GPF:s>0, %<s>1, #%2";
> +      case 1:
> +	return "<su_optab>cvtf\t%<GPF:s>0, %<w1>1, #%2";
> +      default:
> +	gcc_unreachable();
> +    }
> +  }
> +  [(set_attr "type" "neon_int_to_fp_<Vetype>,f_cvti2f")
> +   (set_attr "arch" "simd,fp")]
> +)
> +
> +;; inequal width integer to fp combine
> +(define_insn "*aarch64_<su_optab>cvtf_<fcvt_iesize>_<GPF:mode>2_mult"
> +  [(set (match_operand:GPF 0 "register_operand" "=w")
> +	(mult:GPF (FLOATUORS:GPF
> +		   (match_operand:<FCVT_IESIZE> 1 "register_operand" "r"))
> +		   (match_operand 2 "aarch64_fp_pow2_recip" "Dt")))]

Likewise.

> +  "TARGET_FLOAT"
> +  {
> +    operands[2] = GEN_INT (aarch64_fpconst_pow2_recip (operands[2]));
> +    return "<su_optab>cvtf\t%<GPF:s>0, %<w2>1, #%2";
> +  }
> +  [(set_attr "type" "f_cvti2f")]
> +)
> +
> +;; equal width integer to fp conversion
>  (define_insn "<optab><fcvt_target><GPF:mode>2"
>    [(set (match_operand:GPF 0 "register_operand" "=w,w")
>          (FLOATUORS:GPF (match_operand:<FCVT_TARGET> 1 "register_operand" "w,?r")))]
> @@ -6027,6 +6065,7 @@
>     (set_attr "arch" "simd,fp")]
>  )
>  
> +;; inequal width integer to fp conversions

Start sentences with a capital letter.  End them with a full stop.
"inequal" isn't a word: you probably mean "unequal".

>  (define_insn "<optab><fcvt_iesize><GPF:mode>2"
>    [(set (match_operand:GPF 0 "register_operand" "=w")
>          (FLOATUORS:GPF (match_operand:<FCVT_IESIZE> 1 "register_operand" "r")))]
> diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
> index 21f9549e660..a7731a033ea 100644
> --- a/gcc/config/aarch64/constraints.md
> +++ b/gcc/config/aarch64/constraints.md
> @@ -329,6 +329,13 @@
>        (match_test "aarch64_simd_scalar_immediate_valid_for_move (op,
>  						 QImode)")))
>  
> +(define_constraint "Dt"
> +  "@internal
> + A const_double which is the reciprocal of an exact power of two, can be
> + used in an scvtf with fract bits operation"
> + (and (match_code "const_double")
> +      (match_test "aarch64_fpconst_pow2_recip (op)")))

The function returns -1 on failure, but you're using its result as a boolean
predicate (i.e. != 0), so a failing value would still satisfy the constraint.

R.


> +
>  (define_constraint "Dl"
>    "@internal
>   A constraint that matches vector of immediates for left shifts."
> diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
> index 10100ca830a..da295981286 100644
> --- a/gcc/config/aarch64/predicates.md
> +++ b/gcc/config/aarch64/predicates.md
> @@ -98,6 +98,10 @@
>    (and (match_code "const_double")
>  	(match_test "aarch64_fpconst_pow_of_2 (op) > 0")))
>  
> +(define_predicate "aarch64_fp_pow2_recip"
> +  (and (match_code "const_double")
> +       (match_test "aarch64_fpconst_pow2_recip (op) > 0")))
> +
>  (define_predicate "aarch64_fp_vec_pow2"
>    (match_test "aarch64_vec_fpconst_pow_of_2 (op) > 0"))
>  
> diff --git a/gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c b/gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c
> new file mode 100644
> index 00000000000..e8d1de6279b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c
> @@ -0,0 +1,140 @@
> +/* { dg-do run } */
> +/* { dg-options "-save-temps -O2 -fno-inline" } */
> +
> +#define FUNC_DEFS(__a)			\
> +	float				\
> +fsfoo##__a (int x)			\
> +{					\
> +	return ((float) x)/(1u << __a);	\
> +}					\
> +float					\
> +fusfoo##__a (unsigned int x)		\
> +{					\
> +	return ((float) x)/(1u << __a);	\
> +}					\
> +float					\
> +fslfoo##__a (long x)			\
> +{					\
> +	return ((float) x)/(1u << __a);	\
> +}					\
> +float					\
> +fulfoo##__a (unsigned long x)		\
> +{					\
> +	return ((float) x)/(1u << __a);	\
> +}					\
> +
> +#define FUNC_DEFD(__a)			\
> +double					\
> +dsfoo##__a (int x)			\
> +{					\
> +	return ((double) x)/(1u << __a);\
> +}					\
> +double					\
> +dusfoo##__a (unsigned int x)		\
> +{					\
> +	return ((double) x)/(1u << __a);\
> +}					\
> +double					\
> +dslfoo##__a (long x)			\
> +{					\
> +	return ((double) x)/(1u << __a);\
> +}					\
> +double					\
> +dulfoo##__a (unsigned long x)		\
> +{					\
> +	return ((double) x)/(1u << __a);\
> +}
> +
> +FUNC_DEFS (4)
> +	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#4" 1 } } */
> +	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#4" 1 } } */
> +	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#4" 1 } } */
> +	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#4" 1 } } */
> +
> +FUNC_DEFD (4)
> +	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#4" 1 } } */
> +	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#4" 1 } } */
> +	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#4" 1 } } */
> +	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#4" 1 } } */
> +
> +FUNC_DEFS (8)
> +	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#8" 1 } } */
> +	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#8" 1 } } */
> +	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#8" 1 } } */
> +	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#8" 1 } } */
> +
> +FUNC_DEFD (8)
> +	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#8" 1 } } */
> +	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#8" 1 } } */
> +	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#8" 1 } } */
> +	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#8" 1 } } */
> +
> +FUNC_DEFS (16)
> +	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#16" 1 } } */
> +	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#16" 1 } } */
> +	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#16" 1 } } */
> +	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#16" 1 } } */
> +
> +FUNC_DEFD (16)
> +	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#16" 1 } } */
> +	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#16" 1 } } */
> +	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#16" 1 } } */
> +	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#16" 1 } } */
> +
> +FUNC_DEFS (31)
> +	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#31" 1 } } */
> +	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#31" 1 } } */
> +	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#31" 1 } } */
> +	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#31" 1 } } */
> +
> +FUNC_DEFD (31)
> +	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#31" 1 } } */
> +	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#31" 1 } } */
> +	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#31" 1 } } */
> +	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#31" 1 } } */
> +
> +#define FUNC_TESTS(__a, __b)						\
> +do									\
> +{									\
> +	if (fsfoo##__a (__b) !=  ((int) i) * (1.0f/(1u << __a)) )	\
> +	__builtin_abort ();						\
> +	if (fusfoo##__a (__b) != ((int) i) * (1.0f/(1u << __a)) )	\
> +	__builtin_abort ();						\
> +	if (fslfoo##__a (__b) != ((int) i) * (1.0f/(1u << __a)) )	\
> +	__builtin_abort ();						\
> +	if (fulfoo##__a (__b) != ((int) i) * (1.0f/(1u << __a)) )	\
> +	__builtin_abort ();						\
> +} while (0)
> +
> +#define FUNC_TESTD(__a, __b)						\
> +do									\
> +{									\
> +	if (fsfoo##__a (__b) !=  ((int) i) * (1.0d/(1u << __a)) )	\
> +	__builtin_abort ();						\
> +	if (fusfoo##__a (__b) != ((int) i) * (1.0d/(1u << __a)) )	\
> +	__builtin_abort ();						\
> +	if (fslfoo##__a (__b) != ((int) i) * (1.0d/(1u << __a)) )	\
> +	__builtin_abort ();						\
> +	if (fulfoo##__a (__b) != ((int) i) * (1.0d/(1u << __a)) )	\
> +	__builtin_abort ();						\
> +} while (0)
> +
> +	int
> +main (void)
> +{
> +	int i;
> +
> +	for (i = 0; i < 32; i ++)
> +	{
> +		FUNC_TESTS (4, i);
> +		FUNC_TESTS (8, i);
> +		FUNC_TESTS (16, i);
> +		FUNC_TESTS (31, i);
> +
> +		FUNC_TESTD (4, i);
> +		FUNC_TESTD (8, i);
> +		FUNC_TESTD (16, i);
> +		FUNC_TESTD (31, i);
> +	}
> +	return 0;
> +}
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [AArch64] Use scvtf fbits option where appropriate
  2019-06-13 17:26 Wilco Dijkstra
@ 2019-06-18  9:11 ` Joel Hutton
  2019-06-18 10:37   ` Richard Earnshaw (lists)
  0 siblings, 1 reply; 7+ messages in thread
From: Joel Hutton @ 2019-06-18  9:11 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: nd, GCC Patches

[-- Attachment #1: Type: text/plain, Size: 927 bytes --]

Hi,

On 13/06/2019 18:26, Wilco Dijkstra wrote:
> Wouldn't it be easier to just do exact_log2 (real_to_integer (&r0))
> and then check the range is in 1..31?
I've revised this section.
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -6016,6 +6016,40 @@
>     [(set_attr "type" "f_cvtf2i")]
>   )
>   
> +(define_insn "*aarch64_<su_optab>cvtf_<fcvt_target>_<GPF:mode>2_mult"
> +  [(set (match_operand:GPF 0 "register_operand" "=w,w")
> +	(mult:GPF (FLOATUORS:GPF
> +		   (match_operand:<FCVT_TARGET> 1 "register_operand" "w,?r"))
> +		   (match_operand 2 "aarch64_fp_pow2_recip""Dt,Dt")))]
>
> We should add a comment before both define_insn similar to the other
> conversions, explaining what they do and why there are 2 separate patterns
> (the default versions of the conversions appear to be missing a comment too).
I've added comments to the new and existing patterns

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-SCVTF-fbits.patch --]
[-- Type: text/x-patch; name="0001-SCVTF-fbits.patch", Size: 10867 bytes --]

From 5a9dfa6c6eb1c5b9c8c464780b7098058989d472 Mon Sep 17 00:00:00 2001
From: Joel Hutton <Joel.Hutton@arm.com>
Date: Thu, 13 Jun 2019 11:08:56 +0100
Subject: [PATCH] SCVTF fbits

---
 gcc/config/aarch64/aarch64-protos.h           |   1 +
 gcc/config/aarch64/aarch64.c                  |  28 ++++
 gcc/config/aarch64/aarch64.md                 |  39 +++++
 gcc/config/aarch64/constraints.md             |   7 +
 gcc/config/aarch64/predicates.md              |   4 +
 gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c | 140 ++++++++++++++++++
 6 files changed, 219 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 1e3b1c91db1..ad1ba458a3f 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -494,6 +494,7 @@ enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx);
 enum reg_class aarch64_regno_regclass (unsigned);
 int aarch64_asm_preferred_eh_data_format (int, int);
 int aarch64_fpconst_pow_of_2 (rtx);
+int aarch64_fpconst_pow2_recip (rtx);
 machine_mode aarch64_hard_regno_caller_save_mode (unsigned, unsigned,
 						       machine_mode);
 int aarch64_uxt_size (int, HOST_WIDE_INT);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 9a035dd9ed8..424ca6c9932 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -18707,6 +18707,34 @@ aarch64_fpconst_pow_of_2 (rtx x)
   return exact_log2 (real_to_integer (r));
 }
 
+/* If X is a positive CONST_DOUBLE with a value that is the reciprocal of a
+   power of 2 (i.e 1/2^n) return the number of float bits. e.g. for x==(1/2^n)
+   return n. Otherwise return -1.  */
+int
+aarch64_fpconst_pow2_recip (rtx x)
+{
+  REAL_VALUE_TYPE r0;
+
+  if (!CONST_DOUBLE_P (x))
+    return -1;
+
+  r0 = *CONST_DOUBLE_REAL_VALUE (x);
+  if (exact_real_inverse (DFmode, &r0)
+      && !REAL_VALUE_NEGATIVE (r0))
+    {
+	int ret = exact_log2 (real_to_integer (&r0));
+	if (ret >= 1 && ret <= 31)
+	  {
+	    return ret;
+	  }
+	else
+	  {
+	    return -1;
+	  }
+    }
+  return -1;
+}
+
 /* If X is a vector of equal CONST_DOUBLE values and that value is
    Y, return the aarch64_fpconst_pow_of_2 of Y.  Otherwise return -1.  */
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 526c7fb0dab..d9812aa238e 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -6016,6 +6016,44 @@
   [(set_attr "type" "f_cvtf2i")]
 )
 
+;; equal width integer to fp combine
+(define_insn "*aarch64_<su_optab>cvtf_<fcvt_target>_<GPF:mode>2_mult"
+  [(set (match_operand:GPF 0 "register_operand" "=w,w")
+	(mult:GPF (FLOATUORS:GPF
+		   (match_operand:<FCVT_TARGET> 1 "register_operand" "w,?r"))
+		   (match_operand 2 "aarch64_fp_pow2_recip""Dt,Dt")))]
+  "TARGET_FLOAT"
+  {
+    operands[2] = GEN_INT (aarch64_fpconst_pow2_recip (operands[2]));
+    switch (which_alternative)
+    {
+      case 0:
+	return "<su_optab>cvtf\t%<GPF:s>0, %<s>1, #%2";
+      case 1:
+	return "<su_optab>cvtf\t%<GPF:s>0, %<w1>1, #%2";
+      default:
+	gcc_unreachable();
+    }
+  }
+  [(set_attr "type" "neon_int_to_fp_<Vetype>,f_cvti2f")
+   (set_attr "arch" "simd,fp")]
+)
+
+;; inequal width integer to fp combine
+(define_insn "*aarch64_<su_optab>cvtf_<fcvt_iesize>_<GPF:mode>2_mult"
+  [(set (match_operand:GPF 0 "register_operand" "=w")
+	(mult:GPF (FLOATUORS:GPF
+		   (match_operand:<FCVT_IESIZE> 1 "register_operand" "r"))
+		   (match_operand 2 "aarch64_fp_pow2_recip" "Dt")))]
+  "TARGET_FLOAT"
+  {
+    operands[2] = GEN_INT (aarch64_fpconst_pow2_recip (operands[2]));
+    return "<su_optab>cvtf\t%<GPF:s>0, %<w2>1, #%2";
+  }
+  [(set_attr "type" "f_cvti2f")]
+)
+
+;; equal width integer to fp conversion
 (define_insn "<optab><fcvt_target><GPF:mode>2"
   [(set (match_operand:GPF 0 "register_operand" "=w,w")
         (FLOATUORS:GPF (match_operand:<FCVT_TARGET> 1 "register_operand" "w,?r")))]
@@ -6027,6 +6065,7 @@
    (set_attr "arch" "simd,fp")]
 )
 
+;; inequal width integer to fp conversions
 (define_insn "<optab><fcvt_iesize><GPF:mode>2"
   [(set (match_operand:GPF 0 "register_operand" "=w")
         (FLOATUORS:GPF (match_operand:<FCVT_IESIZE> 1 "register_operand" "r")))]
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 21f9549e660..a7731a033ea 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -329,6 +329,13 @@
       (match_test "aarch64_simd_scalar_immediate_valid_for_move (op,
 						 QImode)")))
 
+(define_constraint "Dt"
+  "@internal
+ A const_double which is the reciprocal of an exact power of two, can be
+ used in an scvtf with fract bits operation"
+ (and (match_code "const_double")
+      (match_test "aarch64_fpconst_pow2_recip (op)")))
+
 (define_constraint "Dl"
   "@internal
  A constraint that matches vector of immediates for left shifts."
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 10100ca830a..da295981286 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -98,6 +98,10 @@
   (and (match_code "const_double")
 	(match_test "aarch64_fpconst_pow_of_2 (op) > 0")))
 
+(define_predicate "aarch64_fp_pow2_recip"
+  (and (match_code "const_double")
+       (match_test "aarch64_fpconst_pow2_recip (op) > 0")))
+
 (define_predicate "aarch64_fp_vec_pow2"
   (match_test "aarch64_vec_fpconst_pow_of_2 (op) > 0"))
 
diff --git a/gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c b/gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c
new file mode 100644
index 00000000000..e8d1de6279b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c
@@ -0,0 +1,140 @@
+/* { dg-do run } */
+/* { dg-options "-save-temps -O2 -fno-inline" } */
+
+#define FUNC_DEFS(__a)			\
+	float				\
+fsfoo##__a (int x)			\
+{					\
+	return ((float) x)/(1u << __a);	\
+}					\
+float					\
+fusfoo##__a (unsigned int x)		\
+{					\
+	return ((float) x)/(1u << __a);	\
+}					\
+float					\
+fslfoo##__a (long x)			\
+{					\
+	return ((float) x)/(1u << __a);	\
+}					\
+float					\
+fulfoo##__a (unsigned long x)		\
+{					\
+	return ((float) x)/(1u << __a);	\
+}					\
+
+#define FUNC_DEFD(__a)			\
+double					\
+dsfoo##__a (int x)			\
+{					\
+	return ((double) x)/(1u << __a);\
+}					\
+double					\
+dusfoo##__a (unsigned int x)		\
+{					\
+	return ((double) x)/(1u << __a);\
+}					\
+double					\
+dslfoo##__a (long x)			\
+{					\
+	return ((double) x)/(1u << __a);\
+}					\
+double					\
+dulfoo##__a (unsigned long x)		\
+{					\
+	return ((double) x)/(1u << __a);\
+}
+
+FUNC_DEFS (4)
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#4" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#4" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#4" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#4" 1 } } */
+
+FUNC_DEFD (4)
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#4" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#4" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#4" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#4" 1 } } */
+
+FUNC_DEFS (8)
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#8" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#8" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#8" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#8" 1 } } */
+
+FUNC_DEFD (8)
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#8" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#8" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#8" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#8" 1 } } */
+
+FUNC_DEFS (16)
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#16" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#16" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#16" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#16" 1 } } */
+
+FUNC_DEFD (16)
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#16" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#16" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#16" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#16" 1 } } */
+
+FUNC_DEFS (31)
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#31" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#31" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#31" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#31" 1 } } */
+
+FUNC_DEFD (31)
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#31" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#31" 1 } } */
+	/* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#31" 1 } } */
+	/* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#31" 1 } } */
+
+#define FUNC_TESTS(__a, __b)						\
+do									\
+{									\
+	if (fsfoo##__a (__b) !=  ((int) i) * (1.0f/(1u << __a)) )	\
+	__builtin_abort ();						\
+	if (fusfoo##__a (__b) != ((int) i) * (1.0f/(1u << __a)) )	\
+	__builtin_abort ();						\
+	if (fslfoo##__a (__b) != ((int) i) * (1.0f/(1u << __a)) )	\
+	__builtin_abort ();						\
+	if (fulfoo##__a (__b) != ((int) i) * (1.0f/(1u << __a)) )	\
+	__builtin_abort ();						\
+} while (0)
+
+#define FUNC_TESTD(__a, __b)						\
+do									\
+{									\
+	if (fsfoo##__a (__b) !=  ((int) i) * (1.0d/(1u << __a)) )	\
+	__builtin_abort ();						\
+	if (fusfoo##__a (__b) != ((int) i) * (1.0d/(1u << __a)) )	\
+	__builtin_abort ();						\
+	if (fslfoo##__a (__b) != ((int) i) * (1.0d/(1u << __a)) )	\
+	__builtin_abort ();						\
+	if (fulfoo##__a (__b) != ((int) i) * (1.0d/(1u << __a)) )	\
+	__builtin_abort ();						\
+} while (0)
+
+	int
+main (void)
+{
+	int i;
+
+	for (i = 0; i < 32; i ++)
+	{
+		FUNC_TESTS (4, i);
+		FUNC_TESTS (8, i);
+		FUNC_TESTS (16, i);
+		FUNC_TESTS (31, i);
+
+		FUNC_TESTD (4, i);
+		FUNC_TESTD (8, i);
+		FUNC_TESTD (16, i);
+		FUNC_TESTD (31, i);
+	}
+	return 0;
+}
-- 
2.17.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [AArch64] Use scvtf fbits option where appropriate
@ 2019-06-13 17:26 Wilco Dijkstra
  2019-06-18  9:11 ` Joel Hutton
  0 siblings, 1 reply; 7+ messages in thread
From: Wilco Dijkstra @ 2019-06-13 17:26 UTC (permalink / raw)
  To: Joel Hutton; +Cc: nd, GCC Patches

Hi Joel,

A few comments below:

+/* If X is a positive CONST_DOUBLE with a value that is the reciprocal of a
+   power of 2 (i.e 1/2^n) return the number of float bits. e.g. for x==(1/2^n)
+   return log2 (n). Otherwise return 0.  */
+int
+aarch64_fpconst_pow2_recip (rtx x)
+{
+  REAL_VALUE_TYPE r0;
+
+  if (!CONST_DOUBLE_P (x))
+    return 0;
+
+  r0 = *CONST_DOUBLE_REAL_VALUE (x);
+  if (exact_real_inverse (DFmode, &r0)
+      && !REAL_VALUE_NEGATIVE (r0))
+    {
+      if (exact_real_truncate (DFmode, &r0))

Truncate to double? That doesn't do anything...

+	{
+	  HOST_WIDE_INT value = real_to_integer (&r0);
+	  value = value & 0xffffffff;
+	  if ((value != 0) && ( (value & (value - 1)) == 0))
+	    {
+	      int ret = exact_log2 (value);
+	      gcc_assert (IN_RANGE (ret, 0, 31));
+	      return ret;
+	    }

Wouldn't it be easier to just do exact_log2 (real_to_integer (&r0))
and then check the range is in 1..31?

--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -6016,6 +6016,40 @@
   [(set_attr "type" "f_cvtf2i")]
 )
 
+(define_insn "*aarch64_<su_optab>cvtf_<fcvt_target>_<GPF:mode>2_mult"
+  [(set (match_operand:GPF 0 "register_operand" "=w,w")
+	(mult:GPF (FLOATUORS:GPF
+		   (match_operand:<FCVT_TARGET> 1 "register_operand" "w,?r"))
+		   (match_operand 2 "aarch64_fp_pow2_recip""Dt,Dt")))]

We should add a comment before both define_insn similar to the other
conversions, explaining what they do and why there are 2 separate patterns
(the default versions of the conversions appear to be missing a comment too).

Wilco


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2019-06-18 15:34 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-06-13 16:04 [AArch64] Use scvtf fbits option where appropriate Joel Hutton
2019-06-13 17:26 Wilco Dijkstra
2019-06-18  9:11 ` Joel Hutton
2019-06-18 10:37   ` Richard Earnshaw (lists)
2019-06-18 11:12     ` Wilco Dijkstra
2019-06-18 12:30       ` Richard Sandiford
2019-06-18 15:34         ` Joel Hutton

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).