From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 43908 invoked by alias); 18 Jun 2019 10:37:33 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 43899 invoked by uid 89); 18 Jun 2019 10:37:32 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-17.4 required=5.0 tests=AWL,BAYES_00,GIT_PATCH_0,GIT_PATCH_1,GIT_PATCH_2,GIT_PATCH_3,SPF_PASS autolearn=ham version=3.3.1 spammy=white X-HELO: foss.arm.com Received: from foss.arm.com (HELO foss.arm.com) (217.140.110.172) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Tue, 18 Jun 2019 10:37:30 +0000 Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 6913A344; Tue, 18 Jun 2019 03:37:28 -0700 (PDT) Received: from e120077-lin.cambridge.arm.com (e120077-lin.cambridge.arm.com [10.2.206.226]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPSA id 6BDA23F246; Tue, 18 Jun 2019 03:39:13 -0700 (PDT) Subject: Re: [AArch64] Use scvtf fbits option where appropriate To: Joel Hutton , Wilco Dijkstra Cc: nd , GCC Patches References: <3157239f-48e6-bbd6-122c-d173b361bebd@arm.com> From: "Richard Earnshaw (lists)" Openpgp: preference=signencrypt Message-ID: Date: Tue, 18 Jun 2019 10:37:00 -0000 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Thunderbird/60.6.1 MIME-Version: 1.0 In-Reply-To: <3157239f-48e6-bbd6-122c-d173b361bebd@arm.com> Content-Type: text/plain; charset=us-ascii Content-Transfer-Encoding: 7bit X-SW-Source: 2019-06/txt/msg01028.txt.bz2 On 18/06/2019 10:11, Joel Hutton wrote: > Hi, > > On 13/06/2019 18:26, Wilco Dijkstra wrote: >> Wouldn't it be easier to just do exact_log2 (real_to_integer (&r0)) >> and then check the range is in 1..31? > I've revised this section. 
>> --- a/gcc/config/aarch64/aarch64.md >> +++ b/gcc/config/aarch64/aarch64.md >> @@ -6016,6 +6016,40 @@ >> [(set_attr "type" "f_cvtf2i")] >> ) >> >> +(define_insn "*aarch64_cvtf__2_mult" >> + [(set (match_operand:GPF 0 "register_operand" "=w,w") >> + (mult:GPF (FLOATUORS:GPF >> + (match_operand: 1 "register_operand" "w,?r")) >> + (match_operand 2 "aarch64_fp_pow2_recip""Dt,Dt")))] >> >> We should add a comment before both define_insn similar to the other >> conversions, explaining what they do and why there are 2 separate patterns >> (the default versions of the conversions appear to be missing a comment too). > I've added comments to the new and existing patterns > > > 0001-SCVTF-fbits.patch > > From 5a9dfa6c6eb1c5b9c8c464780b7098058989d472 Mon Sep 17 00:00:00 2001 > From: Joel Hutton > Date: Thu, 13 Jun 2019 11:08:56 +0100 > Subject: [PATCH] SCVTF fbits > > --- > gcc/config/aarch64/aarch64-protos.h | 1 + > gcc/config/aarch64/aarch64.c | 28 ++++ > gcc/config/aarch64/aarch64.md | 39 +++++ > gcc/config/aarch64/constraints.md | 7 + > gcc/config/aarch64/predicates.md | 4 + > gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c | 140 ++++++++++++++++++ > 6 files changed, 219 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c > > diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h > index 1e3b1c91db1..ad1ba458a3f 100644 > --- a/gcc/config/aarch64/aarch64-protos.h > +++ b/gcc/config/aarch64/aarch64-protos.h > @@ -494,6 +494,7 @@ enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx); > enum reg_class aarch64_regno_regclass (unsigned); > int aarch64_asm_preferred_eh_data_format (int, int); > int aarch64_fpconst_pow_of_2 (rtx); > +int aarch64_fpconst_pow2_recip (rtx); > machine_mode aarch64_hard_regno_caller_save_mode (unsigned, unsigned, > machine_mode); > int aarch64_uxt_size (int, HOST_WIDE_INT); > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index 9a035dd9ed8..424ca6c9932 100644 
> --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -18707,6 +18707,34 @@ aarch64_fpconst_pow_of_2 (rtx x) > return exact_log2 (real_to_integer (r)); > } > > +/* If X is a positive CONST_DOUBLE with a value that is the reciprocal of a > + power of 2 (i.e 1/2^n) return the number of float bits. e.g. for x==(1/2^n) > + return n. Otherwise return -1. */ > +int > +aarch64_fpconst_pow2_recip (rtx x) > +{ > + REAL_VALUE_TYPE r0; > + > + if (!CONST_DOUBLE_P (x)) > + return -1; CONST_DOUBLE can be used for things other than floating point. You should really check that the mode of the double is in class MODE_FLOAT. > + > + r0 = *CONST_DOUBLE_REAL_VALUE (x); > + if (exact_real_inverse (DFmode, &r0) > + && !REAL_VALUE_NEGATIVE (r0)) > + { > + int ret = exact_log2 (real_to_integer (&r0)); > + if (ret >= 1 && ret <= 31) > + { > + return ret; > + } > + else > + { > + return -1; > + } > + } > + return -1; > +} > + > /* If X is a vector of equal CONST_DOUBLE values and that value is > Y, return the aarch64_fpconst_pow_of_2 of Y. Otherwise return -1. */ > > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > index 526c7fb0dab..d9812aa238e 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -6016,6 +6016,44 @@ > [(set_attr "type" "f_cvtf2i")] > ) > > +;; equal width integer to fp combine > +(define_insn "*aarch64_cvtf__2_mult" > + [(set (match_operand:GPF 0 "register_operand" "=w,w") > + (mult:GPF (FLOATUORS:GPF > + (match_operand: 1 "register_operand" "w,?r")) > + (match_operand 2 "aarch64_fp_pow2_recip""Dt,Dt")))] Missing mode on operand 2. Missing white space between constraint and predicate. 
> + "TARGET_FLOAT" > + { > + operands[2] = GEN_INT (aarch64_fpconst_pow2_recip (operands[2])); > + switch (which_alternative) > + { > + case 0: > + return "cvtf\t%0, %1, #%2"; > + case 1: > + return "cvtf\t%0, %1, #%2"; > + default: > + gcc_unreachable(); > + } > + } > + [(set_attr "type" "neon_int_to_fp_,f_cvti2f") > + (set_attr "arch" "simd,fp")] > +) > + > +;; inequal width integer to fp combine > +(define_insn "*aarch64_cvtf__2_mult" > + [(set (match_operand:GPF 0 "register_operand" "=w") > + (mult:GPF (FLOATUORS:GPF > + (match_operand: 1 "register_operand" "r")) > + (match_operand 2 "aarch64_fp_pow2_recip" "Dt")))] Likewise. > + "TARGET_FLOAT" > + { > + operands[2] = GEN_INT (aarch64_fpconst_pow2_recip (operands[2])); > + return "cvtf\t%0, %1, #%2"; > + } > + [(set_attr "type" "f_cvti2f")] > +) > + > +;; equal width integer to fp conversion > (define_insn "2" > [(set (match_operand:GPF 0 "register_operand" "=w,w") > (FLOATUORS:GPF (match_operand: 1 "register_operand" "w,?r")))] > @@ -6027,6 +6065,7 @@ > (set_attr "arch" "simd,fp")] > ) > > +;; inequal width integer to fp conversions Start sentences with a capital letter. End them with a full stop. "inequal" isn't a word: you probably mean "unequal". 
> (define_insn "2" > [(set (match_operand:GPF 0 "register_operand" "=w") > (FLOATUORS:GPF (match_operand: 1 "register_operand" "r")))] > diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md > index 21f9549e660..a7731a033ea 100644 > --- a/gcc/config/aarch64/constraints.md > +++ b/gcc/config/aarch64/constraints.md > @@ -329,6 +329,13 @@ > (match_test "aarch64_simd_scalar_immediate_valid_for_move (op, > QImode)"))) > > +(define_constraint "Dt" > + "@internal > + A const_double which is the reciprocal of an exact power of two, can be > + used in an scvtf with fract bits operation" > + (and (match_code "const_double") > + (match_test "aarch64_fpconst_pow2_recip (op)"))) The test returns -1 on failure, but you're using this as a boolean predicate (i.e. != 0). R. > + > (define_constraint "Dl" > "@internal > A constraint that matches vector of immediates for left shifts." > diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md > index 10100ca830a..da295981286 100644 > --- a/gcc/config/aarch64/predicates.md > +++ b/gcc/config/aarch64/predicates.md > @@ -98,6 +98,10 @@ > (and (match_code "const_double") > (match_test "aarch64_fpconst_pow_of_2 (op) > 0"))) > > +(define_predicate "aarch64_fp_pow2_recip" > + (and (match_code "const_double") > + (match_test "aarch64_fpconst_pow2_recip (op) > 0"))) > + > (define_predicate "aarch64_fp_vec_pow2" > (match_test "aarch64_vec_fpconst_pow_of_2 (op) > 0")) > > diff --git a/gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c b/gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c > new file mode 100644 > index 00000000000..e8d1de6279b > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/fmul_scvtf.c > @@ -0,0 +1,140 @@ > +/* { dg-do run } */ > +/* { dg-options "-save-temps -O2 -fno-inline" } */ > + > +#define FUNC_DEFS(__a) \ > + float \ > +fsfoo##__a (int x) \ > +{ \ > + return ((float) x)/(1u << __a); \ > +} \ > +float \ > +fusfoo##__a (unsigned int x) \ > +{ \ > + return ((float) x)/(1u << __a); \ > +} \ > +float \ > +fslfoo##__a (long x) \ > +{ \ > + return ((float) x)/(1u << __a); \ > +} \ > +float \ > +fulfoo##__a (unsigned long x) \ > +{ \ > + return ((float) x)/(1u << 
__a); \ > +} \ > +float \ > +fslfoo##__a (long x) \ > +{ \ > + return ((float) x)/(1u << __a); \ > +} \ > +float \ > +fulfoo##__a (unsigned long x) \ > +{ \ > + return ((float) x)/(1u << __a); \ > +} \ > + > +#define FUNC_DEFD(__a) \ > +double \ > +dsfoo##__a (int x) \ > +{ \ > + return ((double) x)/(1u << __a);\ > +} \ > +double \ > +dusfoo##__a (unsigned int x) \ > +{ \ > + return ((double) x)/(1u << __a);\ > +} \ > +double \ > +dslfoo##__a (long x) \ > +{ \ > + return ((double) x)/(1u << __a);\ > +} \ > +double \ > +dulfoo##__a (unsigned long x) \ > +{ \ > + return ((double) x)/(1u << __a);\ > +} > + > +FUNC_DEFS (4) > + /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#4" 1 } } */ > + /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#4" 1 } } */ > + /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#4" 1 } } */ > + /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#4" 1 } } */ > + > +FUNC_DEFD (4) > + /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#4" 1 } } */ > + /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#4" 1 } } */ > + /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#4" 1 } } */ > + /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#4" 1 } } */ > + > +FUNC_DEFS (8) > + /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#8" 1 } } */ > + /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#8" 1 } } */ > + /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#8" 1 } } */ > + /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#8" 1 } } */ > + > +FUNC_DEFD (8) > + /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#8" 1 } } */ > + /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#8" 1 } } */ > + /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#8" 1 } } */ > + /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], 
x\[0-9\]*.*#8" 1 } } */ > + > +FUNC_DEFS (16) > + /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#16" 1 } } */ > + /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#16" 1 } } */ > + /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#16" 1 } } */ > + /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#16" 1 } } */ > + > +FUNC_DEFD (16) > + /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#16" 1 } } */ > + /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#16" 1 } } */ > + /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#16" 1 } } */ > + /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#16" 1 } } */ > + > +FUNC_DEFS (31) > + /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#31" 1 } } */ > + /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#31" 1 } } */ > + /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#31" 1 } } */ > + /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#31" 1 } } */ > + > +FUNC_DEFD (31) > + /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#31" 1 } } */ > + /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#31" 1 } } */ > + /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#31" 1 } } */ > + /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#31" 1 } } */ > + > +#define FUNC_TESTS(__a, __b) \ > +do \ > +{ \ > + if (fsfoo##__a (__b) != ((int) i) * (1.0f/(1u << __a)) ) \ > + __builtin_abort (); \ > + if (fusfoo##__a (__b) != ((int) i) * (1.0f/(1u << __a)) ) \ > + __builtin_abort (); \ > + if (fslfoo##__a (__b) != ((int) i) * (1.0f/(1u << __a)) ) \ > + __builtin_abort (); \ > + if (fulfoo##__a (__b) != ((int) i) * (1.0f/(1u << __a)) ) \ > + __builtin_abort (); \ > +} while (0) > + > +#define FUNC_TESTD(__a, __b) \ > +do \ > +{ \ > + if (fsfoo##__a (__b) != ((int) i) * (1.0d/(1u << __a)) ) \ 
> + __builtin_abort (); \ > + if (fusfoo##__a (__b) != ((int) i) * (1.0d/(1u << __a)) ) \ > + __builtin_abort (); \ > + if (fslfoo##__a (__b) != ((int) i) * (1.0d/(1u << __a)) ) \ > + __builtin_abort (); \ > + if (fulfoo##__a (__b) != ((int) i) * (1.0d/(1u << __a)) ) \ > + __builtin_abort (); \ > +} while (0) > + > + int > +main (void) > +{ > + int i; > + > + for (i = 0; i < 32; i ++) > + { > + FUNC_TESTS (4, i); > + FUNC_TESTS (8, i); > + FUNC_TESTS (16, i); > + FUNC_TESTS (31, i); > + > + FUNC_TESTD (4, i); > + FUNC_TESTD (8, i); > + FUNC_TESTD (16, i); > + FUNC_TESTD (31, i); > + } > + return 0; > +} >