From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 107753 invoked by alias); 19 Dec 2017 20:37:47 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 107661 invoked by uid 89); 19 Dec 2017 20:37:47 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-11.7 required=5.0 tests=AWL,BAYES_00,GIT_PATCH_2,GIT_PATCH_3,RCVD_IN_DNSWL_NONE,SPF_PASS autolearn=ham version=3.3.2 spammy=in0, preparation X-HELO: mail-wm0-f51.google.com Received: from mail-wm0-f51.google.com (HELO mail-wm0-f51.google.com) (74.125.82.51) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Tue, 19 Dec 2017 20:37:44 +0000 Received: by mail-wm0-f51.google.com with SMTP id t8so6085995wmc.3 for ; Tue, 19 Dec 2017 12:37:44 -0800 (PST) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:mail-followup-to:cc:subject:references :date:in-reply-to:message-id:user-agent:mime-version; bh=8WzaRgE+pAIwqiaE1Dsj1UGJiMdondoDs3TsoDDvPDI=; b=t1UgLD7mxOw8c2FGRy1AbUTBFFQceA0n/7zhfdlOvvpF13fK0K878MPyhgkHqHw0+V /+N5sjtsxw3u1nFtC8AAaWP59HHJLiF1sAVs6psmr5lIqUHQTjy4Z0fTsRPpUU4WnO7a DaOeZfkQjQonVHlpRZvw7GmfCiqMolqe5erMmXL9woZHi/9yt5/UZRMNvM6hG/r0BaUG QM9ylAh/acEC73YzT1c/uBgeDHouRG4l99bCdXmZj41lQllqZJtICbvjIrBoH6Jhq6je H7e4P0newf+ca7tSODmKZSf+OCR239cp4Y4pB3zCKUdQDx6F7RzVYm7xQdKtyIUymCXk 4RIQ== X-Gm-Message-State: AKGB3mJDRfS1A1ERN6jCAvrUmaA6WVHndsVs130F/BpRn+2LHuf17S5c KO3mAlp8q27XanGWKdJkkFwTpw== X-Google-Smtp-Source: ACJfBovwKQVJDzlCH/6euNXktMBvGSxtya4mhMhRf3JEs2dF5exBSBNcrNdJJ8d4J4ZMWtVek+uPuA== X-Received: by 10.28.194.139 with SMTP id s133mr4841944wmf.14.1513715862535; Tue, 19 Dec 2017 12:37:42 -0800 (PST) Received: from localhost ([2.25.234.26]) by smtp.gmail.com with ESMTPSA id p13sm18884850wrc.61.2017.12.19.12.37.41 (version=TLS1_2 
cipher=ECDHE-RSA-CHACHA20-POLY1305 bits=256/256); Tue, 19 Dec 2017 12:37:41 -0800 (PST) From: Richard Sandiford To: gcc-patches@gcc.gnu.org Mail-Followup-To: gcc-patches@gcc.gnu.org,richard.earnshaw@arm.com, james.greenhalgh@arm.com, marcus.shawcroft@arm.com, richard.sandiford@linaro.org Cc: richard.earnshaw@arm.com, james.greenhalgh@arm.com, marcus.shawcroft@arm.com Subject: Re: [13/13] [AArch64] Use vec_perm_indices helper routines References: <87indfmrgt.fsf@linaro.org> <87shcjjxcc.fsf@linaro.org> Date: Tue, 19 Dec 2017 20:37:00 -0000 In-Reply-To: <87shcjjxcc.fsf@linaro.org> (Richard Sandiford's message of "Sat, 09 Dec 2017 23:27:47 +0000") Message-ID: <87r2rq314b.fsf@linaro.org> User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/25.3 (gnu/linux) MIME-Version: 1.0 Content-Type: text/plain X-SW-Source: 2017-12/txt/msg01308.txt.bz2 Ping Richard Sandiford writes: > This patch makes the AArch64 vec_perm_const code use the new > vec_perm_indices routines, instead of checking each element individually. > This means that they extend naturally to variable-length vectors. > > Also, aarch64_evpc_dup was the only function that generated rtl when > testing_p is true, and that looked accidental. The patch adds the > missing check and then replaces the gen_raw_REG/start_sequence/ > end_sequence stuff with an assert that no rtl is generated. > > Tested on aarch64-linux-gnu. Also tested by making sure that there > were no assembly output differences for aarch64-linux-gnu or > aarch64_be-linux-gnu. OK to install? > > Richard > > > 2017-12-09 Richard Sandiford > > gcc/ > * config/aarch64/aarch64.c (aarch64_evpc_trn): Use d.perm.series_p > instead of checking each element individually. > (aarch64_evpc_uzp): Likewise. > (aarch64_evpc_zip): Likewise. > (aarch64_evpc_ext): Likewise. > (aarch64_evpc_rev): Likewise. > (aarch64_evpc_dup): Test the encoding for a single duplicated element, > instead of checking each element individually. 
Return true without > generating rtl if testing_p is true. > (aarch64_vectorize_vec_perm_const): Use all_from_input_p to test > whether all selected elements come from the same input, instead of > checking each element individually. Remove calls to gen_raw_REG, > start_sequence and end_sequence and instead assert that no rtl is > generated. > > Index: gcc/config/aarch64/aarch64.c > =================================================================== > --- gcc/config/aarch64/aarch64.c 2017-12-09 22:48:47.535824832 +0000 > +++ gcc/config/aarch64/aarch64.c 2017-12-09 22:49:00.139270410 +0000 > @@ -13295,7 +13295,7 @@ aarch64_expand_vec_perm (rtx target, rtx > static bool > aarch64_evpc_trn (struct expand_vec_perm_d *d) > { > - unsigned int i, odd, mask, nelt = d->perm.length (); > + unsigned int odd, nelt = d->perm.length (); > rtx out, in0, in1, x; > machine_mode vmode = d->vmode; > > @@ -13304,21 +13304,11 @@ aarch64_evpc_trn (struct expand_vec_perm > > /* Note that these are little-endian tests. > We correct for big-endian later. */ > - if (d->perm[0] == 0) > - odd = 0; > - else if (d->perm[0] == 1) > - odd = 1; > - else > + odd = d->perm[0]; > + if ((odd != 0 && odd != 1) > + || !d->perm.series_p (0, 2, odd, 2) > + || !d->perm.series_p (1, 2, nelt + odd, 2)) > return false; > - mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); > - > - for (i = 0; i < nelt; i += 2) > - { > - if (d->perm[i] != i + odd) > - return false; > - if (d->perm[i + 1] != ((i + nelt + odd) & mask)) > - return false; > - } > > /* Success! */ > if (d->testing_p) > @@ -13342,7 +13332,7 @@ aarch64_evpc_trn (struct expand_vec_perm > static bool > aarch64_evpc_uzp (struct expand_vec_perm_d *d) > { > - unsigned int i, odd, mask, nelt = d->perm.length (); > + unsigned int odd; > rtx out, in0, in1, x; > machine_mode vmode = d->vmode; > > @@ -13351,20 +13341,10 @@ aarch64_evpc_uzp (struct expand_vec_perm > > /* Note that these are little-endian tests. > We correct for big-endian later. 
*/ > - if (d->perm[0] == 0) > - odd = 0; > - else if (d->perm[0] == 1) > - odd = 1; > - else > + odd = d->perm[0]; > + if ((odd != 0 && odd != 1) > + || !d->perm.series_p (0, 1, odd, 2)) > return false; > - mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); > - > - for (i = 0; i < nelt; i++) > - { > - unsigned elt = (i * 2 + odd) & mask; > - if (d->perm[i] != elt) > - return false; > - } > > /* Success! */ > if (d->testing_p) > @@ -13388,7 +13368,7 @@ aarch64_evpc_uzp (struct expand_vec_perm > static bool > aarch64_evpc_zip (struct expand_vec_perm_d *d) > { > - unsigned int i, high, mask, nelt = d->perm.length (); > + unsigned int high, nelt = d->perm.length (); > rtx out, in0, in1, x; > machine_mode vmode = d->vmode; > > @@ -13397,25 +13377,11 @@ aarch64_evpc_zip (struct expand_vec_perm > > /* Note that these are little-endian tests. > We correct for big-endian later. */ > - high = nelt / 2; > - if (d->perm[0] == high) > - /* Do Nothing. */ > - ; > - else if (d->perm[0] == 0) > - high = 0; > - else > + high = d->perm[0]; > + if ((high != 0 && high * 2 != nelt) > + || !d->perm.series_p (0, 2, high, 1) > + || !d->perm.series_p (1, 2, high + nelt, 1)) > return false; > - mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); > - > - for (i = 0; i < nelt / 2; i++) > - { > - unsigned elt = (i + high) & mask; > - if (d->perm[i * 2] != elt) > - return false; > - elt = (elt + nelt) & mask; > - if (d->perm[i * 2 + 1] != elt) > - return false; > - } > > /* Success! */ > if (d->testing_p) > @@ -13440,23 +13406,14 @@ aarch64_evpc_zip (struct expand_vec_perm > static bool > aarch64_evpc_ext (struct expand_vec_perm_d *d) > { > - unsigned int i, nelt = d->perm.length (); > + unsigned int nelt = d->perm.length (); > rtx offset; > > unsigned int location = d->perm[0]; /* Always < nelt. */ > > /* Check if the extracted indices are increasing by one. 
*/ > - for (i = 1; i < nelt; i++) > - { > - unsigned int required = location + i; > - if (d->one_vector_p) > - { > - /* We'll pass the same vector in twice, so allow indices to wrap. */ > - required &= (nelt - 1); > - } > - if (d->perm[i] != required) > - return false; > - } > + if (!d->perm.series_p (0, 1, location, 1)) > + return false; > > /* Success! */ > if (d->testing_p) > @@ -13488,7 +13445,7 @@ aarch64_evpc_ext (struct expand_vec_perm > static bool > aarch64_evpc_rev (struct expand_vec_perm_d *d) > { > - unsigned int i, j, diff, size, unspec, nelt = d->perm.length (); > + unsigned int i, diff, size, unspec; > > if (!d->one_vector_p) > return false; > @@ -13504,18 +13461,10 @@ aarch64_evpc_rev (struct expand_vec_perm > else > return false; > > - for (i = 0; i < nelt ; i += diff + 1) > - for (j = 0; j <= diff; j += 1) > - { > - /* This is guaranteed to be true as the value of diff > - is 7, 3, 1 and we should have enough elements in the > - queue to generate this. Getting a vector mask with a > - value of diff other than these values implies that > - something is wrong by the time we get here. */ > - gcc_assert (i + j < nelt); > - if (d->perm[i + j] != i + diff - j) > - return false; > - } > + unsigned int step = diff + 1; > + for (i = 0; i < step; ++i) > + if (!d->perm.series_p (i, step, diff - i, step)) > + return false; > > /* Success! */ > if (d->testing_p) > @@ -13532,15 +13481,17 @@ aarch64_evpc_dup (struct expand_vec_perm > rtx out = d->target; > rtx in0; > machine_mode vmode = d->vmode; > - unsigned int i, elt, nelt = d->perm.length (); > + unsigned int elt; > rtx lane; > > + if (d->perm.encoding ().encoded_nelts () != 1) > + return false; > + > + /* Success! 
*/ > + if (d->testing_p) > + return true; > + > elt = d->perm[0]; > - for (i = 1; i < nelt; i++) > - { > - if (elt != d->perm[i]) > - return false; > - } > > /* The generic preparation in aarch64_expand_vec_perm_const_1 > swaps the operand order and the permute indices if it finds > @@ -13628,61 +13579,37 @@ aarch64_vectorize_vec_perm_const (machin > rtx op1, const vec_perm_indices &sel) > { > struct expand_vec_perm_d d; > - unsigned int i, which; > > - d.vmode = vmode; > - d.target = target; > - d.op0 = op0; > - d.op1 = op1; > - d.testing_p = !target; > - > - /* Calculate whether all elements are in one vector. */ > - unsigned int nelt = sel.length (); > - for (i = which = 0; i < nelt; ++i) > + /* Check whether the mask can be applied to a single vector. */ > + if (op0 && rtx_equal_p (op0, op1)) > + d.one_vector_p = true; > + else if (sel.all_from_input_p (0)) > { > - unsigned int ei = sel[i] & (2 * nelt - 1); > - which |= (ei < nelt ? 1 : 2); > + d.one_vector_p = true; > + op1 = op0; > } > - > - switch (which) > + else if (sel.all_from_input_p (1)) > { > - default: > - gcc_unreachable (); > - > - case 3: > - d.one_vector_p = false; > - if (d.testing_p || !rtx_equal_p (op0, op1)) > - break; > - > - /* The elements of PERM do not suggest that only the first operand > - is used, but both operands are identical. Allow easier matching > - of the permutation by folding the permutation into the single > - input vector. */ > - /* Fall Through. */ > - case 2: > - d.op0 = op1; > - d.one_vector_p = true; > - break; > - > - case 1: > - d.op1 = op0; > d.one_vector_p = true; > - break; > + op0 = op1; > } > + else > + d.one_vector_p = false; > > - d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt); > + d.perm.new_vector (sel.encoding (), d.one_vector_p ? 
1 : 2, > + sel.nelts_per_input ()); > + d.vmode = vmode; > + d.target = target; > + d.op0 = op0; > + d.op1 = op1; > + d.testing_p = !target; > > if (!d.testing_p) > return aarch64_expand_vec_perm_const_1 (&d); > > - d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); > - d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); > - if (!d.one_vector_p) > - d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); > - > - start_sequence (); > + rtx_insn *last = get_last_insn (); > bool ret = aarch64_expand_vec_perm_const_1 (&d); > - end_sequence (); > + gcc_assert (last == get_last_insn ()); > > return ret; > }