From: Jakub Jelinek <jakub@redhat.com>
To: Jeff Law <law@redhat.com>
Cc: gcc-patches@gcc.gnu.org
Subject: [PATCH] Fix REE for vector modes (PR rtl-optimization/64286, take 2)
Date: Tue, 13 Jan 2015 16:22:00 -0000 [thread overview]
Message-ID: <20150113161136.GC1405@tucnak.redhat.com> (raw)
In-Reply-To: <54B43CD1.1040100@redhat.com>
On Mon, Jan 12, 2015 at 02:29:53PM -0700, Jeff Law wrote:
> On 01/12/15 12:59, Jakub Jelinek wrote:
> >Hi!
> >
> >As mentioned in the PR, giving up for all vector mode extensions
> >is unnecessary, but unlike scalar integer extensions, where the low part
> >of the extended value is the original value, for vectors this is not true,
> >thus the old value is lost. Which means we can perform REE, but only if
> >all uses of the definition are the same (code+mode) extension.
> >
> >Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> >
> >2015-01-12 Jakub Jelinek <jakub@redhat.com>
> >
> > PR rtl-optimization/64286
> > * ree.c (add_removable_extension): Don't add vector mode
> > extensions if all uses of the source register aren't the same
> > vector extensions.
> >
> > * gcc.target/i386/avx2-pr64286.c: New test.
> Does it make sense to remove your change for 59754 in combine_reaching_defs?
> Shouldn't this patch handle that case as well?
You're right, this patch handles that too.
New patch, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
trunk?
2015-01-13 Jakub Jelinek <jakub@redhat.com>
PR rtl-optimization/64286
* ree.c (combine_reaching_defs): Move part of comment earlier,
remove !SCALAR_INT_MODE_P check.
(add_removable_extension): Don't add vector mode
extensions if all uses of the source register aren't the same
vector extensions.
* gcc.target/i386/avx2-pr64286.c: New test.
--- gcc/ree.c.jj 2015-01-12 21:29:07.023060045 +0100
+++ gcc/ree.c 2015-01-13 09:43:32.158449885 +0100
@@ -783,6 +783,17 @@ combine_reaching_defs (ext_cand *cand, c
!= REGNO (get_extended_src_reg (SET_SRC (PATTERN (cand->insn)))));
if (copy_needed)
{
+ /* Considering transformation of
+ (set (reg1) (expression))
+ ...
+ (set (reg2) (any_extend (reg1)))
+
+ into
+
+ (set (reg2) (any_extend (expression)))
+ (set (reg1) (reg2))
+ ... */
+
/* In theory we could handle more than one reaching def, it
just makes the code to update the insn stream more complex. */
if (state->defs_list.length () != 1)
@@ -798,18 +809,6 @@ combine_reaching_defs (ext_cand *cand, c
if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE)
return false;
- /* Transformation of
- (set (reg1) (expression))
- (set (reg2) (any_extend (reg1)))
- into
- (set (reg2) (any_extend (expression)))
- (set (reg1) (reg2))
- is only valid for scalar integral modes, as it relies on the low
- subreg of reg1 to have the value of (expression), which is not true
- e.g. for vector modes. */
- if (!SCALAR_INT_MODE_P (GET_MODE (SET_DEST (PATTERN (cand->insn)))))
- return false;
-
machine_mode dst_mode = GET_MODE (SET_DEST (PATTERN (cand->insn)));
rtx src_reg = get_extended_src_reg (SET_SRC (PATTERN (cand->insn)));
@@ -1027,6 +1026,7 @@ add_removable_extension (const_rtx expr,
different extension. FIXME: this obviously can be improved. */
for (def = defs; def; def = def->next)
if ((idx = def_map[INSN_UID (DF_REF_INSN (def->ref))])
+ && idx != -1U
&& (cand = &(*insn_list)[idx - 1])
&& cand->code != code)
{
@@ -1038,6 +1038,57 @@ add_removable_extension (const_rtx expr,
}
return;
}
+ /* For vector mode extensions, ensure that all uses of the
+ XEXP (src, 0) register are the same extension (both code
+ and to which mode), as unlike integral extensions lowpart
+ subreg of the sign/zero extended register are not equal
+ to the original register, so we have to change all uses or
+ none. */
+ else if (VECTOR_MODE_P (GET_MODE (XEXP (src, 0))))
+ {
+ if (idx == 0)
+ {
+ struct df_link *ref_chain, *ref_link;
+
+ ref_chain = DF_REF_CHAIN (def->ref);
+ for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
+ {
+ if (ref_link->ref == NULL
+ || DF_REF_INSN_INFO (ref_link->ref) == NULL)
+ {
+ idx = -1U;
+ break;
+ }
+ rtx_insn *use_insn = DF_REF_INSN (ref_link->ref);
+ const_rtx use_set;
+ if (use_insn == insn || DEBUG_INSN_P (use_insn))
+ continue;
+ if (!(use_set = single_set (use_insn))
+ || !REG_P (SET_DEST (use_set))
+ || GET_MODE (SET_DEST (use_set)) != GET_MODE (dest)
+ || GET_CODE (SET_SRC (use_set)) != code
+ || !rtx_equal_p (XEXP (SET_SRC (use_set), 0),
+ XEXP (src, 0)))
+ {
+ idx = -1U;
+ break;
+ }
+ }
+ if (idx == -1U)
+ def_map[INSN_UID (DF_REF_INSN (def->ref))] = idx;
+ }
+ if (idx == -1U)
+ {
+ if (dump_file)
+ {
+ fprintf (dump_file, "Cannot eliminate extension:\n");
+ print_rtl_single (dump_file, insn);
+ fprintf (dump_file,
+ " because some vector uses aren't extension\n");
+ }
+ return;
+ }
+ }
/* Then add the candidate to the list and insert the reaching definitions
into the definition map. */
--- gcc/testsuite/gcc.target/i386/avx2-pr64286.c.jj 2015-01-13 09:33:53.986612033 +0100
+++ gcc/testsuite/gcc.target/i386/avx2-pr64286.c 2015-01-13 09:33:53.986612033 +0100
@@ -0,0 +1,37 @@
+/* PR rtl-optimization/64286 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx2" } */
+/* { dg-require-effective-target avx2 } */
+
+#include <string.h>
+#include <stdlib.h>
+#include <x86intrin.h>
+#include "avx2-check.h"
+
+__m128i v;
+__m256i w;
+
+__attribute__((noinline, noclone)) void
+foo (__m128i *p, __m128i *q)
+{
+ __m128i a = _mm_loadu_si128 (p);
+ __m128i b = _mm_xor_si128 (a, v);
+ w = _mm256_cvtepu8_epi16 (a);
+ *q = b;
+}
+
+static void
+avx2_test (void)
+{
+ v = _mm_set1_epi8 (0x40);
+ __m128i c = _mm_set_epi8 (16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
+ __m128i d;
+ foo (&c, &d);
+ __m128i e = _mm_set_epi8 (0x50, 0x4f, 0x4e, 0x4d, 0x4c, 0x4b, 0x4a, 0x49,
+ 0x48, 0x47, 0x46, 0x45, 0x44, 0x43, 0x42, 0x41);
+ __m256i f = _mm256_set_epi16 (16, 15, 14, 13, 12, 11, 10, 9,
+ 8, 7, 6, 5, 4, 3, 2, 1);
+ if (memcmp (&w, &f, sizeof (w)) != 0
+ || memcmp (&d, &e, sizeof (d)) != 0)
+ abort ();
+}
Jakub
next prev parent reply other threads:[~2015-01-13 16:11 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-01-12 20:02 [PATCH] Fix REE for vector modes (PR rtl-optimization/64286) Jakub Jelinek
2015-01-12 21:39 ` Jeff Law
2015-01-13 16:22 ` Jakub Jelinek [this message]
2015-01-13 19:24 ` [PATCH] Fix REE for vector modes (PR rtl-optimization/64286, take 2) Jeff Law
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20150113161136.GC1405@tucnak.redhat.com \
--to=jakub@redhat.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=law@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).