From: Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>
To: Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>,
gcc Patches <gcc-patches@gcc.gnu.org>,
richard.sandiford@arm.com
Subject: Re: PR111648: Fix wrong code-gen due to incorrect VEC_PERM_EXPR folding
Date: Thu, 19 Oct 2023 00:34:53 +0530 [thread overview]
Message-ID: <CAAgBjMkCcBW3xwcF+q2PQ2oP4-QVWTZm1J0Ddi=gNOnB04hTDg@mail.gmail.com> (raw)
In-Reply-To: <mptfs2724y4.fsf@arm.com>
On Wed, 18 Oct 2023 at 23:22, Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> writes:
> > On Tue, 17 Oct 2023 at 02:40, Richard Sandiford
> > <richard.sandiford@arm.com> wrote:
> >> Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> writes:
> >> > diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
> >> > index 4f8561509ff..55a6a68c16c 100644
> >> > --- a/gcc/fold-const.cc
> >> > +++ b/gcc/fold-const.cc
> >> > @@ -10684,9 +10684,8 @@ valid_mask_for_fold_vec_perm_cst_p (tree arg0, tree arg1,
> >> >
> >> > /* Ensure that the stepped sequence always selects from the same
> >> > input pattern. */
> >> > - unsigned arg_npatterns
> >> > - = ((q1 & 1) == 0) ? VECTOR_CST_NPATTERNS (arg0)
> >> > - : VECTOR_CST_NPATTERNS (arg1);
> >> > + tree arg = ((q1 & 1) == 0) ? arg0 : arg1;
> >> > + unsigned arg_npatterns = VECTOR_CST_NPATTERNS (arg);
> >> >
> >> > if (!multiple_p (step, arg_npatterns))
> >> > {
> >> > @@ -10694,6 +10693,29 @@ valid_mask_for_fold_vec_perm_cst_p (tree arg0, tree arg1,
> >> > *reason = "step is not multiple of npatterns";
> >> > return false;
> >> > }
> >> > +
> >> > + /* If a1 chooses base element from arg, ensure that it's a natural
> >> > + stepped sequence, ie, (arg[2] - arg[1]) == (arg[1] - arg[0])
> >> > + to preserve arg's encoding. */
> >> > +
> >> > + unsigned HOST_WIDE_INT index;
> >> > + if (!r1.is_constant (&index))
> >> > + return false;
> >> > + if (index < arg_npatterns)
> >> > + {
> >>
> >> I don't know whether it matters in practice, but I think the two conditions
> >> above are more natural as:
> >>
> >> if (maybe_lt (r1, arg_npatterns))
> >> {
> >> unsigned HOST_WIDE_INT index;
> >> if (!r1.is_constant (&index))
> >> return false;
> >>
> >> ...[code below]...
> >> }
> >>
> >> > + tree arg_elem0 = vector_cst_elt (arg, index);
> >> > + tree arg_elem1 = vector_cst_elt (arg, index + arg_npatterns);
> >> > + tree arg_elem2 = vector_cst_elt (arg, index + arg_npatterns * 2);
> >> > +
> >> > + if (!operand_equal_p (const_binop (MINUS_EXPR, arg_elem2, arg_elem1),
> >> > + const_binop (MINUS_EXPR, arg_elem1, arg_elem0),
> >> > + 0))
> >>
> >> This needs to check whether const_binop returns null. Maybe:
> >>
> >> tree step1, step2;
> >> if (!(step1 = const_binop (MINUS_EXPR, arg_elem1, arg_elem0))
> >> || !(step2 = const_binop (MINUS_EXPR, arg_elem2, arg_elem1))
> >> || !operand_equal_p (step1, step2, 0))
> >>
> >> OK with those changes, thanks.
> > Hi Richard,
> > Thanks for the suggestions, updated the attached patch accordingly.
> > Bootstrapped+tested with and without SVE on aarch64-linux-gnu and
> > x86_64-linux-gnu.
> > OK to commit ?
>
> Yes, thanks.
Thanks, committed to trunk in 3ec8ecb8e92faec889bc6f7aeac9ff59e82b4f7f.
Thanks,
Prathamesh
>
> Richard
>
> >
> > Thanks,
> > Prathamesh
> >>
> >> Richard
> >>
> >> > + {
> >> > + if (reason)
> >> > + *reason = "not a natural stepped sequence";
> >> > + return false;
> >> > + }
> >> > + }
> >> > }
> >> >
> >> > return true;
> >> > @@ -17161,7 +17183,8 @@ namespace test_fold_vec_perm_cst {
> >> > static tree
> >> > build_vec_cst_rand (machine_mode vmode, unsigned npatterns,
> >> > unsigned nelts_per_pattern,
> >> > - int step = 0, int threshold = 100)
> >> > + int step = 0, bool natural_stepped = false,
> >> > + int threshold = 100)
> >> > {
> >> > tree inner_type = lang_hooks.types.type_for_mode (GET_MODE_INNER (vmode), 1);
> >> > tree vectype = build_vector_type_for_mode (inner_type, vmode);
> >> > @@ -17176,17 +17199,28 @@ build_vec_cst_rand (machine_mode vmode, unsigned npatterns,
> >> >
> >> > // Fill a1 for each pattern
> >> > for (unsigned i = 0; i < npatterns; i++)
> >> > - builder.quick_push (build_int_cst (inner_type, rand () % threshold));
> >> > -
> >> > + {
> >> > + tree a1;
> >> > + if (natural_stepped)
> >> > + {
> >> > + tree a0 = builder[i];
> >> > + wide_int a0_val = wi::to_wide (a0);
> >> > + wide_int a1_val = a0_val + step;
> >> > + a1 = wide_int_to_tree (inner_type, a1_val);
> >> > + }
> >> > + else
> >> > + a1 = build_int_cst (inner_type, rand () % threshold);
> >> > + builder.quick_push (a1);
> >> > + }
> >> > if (nelts_per_pattern == 2)
> >> > return builder.build ();
> >> >
> >> > for (unsigned i = npatterns * 2; i < npatterns * nelts_per_pattern; i++)
> >> > {
> >> > tree prev_elem = builder[i - npatterns];
> >> > - int prev_elem_val = TREE_INT_CST_LOW (prev_elem);
> >> > - int val = prev_elem_val + step;
> >> > - builder.quick_push (build_int_cst (inner_type, val));
> >> > + wide_int prev_elem_val = wi::to_wide (prev_elem);
> >> > + wide_int val = prev_elem_val + step;
> >> > + builder.quick_push (wide_int_to_tree (inner_type, val));
> >> > }
> >> >
> >> > return builder.build ();
> >> > @@ -17432,7 +17466,7 @@ test_nunits_min_2 (machine_mode vmode)
> >> > and step (a2 - a1) = 1, step is not a multiple of npatterns
> >> > in input vector. So return NULL_TREE. */
> >> > {
> >> > - tree arg0 = build_vec_cst_rand (vmode, 2, 3, 1);
> >> > + tree arg0 = build_vec_cst_rand (vmode, 2, 3, 1, true);
> >> > tree arg1 = build_vec_cst_rand (vmode, 2, 3, 1);
> >> > poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
> >> >
> >> > @@ -17452,7 +17486,7 @@ test_nunits_min_2 (machine_mode vmode)
> >> > Test that stepped sequence of the pattern selects from arg0.
> >> > res = { arg1[0], arg0[0], arg0[1], ... } // (1, 3) */
> >> > {
> >> > - tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1);
> >> > + tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1, true);
> >> > tree arg1 = build_vec_cst_rand (vmode, 1, 3, 1);
> >> > poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
> >> >
> >> > @@ -17466,6 +17500,62 @@ test_nunits_min_2 (machine_mode vmode)
> >> > tree expected_res[] = { ARG1(0), ARG0(0), ARG0(1) };
> >> > validate_res (1, 3, res, expected_res);
> >> > }
> >> > +
> >> > + /* Case 6: PR111648 - a1 chooses base element from input vector arg.
> >> > + In this case ensure that arg has a natural stepped sequence
> >> > + to preserve arg's encoding.
> >> > +
> >> > + As a concrete example, consider:
> >> > + arg0: { -16, -9, -10, ... } // (1, 3)
> >> > + arg1: { -12, -5, -6, ... } // (1, 3)
> >> > + sel = { 0, len, len + 1, ... } // (1, 3)
> >> > +
> >> > + This will create res with following encoding:
> >> > + res = { arg0[0], arg1[0], arg1[1], ... } // (1, 3)
> >> > + = { -16, -12, -5, ... }
> >> > +
> >> > + The step in above encoding would be: (-5) - (-12) = 7
> >> > + And hence res[3] would be computed as -5 + 7 = 2.
> >> > + instead of arg1[2], ie, -6.
> >> > + Ensure that valid_mask_for_fold_vec_perm_cst returns false
> >> > + for this case. */
> >> > + {
> >> > + tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1);
> >> > + tree arg1 = build_vec_cst_rand (vmode, 1, 3, 1);
> >> > + poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
> >> > +
> >> > + vec_perm_builder builder (len, 1, 3);
> >> > + poly_uint64 mask_elems[] = { 0, len, len+1 };
> >> > + builder_push_elems (builder, mask_elems);
> >> > +
> >> > + vec_perm_indices sel (builder, 2, len);
> >> > + const char *reason;
> >> > + /* FIXME: It may happen that build_vec_cst_rand may build a natural
> >> > + stepped pattern, even if we didn't explicitly tell it to. So folding
> >> > + may not always fail, but if it does, ensure that's because arg1 does
> >> > + not have a natural stepped sequence (and not due to other reason) */
> >> > + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel, &reason);
> >> > + if (res == NULL_TREE)
> >> > + ASSERT_TRUE (!strcmp (reason, "not a natural stepped sequence"));
> >> > + }
> >> > +
> >> > + /* Case 7: Same as Case 6, except that arg1 contains natural stepped
> >> > + sequence and thus folding should be valid for this case. */
> >> > + {
> >> > + tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1);
> >> > + tree arg1 = build_vec_cst_rand (vmode, 1, 3, 1, true);
> >> > + poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
> >> > +
> >> > + vec_perm_builder builder (len, 1, 3);
> >> > + poly_uint64 mask_elems[] = { 0, len, len+1 };
> >> > + builder_push_elems (builder, mask_elems);
> >> > +
> >> > + vec_perm_indices sel (builder, 2, len);
> >> > + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel);
> >> > +
> >> > + tree expected_res[] = { ARG0(0), ARG1(0), ARG1(1) };
> >> > + validate_res (1, 3, res, expected_res);
> >> > + }
> >> > }
> >> > }
> >> >
> >> > diff --git a/gcc/testsuite/gcc.dg/vect/pr111648.c b/gcc/testsuite/gcc.dg/vect/pr111648.c
> >> > new file mode 100644
> >> > index 00000000000..093e2b02654
> >> > --- /dev/null
> >> > +++ b/gcc/testsuite/gcc.dg/vect/pr111648.c
> >> > @@ -0,0 +1,23 @@
> >> > +/* { dg-do compile } */
> >> > +/* { dg-options "-O2 -fdump-tree-optimized" } */
> >> > +
> >> > +int a;
> >> > +int *b = &a;
> >> > +static int **c = &b;
> >> > +static int d;
> >> > +short e;
> >> > +short f;
> >> > +
> >> > +_Bool foo ()
> >> > +{
> >> > + f = -21;
> >> > + for (; f < -5; f++) {
> >> > + e = f ^ 3;
> >> > + d = *b;
> >> > + **c = e;
> >> > + }
> >> > +
> >> > + return d == -6;
> >> > +}
> >> > +
> >> > +/* { dg-final { scan-tree-dump "return 1" "optimized" } } */
> >
> > diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
> > index 44118e799eb..40767736389 100644
> > --- a/gcc/fold-const.cc
> > +++ b/gcc/fold-const.cc
> > @@ -10692,9 +10692,8 @@ valid_mask_for_fold_vec_perm_cst_p (tree arg0, tree arg1,
> >
> > /* Ensure that the stepped sequence always selects from the same
> > input pattern. */
> > - unsigned arg_npatterns
> > - = ((q1 & 1) == 0) ? VECTOR_CST_NPATTERNS (arg0)
> > - : VECTOR_CST_NPATTERNS (arg1);
> > + tree arg = ((q1 & 1) == 0) ? arg0 : arg1;
> > + unsigned arg_npatterns = VECTOR_CST_NPATTERNS (arg);
> >
> > if (!multiple_p (step, arg_npatterns))
> > {
> > @@ -10702,6 +10701,31 @@ valid_mask_for_fold_vec_perm_cst_p (tree arg0, tree arg1,
> > *reason = "step is not multiple of npatterns";
> > return false;
> > }
> > +
> > + /* If a1 chooses base element from arg, ensure that it's a natural
> > + stepped sequence, ie, (arg[2] - arg[1]) == (arg[1] - arg[0])
> > + to preserve arg's encoding. */
> > +
> > + if (maybe_lt (r1, arg_npatterns))
> > + {
> > + unsigned HOST_WIDE_INT index;
> > + if (!r1.is_constant (&index))
> > + return false;
> > +
> > + tree arg_elem0 = vector_cst_elt (arg, index);
> > + tree arg_elem1 = vector_cst_elt (arg, index + arg_npatterns);
> > + tree arg_elem2 = vector_cst_elt (arg, index + arg_npatterns * 2);
> > +
> > + tree step1, step2;
> > + if (!(step1 = const_binop (MINUS_EXPR, arg_elem1, arg_elem0))
> > + || !(step2 = const_binop (MINUS_EXPR, arg_elem2, arg_elem1))
> > + || !operand_equal_p (step1, step2, 0))
> > + {
> > + if (reason)
> > + *reason = "not a natural stepped sequence";
> > + return false;
> > + }
> > + }
> > }
> >
> > return true;
> > @@ -17165,7 +17189,8 @@ namespace test_fold_vec_perm_cst {
> > static tree
> > build_vec_cst_rand (machine_mode vmode, unsigned npatterns,
> > unsigned nelts_per_pattern,
> > - int step = 0, int threshold = 100)
> > + int step = 0, bool natural_stepped = false,
> > + int threshold = 100)
> > {
> > tree inner_type = lang_hooks.types.type_for_mode (GET_MODE_INNER (vmode), 1);
> > tree vectype = build_vector_type_for_mode (inner_type, vmode);
> > @@ -17180,17 +17205,28 @@ build_vec_cst_rand (machine_mode vmode, unsigned npatterns,
> >
> > // Fill a1 for each pattern
> > for (unsigned i = 0; i < npatterns; i++)
> > - builder.quick_push (build_int_cst (inner_type, rand () % threshold));
> > -
> > + {
> > + tree a1;
> > + if (natural_stepped)
> > + {
> > + tree a0 = builder[i];
> > + wide_int a0_val = wi::to_wide (a0);
> > + wide_int a1_val = a0_val + step;
> > + a1 = wide_int_to_tree (inner_type, a1_val);
> > + }
> > + else
> > + a1 = build_int_cst (inner_type, rand () % threshold);
> > + builder.quick_push (a1);
> > + }
> > if (nelts_per_pattern == 2)
> > return builder.build ();
> >
> > for (unsigned i = npatterns * 2; i < npatterns * nelts_per_pattern; i++)
> > {
> > tree prev_elem = builder[i - npatterns];
> > - int prev_elem_val = TREE_INT_CST_LOW (prev_elem);
> > - int val = prev_elem_val + step;
> > - builder.quick_push (build_int_cst (inner_type, val));
> > + wide_int prev_elem_val = wi::to_wide (prev_elem);
> > + wide_int val = prev_elem_val + step;
> > + builder.quick_push (wide_int_to_tree (inner_type, val));
> > }
> >
> > return builder.build ();
> > @@ -17436,7 +17472,7 @@ test_nunits_min_2 (machine_mode vmode)
> > and step (a2 - a1) = 1, step is not a multiple of npatterns
> > in input vector. So return NULL_TREE. */
> > {
> > - tree arg0 = build_vec_cst_rand (vmode, 2, 3, 1);
> > + tree arg0 = build_vec_cst_rand (vmode, 2, 3, 1, true);
> > tree arg1 = build_vec_cst_rand (vmode, 2, 3, 1);
> > poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
> >
> > @@ -17456,7 +17492,7 @@ test_nunits_min_2 (machine_mode vmode)
> > Test that stepped sequence of the pattern selects from arg0.
> > res = { arg1[0], arg0[0], arg0[1], ... } // (1, 3) */
> > {
> > - tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1);
> > + tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1, true);
> > tree arg1 = build_vec_cst_rand (vmode, 1, 3, 1);
> > poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
> >
> > @@ -17470,6 +17506,62 @@ test_nunits_min_2 (machine_mode vmode)
> > tree expected_res[] = { ARG1(0), ARG0(0), ARG0(1) };
> > validate_res (1, 3, res, expected_res);
> > }
> > +
> > + /* Case 6: PR111648 - a1 chooses base element from input vector arg.
> > + In this case ensure that arg has a natural stepped sequence
> > + to preserve arg's encoding.
> > +
> > + As a concrete example, consider:
> > + arg0: { -16, -9, -10, ... } // (1, 3)
> > + arg1: { -12, -5, -6, ... } // (1, 3)
> > + sel = { 0, len, len + 1, ... } // (1, 3)
> > +
> > + This will create res with following encoding:
> > + res = { arg0[0], arg1[0], arg1[1], ... } // (1, 3)
> > + = { -16, -12, -5, ... }
> > +
> > + The step in above encoding would be: (-5) - (-12) = 7
> > + And hence res[3] would be computed as -5 + 7 = 2.
> > + instead of arg1[2], ie, -6.
> > + Ensure that valid_mask_for_fold_vec_perm_cst returns false
> > + for this case. */
> > + {
> > + tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1);
> > + tree arg1 = build_vec_cst_rand (vmode, 1, 3, 1);
> > + poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
> > +
> > + vec_perm_builder builder (len, 1, 3);
> > + poly_uint64 mask_elems[] = { 0, len, len+1 };
> > + builder_push_elems (builder, mask_elems);
> > +
> > + vec_perm_indices sel (builder, 2, len);
> > + const char *reason;
> > + /* FIXME: It may happen that build_vec_cst_rand may build a natural
> > + stepped pattern, even if we didn't explicitly tell it to. So folding
> > + may not always fail, but if it does, ensure that's because arg1 does
> > + not have a natural stepped sequence (and not due to other reason) */
> > + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel, &reason);
> > + if (res == NULL_TREE)
> > + ASSERT_TRUE (!strcmp (reason, "not a natural stepped sequence"));
> > + }
> > +
> > + /* Case 7: Same as Case 6, except that arg1 contains natural stepped
> > + sequence and thus folding should be valid for this case. */
> > + {
> > + tree arg0 = build_vec_cst_rand (vmode, 1, 3, 1);
> > + tree arg1 = build_vec_cst_rand (vmode, 1, 3, 1, true);
> > + poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
> > +
> > + vec_perm_builder builder (len, 1, 3);
> > + poly_uint64 mask_elems[] = { 0, len, len+1 };
> > + builder_push_elems (builder, mask_elems);
> > +
> > + vec_perm_indices sel (builder, 2, len);
> > + tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel);
> > +
> > + tree expected_res[] = { ARG0(0), ARG1(0), ARG1(1) };
> > + validate_res (1, 3, res, expected_res);
> > + }
> > }
> > }
> >
prev parent reply other threads:[~2023-10-18 19:05 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-10-04 10:16 Prathamesh Kulkarni
2023-10-09 11:35 ` Richard Sandiford
2023-10-11 11:12 ` Prathamesh Kulkarni
2023-10-11 11:27 ` Prathamesh Kulkarni
2023-10-12 9:54 ` Prathamesh Kulkarni
2023-10-16 21:10 ` Richard Sandiford
2023-10-17 18:53 ` Prathamesh Kulkarni
2023-10-18 17:52 ` Richard Sandiford
2023-10-18 19:04 ` Prathamesh Kulkarni [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CAAgBjMkCcBW3xwcF+q2PQ2oP4-QVWTZm1J0Ddi=gNOnB04hTDg@mail.gmail.com' \
--to=prathamesh.kulkarni@linaro.org \
--cc=gcc-patches@gcc.gnu.org \
--cc=richard.sandiford@arm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).