public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Fix PR82396: qsort comparator non-negative on sorted output
@ 2017-10-03 16:34 Wilco Dijkstra
  2017-10-04  8:18 ` Christophe Lyon
  2017-10-16 10:57 ` Wilco Dijkstra
  0 siblings, 2 replies; 22+ messages in thread
From: Wilco Dijkstra @ 2017-10-03 16:34 UTC (permalink / raw)
  To: GCC Patches; +Cc: nd

r253236 broke AArch64 bootstrap. Earlier revision r253071 changed scheduling
behaviour on AArch64 as autopref scheduling no longer checks the base.

This patch fixes the bootstrap failure and cleans up autopref scheduling.
The code is greatly simplified.  Sort accesses on the offset first, and
only if the offsets are the same fall back to other comparisons in
rank_for_schedule.  This doesn't at all restore the original behaviour
since we no longer compare the base address, but it now defines a total
sorting order.  More work will be required to improve the sorting so
that only loads/stores with the same base are affected.

AArch64 bootstrap completes. I'd like to commit this ASAP as the AArch64
bootstrap has been broken for days now.

ChangeLog:
2017-10-03  Wilco Dijkstra  <wdijkstr@arm.com>

        PR rtl-optimization/82396
        * gcc/haifa-sched.c (autopref_multipass_init): Simplify
        initialization.
        (autopref_rank_data): Simplify sort order.
        * gcc/sched-int.h (autopref_multipass_data_): Remove
        multi_mem_insn_p, min_offset and max_offset.
--

diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c
index 549e8961411ecd0a04ac3b24ba78b5d53e63258a..77ba8c5292a0801bbbcb35ed61ab3040015f6677 100644
--- a/gcc/haifa-sched.c
+++ b/gcc/haifa-sched.c
@@ -5568,9 +5568,7 @@ autopref_multipass_init (const rtx_insn *insn, int write)
 
   gcc_assert (data->status == AUTOPREF_MULTIPASS_DATA_UNINITIALIZED);
   data->base = NULL_RTX;
-  data->min_offset = 0;
-  data->max_offset = 0;
-  data->multi_mem_insn_p = false;
+  data->offset = 0;
   /* Set insn entry initialized, but not relevant for auto-prefetcher.  */
   data->status = AUTOPREF_MULTIPASS_DATA_IRRELEVANT;
 
@@ -5585,10 +5583,9 @@ autopref_multipass_init (const rtx_insn *insn, int write)
     {
       int n_elems = XVECLEN (pat, 0);
 
-      int i = 0;
-      rtx prev_base = NULL_RTX;
-      int min_offset = 0;
-      int max_offset = 0;
+      int i, offset;
+      rtx base, prev_base = NULL_RTX;
+      int min_offset = INT_MAX;
 
       for (i = 0; i < n_elems; i++)
 	{
@@ -5596,38 +5593,23 @@ autopref_multipass_init (const rtx_insn *insn, int write)
 	  if (GET_CODE (set) != SET)
 	    return;
 
-	  rtx base = NULL_RTX;
-	  int offset = 0;
 	  if (!analyze_set_insn_for_autopref (set, write, &base, &offset))
 	    return;
 
-	  if (i == 0)
-	    {
-	      prev_base = base;
-	      min_offset = offset;
-	      max_offset = offset;
-	    }
 	  /* Ensure that all memory operations in the PARALLEL use the same
 	     base register.  */
-	  else if (REGNO (base) != REGNO (prev_base))
+	  if (i > 0 && REGNO (base) != REGNO (prev_base))
 	    return;
-	  else
-	    {
-	      min_offset = MIN (min_offset, offset);
-	      max_offset = MAX (max_offset, offset);
-	    }
+	  prev_base = base;
+	  min_offset = MIN (min_offset, offset);
 	}
 
-      /* If we reached here then we have a valid PARALLEL of multiple memory
-	 ops with prev_base as the base and min_offset and max_offset
-	 containing the offsets range.  */
+      /* If we reached here then we have a valid PARALLEL of multiple memory ops
+	 with prev_base as the base and min_offset containing the offset.  */
       gcc_assert (prev_base);
       data->base = prev_base;
-      data->min_offset = min_offset;
-      data->max_offset = max_offset;
-      data->multi_mem_insn_p = true;
+      data->offset = min_offset;
       data->status = AUTOPREF_MULTIPASS_DATA_NORMAL;
-
       return;
     }
 
@@ -5637,7 +5619,7 @@ autopref_multipass_init (const rtx_insn *insn, int write)
     return;
 
   if (!analyze_set_insn_for_autopref (set, write, &data->base,
-				       &data->min_offset))
+				       &data->offset))
     return;
 
   /* This insn is relevant for the auto-prefetcher.
@@ -5646,63 +5628,6 @@ autopref_multipass_init (const rtx_insn *insn, int write)
   data->status = AUTOPREF_MULTIPASS_DATA_NORMAL;
 }
 
-
-/* Helper for autopref_rank_for_schedule.  Given the data of two
-   insns relevant to the auto-prefetcher modelling code DATA1 and DATA2
-   return their comparison result.  Return 0 if there is no sensible
-   ranking order for the two insns.  */
-
-static int
-autopref_rank_data (autopref_multipass_data_t data1,
-		     autopref_multipass_data_t data2)
-{
-  /* Simple case when both insns are simple single memory ops.  */
-  if (!data1->multi_mem_insn_p && !data2->multi_mem_insn_p)
-    return data1->min_offset - data2->min_offset;
-
-  /* Two load/store multiple insns.  Return 0 if the offset ranges
-     overlap and the difference between the minimum offsets otherwise.  */
-  else if (data1->multi_mem_insn_p && data2->multi_mem_insn_p)
-    {
-      int min1 = data1->min_offset;
-      int max1 = data1->max_offset;
-      int min2 = data2->min_offset;
-      int max2 = data2->max_offset;
-
-      if (max1 < min2 || min1 > max2)
-	return min1 - min2;
-      else
-	return 0;
-    }
-
-  /* The other two cases is a pair of a load/store multiple and
-     a simple memory op.  Return 0 if the single op's offset is within the
-     range of the multi-op insn and the difference between the single offset
-     and the minimum offset of the multi-set insn otherwise.  */
-  else if (data1->multi_mem_insn_p && !data2->multi_mem_insn_p)
-    {
-      int max1 = data1->max_offset;
-      int min1 = data1->min_offset;
-
-      if (data2->min_offset >= min1
-	  && data2->min_offset <= max1)
-	return 0;
-      else
-	return min1 - data2->min_offset;
-    }
-  else
-    {
-      int max2 = data2->max_offset;
-      int min2 = data2->min_offset;
-
-      if (data1->min_offset >= min2
-	  && data1->min_offset <= max2)
-	return 0;
-      else
-	return data1->min_offset - min2;
-    }
-}
-
 /* Helper function for rank_for_schedule sorting.  */
 static int
 autopref_rank_for_schedule (const rtx_insn *insn1, const rtx_insn *insn2)
@@ -5725,7 +5650,7 @@ autopref_rank_for_schedule (const rtx_insn *insn1, const rtx_insn *insn2)
       int irrel2 = data2->status == AUTOPREF_MULTIPASS_DATA_IRRELEVANT;
 
       if (!irrel1 && !irrel2)
-	r = autopref_rank_data (data1, data2);
+	r = data1->offset - data2->offset;
       else
 	r = irrel2 - irrel1;
     }
@@ -5753,7 +5678,7 @@ autopref_multipass_dfa_lookahead_guard_1 (const rtx_insn *insn1,
     return 0;
 
   if (rtx_equal_p (data1->base, data2->base)
-      && autopref_rank_data (data1, data2) > 0)
+      && data1->offset > data2->offset)
     {
       if (sched_verbose >= 2)
 	{
diff --git a/gcc/sched-int.h b/gcc/sched-int.h
index 2af8f9fc32c0085c18511bbc83ad52c6ec31f671..6832589e3d0ad9ed5937ab3e81f3573c2560fe67 100644
--- a/gcc/sched-int.h
+++ b/gcc/sched-int.h
@@ -819,15 +819,8 @@ struct autopref_multipass_data_
   /* Base part of memory address.  */
   rtx base;
 
-  /* Memory offsets from the base.  For single simple sets
-     only min_offset is valid.  For multi-set insns min_offset
-     and max_offset record the minimum and maximum offsets from the same
-     base among the sets inside the PARALLEL.  */
-  int min_offset;
-  int max_offset;
-
-  /* True if this is a load/store-multiple instruction.  */
-  bool multi_mem_insn_p;
+  /* Memory offsets from the base.  */
+  int offset;
 
   /* Entry status.  */
   enum autopref_multipass_data_status status;

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output
  2017-10-03 16:34 [PATCH] Fix PR82396: qsort comparator non-negative on sorted output Wilco Dijkstra
@ 2017-10-04  8:18 ` Christophe Lyon
  2017-10-04 10:29   ` Wilco Dijkstra
  2017-10-16 10:57 ` Wilco Dijkstra
  1 sibling, 1 reply; 22+ messages in thread
From: Christophe Lyon @ 2017-10-04  8:18 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: GCC Patches, nd

On 3 October 2017 at 18:34, Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote:
> r253236 broke AArch64 bootstrap. Earlier revision r253071 changed scheduling
> behaviour on AArch64 as autopref scheduling no longer checks the base.
>
> This patch fixes the bootstrap failure and cleans up autopref scheduling.
> The code is greatly simplified.  Sort accesses on the offset first, and
> only if the offsets are the same fall back to other comparisons in
> rank_for_schedule.  This doesn't at all restore the original behaviour
> since we no longer compare the base address, but it now defines a total
> sorting order.  More work will be required to improve the sorting so
> that only loads/stores with the same base are affected.
>
> AArch64 bootstrap completes. I'd like to commit this ASAP as the AArch64
> bootstrap has been broken for days now.
>
> ChangeLog:
> 2017-10-03  Wilco Dijkstra  <wdijkstr@arm.com>
>
>         PR rtl-optimization/82396
>         * gcc/haifa-sched.c (autopref_multipass_init): Simplify
>         initialization.
>         (autopref_rank_data): Simplify sort order.
>         * gcc/sched-int.h (autopref_multipass_data_): Remove
>         multi_mem_insn_p, min_offset and max_offset.
> --
>

Hi,

FWIW, I confirm that this patch fixes the build failure I reported at:
https://gcc.gnu.org/ml/gcc-patches/2017-09/msg01980.html

Thanks,

Christophe

> diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c
> index 549e8961411ecd0a04ac3b24ba78b5d53e63258a..77ba8c5292a0801bbbcb35ed61ab3040015f6677 100644
> --- a/gcc/haifa-sched.c
> +++ b/gcc/haifa-sched.c
> @@ -5568,9 +5568,7 @@ autopref_multipass_init (const rtx_insn *insn, int write)
>
>    gcc_assert (data->status == AUTOPREF_MULTIPASS_DATA_UNINITIALIZED);
>    data->base = NULL_RTX;
> -  data->min_offset = 0;
> -  data->max_offset = 0;
> -  data->multi_mem_insn_p = false;
> +  data->offset = 0;
>    /* Set insn entry initialized, but not relevant for auto-prefetcher.  */
>    data->status = AUTOPREF_MULTIPASS_DATA_IRRELEVANT;
>
> @@ -5585,10 +5583,9 @@ autopref_multipass_init (const rtx_insn *insn, int write)
>      {
>        int n_elems = XVECLEN (pat, 0);
>
> -      int i = 0;
> -      rtx prev_base = NULL_RTX;
> -      int min_offset = 0;
> -      int max_offset = 0;
> +      int i, offset;
> +      rtx base, prev_base = NULL_RTX;
> +      int min_offset = INT_MAX;
>
>        for (i = 0; i < n_elems; i++)
>         {
> @@ -5596,38 +5593,23 @@ autopref_multipass_init (const rtx_insn *insn, int write)
>           if (GET_CODE (set) != SET)
>             return;
>
> -         rtx base = NULL_RTX;
> -         int offset = 0;
>           if (!analyze_set_insn_for_autopref (set, write, &base, &offset))
>             return;
>
> -         if (i == 0)
> -           {
> -             prev_base = base;
> -             min_offset = offset;
> -             max_offset = offset;
> -           }
>           /* Ensure that all memory operations in the PARALLEL use the same
>              base register.  */
> -         else if (REGNO (base) != REGNO (prev_base))
> +         if (i > 0 && REGNO (base) != REGNO (prev_base))
>             return;
> -         else
> -           {
> -             min_offset = MIN (min_offset, offset);
> -             max_offset = MAX (max_offset, offset);
> -           }
> +         prev_base = base;
> +         min_offset = MIN (min_offset, offset);
>         }
>
> -      /* If we reached here then we have a valid PARALLEL of multiple memory
> -        ops with prev_base as the base and min_offset and max_offset
> -        containing the offsets range.  */
> +      /* If we reached here then we have a valid PARALLEL of multiple memory ops
> +        with prev_base as the base and min_offset containing the offset.  */
>        gcc_assert (prev_base);
>        data->base = prev_base;
> -      data->min_offset = min_offset;
> -      data->max_offset = max_offset;
> -      data->multi_mem_insn_p = true;
> +      data->offset = min_offset;
>        data->status = AUTOPREF_MULTIPASS_DATA_NORMAL;
> -
>        return;
>      }
>
> @@ -5637,7 +5619,7 @@ autopref_multipass_init (const rtx_insn *insn, int write)
>      return;
>
>    if (!analyze_set_insn_for_autopref (set, write, &data->base,
> -                                      &data->min_offset))
> +                                      &data->offset))
>      return;
>
>    /* This insn is relevant for the auto-prefetcher.
> @@ -5646,63 +5628,6 @@ autopref_multipass_init (const rtx_insn *insn, int write)
>    data->status = AUTOPREF_MULTIPASS_DATA_NORMAL;
>  }
>
> -
> -/* Helper for autopref_rank_for_schedule.  Given the data of two
> -   insns relevant to the auto-prefetcher modelling code DATA1 and DATA2
> -   return their comparison result.  Return 0 if there is no sensible
> -   ranking order for the two insns.  */
> -
> -static int
> -autopref_rank_data (autopref_multipass_data_t data1,
> -                    autopref_multipass_data_t data2)
> -{
> -  /* Simple case when both insns are simple single memory ops.  */
> -  if (!data1->multi_mem_insn_p && !data2->multi_mem_insn_p)
> -    return data1->min_offset - data2->min_offset;
> -
> -  /* Two load/store multiple insns.  Return 0 if the offset ranges
> -     overlap and the difference between the minimum offsets otherwise.  */
> -  else if (data1->multi_mem_insn_p && data2->multi_mem_insn_p)
> -    {
> -      int min1 = data1->min_offset;
> -      int max1 = data1->max_offset;
> -      int min2 = data2->min_offset;
> -      int max2 = data2->max_offset;
> -
> -      if (max1 < min2 || min1 > max2)
> -       return min1 - min2;
> -      else
> -       return 0;
> -    }
> -
> -  /* The other two cases is a pair of a load/store multiple and
> -     a simple memory op.  Return 0 if the single op's offset is within the
> -     range of the multi-op insn and the difference between the single offset
> -     and the minimum offset of the multi-set insn otherwise.  */
> -  else if (data1->multi_mem_insn_p && !data2->multi_mem_insn_p)
> -    {
> -      int max1 = data1->max_offset;
> -      int min1 = data1->min_offset;
> -
> -      if (data2->min_offset >= min1
> -         && data2->min_offset <= max1)
> -       return 0;
> -      else
> -       return min1 - data2->min_offset;
> -    }
> -  else
> -    {
> -      int max2 = data2->max_offset;
> -      int min2 = data2->min_offset;
> -
> -      if (data1->min_offset >= min2
> -         && data1->min_offset <= max2)
> -       return 0;
> -      else
> -       return data1->min_offset - min2;
> -    }
> -}
> -
>  /* Helper function for rank_for_schedule sorting.  */
>  static int
>  autopref_rank_for_schedule (const rtx_insn *insn1, const rtx_insn *insn2)
> @@ -5725,7 +5650,7 @@ autopref_rank_for_schedule (const rtx_insn *insn1, const rtx_insn *insn2)
>        int irrel2 = data2->status == AUTOPREF_MULTIPASS_DATA_IRRELEVANT;
>
>        if (!irrel1 && !irrel2)
> -       r = autopref_rank_data (data1, data2);
> +       r = data1->offset - data2->offset;
>        else
>         r = irrel2 - irrel1;
>      }
> @@ -5753,7 +5678,7 @@ autopref_multipass_dfa_lookahead_guard_1 (const rtx_insn *insn1,
>      return 0;
>
>    if (rtx_equal_p (data1->base, data2->base)
> -      && autopref_rank_data (data1, data2) > 0)
> +      && data1->offset > data2->offset)
>      {
>        if (sched_verbose >= 2)
>         {
> diff --git a/gcc/sched-int.h b/gcc/sched-int.h
> index 2af8f9fc32c0085c18511bbc83ad52c6ec31f671..6832589e3d0ad9ed5937ab3e81f3573c2560fe67 100644
> --- a/gcc/sched-int.h
> +++ b/gcc/sched-int.h
> @@ -819,15 +819,8 @@ struct autopref_multipass_data_
>    /* Base part of memory address.  */
>    rtx base;
>
> -  /* Memory offsets from the base.  For single simple sets
> -     only min_offset is valid.  For multi-set insns min_offset
> -     and max_offset record the minimum and maximum offsets from the same
> -     base among the sets inside the PARALLEL.  */
> -  int min_offset;
> -  int max_offset;
> -
> -  /* True if this is a load/store-multiple instruction.  */
> -  bool multi_mem_insn_p;
> +  /* Memory offsets from the base.  */
> +  int offset;
>
>    /* Entry status.  */
>    enum autopref_multipass_data_status status;

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output
  2017-10-04  8:18 ` Christophe Lyon
@ 2017-10-04 10:29   ` Wilco Dijkstra
  2017-10-04 11:02     ` Richard Sandiford
  0 siblings, 1 reply; 22+ messages in thread
From: Wilco Dijkstra @ 2017-10-04 10:29 UTC (permalink / raw)
  To: Christophe Lyon; +Cc: GCC Patches, nd

Christophe Lyon wrote:

> FWIW, I confirm that this patch fixes the build failure I reported at:
> https://gcc.gnu.org/ml/gcc-patches/2017-09/msg01980.html

Thanks, now committed as r253399.

Wilco

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output
  2017-10-04 10:29   ` Wilco Dijkstra
@ 2017-10-04 11:02     ` Richard Sandiford
  2017-10-04 13:12       ` Ramana Radhakrishnan
  2017-10-04 13:24       ` Wilco Dijkstra
  0 siblings, 2 replies; 22+ messages in thread
From: Richard Sandiford @ 2017-10-04 11:02 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: Christophe Lyon, GCC Patches, nd

Wilco Dijkstra <Wilco.Dijkstra@arm.com> writes:
> Christophe Lyon wrote:
>
>> FWIW, I confirm that this patch fixes the build failure I reported at:
>> https://gcc.gnu.org/ml/gcc-patches/2017-09/msg01980.html
>
> Thanks, now committed as r253399.

I don't think it's reasonable to commit this as obvious.  You said
yourself in the covering message that "it doesn't at all restore
the original behaviour since we no longer compare the base address".
So even with the bootstrap failure, I think the patch needed review
before going in.

Christophe's message doesn't change anything because you knew when you
posted the patch that it fixed the failure.

Richard

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output
  2017-10-04 11:02     ` Richard Sandiford
@ 2017-10-04 13:12       ` Ramana Radhakrishnan
  2017-10-04 13:39         ` Alexander Monakov
  2017-10-04 13:49         ` Jakub Jelinek
  2017-10-04 13:24       ` Wilco Dijkstra
  1 sibling, 2 replies; 22+ messages in thread
From: Ramana Radhakrishnan @ 2017-10-04 13:12 UTC (permalink / raw)
  To: Wilco Dijkstra, Christophe Lyon, GCC Patches, nd, richard.sandiford

On Wed, Oct 4, 2017 at 12:01 PM, Richard Sandiford
<richard.sandiford@linaro.org> wrote:
> Wilco Dijkstra <Wilco.Dijkstra@arm.com> writes:
>> Christophe Lyon wrote:
>>
>>> FWIW, I confirm that this patch fixes the build failure I reported at:
>>> https://gcc.gnu.org/ml/gcc-patches/2017-09/msg01980.html
>>
>> Thanks, now committed as r253399.
>
> I don't think it's reasonable to commit this as obvious.  You said
> yourself in the covering message that "it doesn't at all restore
> the original behaviour since we no longer compare the base address".
> So even with the bootstrap failure, I think the patch needed review
> before going in.

I agree that this patch shouldn't have gone in without review from a
maintainer of the appropriate area regardless of whether this fixes a
bootstrap failure or not.

However we need a scheduler maintainer or global reviewer to please
help review this patch or help come up with an alternative patch. A
primary platform broken for 5 days with a commit and no public
response from the original poster is really not appropriate behaviour
in this community. If not, the original patch should have been
considered for reverting under the 48 hour rule .

regards
Ramana

>
> Christophe's message doesn't change anything because you knew when you
> posted the patch that it fixed the failure.
>
> Richard

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output
  2017-10-04 11:02     ` Richard Sandiford
  2017-10-04 13:12       ` Ramana Radhakrishnan
@ 2017-10-04 13:24       ` Wilco Dijkstra
  2017-10-04 13:26         ` Jakub Jelinek
                           ` (2 more replies)
  1 sibling, 3 replies; 22+ messages in thread
From: Wilco Dijkstra @ 2017-10-04 13:24 UTC (permalink / raw)
  To: Richard Sandiford; +Cc: Christophe Lyon, GCC Patches, nd

Richard Sandiford wrote:
>
> I don't think it's reasonable to commit this as obvious.  You said
> yourself in the covering message that "it doesn't at all restore
> the original behaviour since we no longer compare the base address".
> So even with the bootstrap failure, I think the patch needed review
> before going in.
>
> Christophe's message doesn't change anything because you knew when you
> posted the patch that it fixed the failure.

Well my understanding was that it is OK to fix a bootstrap failure. I believe my
patch is trivial since it mostly removes redundant code. Also I took Jakub's
review as an OK as there were no technical objections. However since you
seem to disagree, I will revert it.

We have now had 5 days of no builds for a major target, which is a huge
inconvenience. So I don't think it is reasonable to wait any longer.
The alternative is to revert the original patch that caused the bootstrap failure
plus the patch(es) that unexpectedly changed the behaviour of the scheduler
(I don't think there was any testing as to what effect those had on the schedule).

So the question is who will do that and when?

Wilco

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output
  2017-10-04 13:24       ` Wilco Dijkstra
@ 2017-10-04 13:26         ` Jakub Jelinek
  2017-10-04 14:54         ` Eric Botcazou
  2017-10-05 17:20         ` Steve Ellcey
  2 siblings, 0 replies; 22+ messages in thread
From: Jakub Jelinek @ 2017-10-04 13:26 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: Richard Sandiford, Christophe Lyon, GCC Patches, nd

On Wed, Oct 04, 2017 at 01:24:15PM +0000, Wilco Dijkstra wrote:
> Well my understanding was that it is OK to fix a bootstrap failure. I believe my
> patch is trivial since it mostly removes redundant code. Also I took Jakub's
> review as an OK as there were no technical objections. However since you

I've only commented (off-list) on the ChangeLog nits, not on anything else.

	Jakub

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output
  2017-10-04 13:12       ` Ramana Radhakrishnan
@ 2017-10-04 13:39         ` Alexander Monakov
  2017-10-04 13:41           ` Ramana Radhakrishnan
  2017-10-04 13:49         ` Jakub Jelinek
  1 sibling, 1 reply; 22+ messages in thread
From: Alexander Monakov @ 2017-10-04 13:39 UTC (permalink / raw)
  To: Ramana Radhakrishnan
  Cc: Wilco Dijkstra, Christophe Lyon, GCC Patches, nd, richard.sandiford

On Wed, 4 Oct 2017, Ramana Radhakrishnan wrote:
> However we need a scheduler maintainer or global reviewer to please
> help review this patch or help come up with an alternative patch. A
> primary platform broken for 5 days with a commit and no public
> response from the original poster is really not appropriate behaviour
> in this community. If not, the original patch should have been
> considered for reverting under the 48 hour rule .

I responded within 30 minutes of the original report by Christophe:
https://gcc.gnu.org/ml/gcc-patches/2017-09/msg01984.html

I think Wilco's patch is reasonable. A potentially simpler alternative
to just restore bootstrap is to take only the second hunk of my patch
above (i.e. without removal of autopref_rank_data and associated cleanups).

FWIW in the previous thread for a closely related issue I Cc'ed Maxim and
Kyrill: https://gcc.gnu.org/ml/gcc-patches/2017-09/msg01229.html

Alexander

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output
  2017-10-04 13:39         ` Alexander Monakov
@ 2017-10-04 13:41           ` Ramana Radhakrishnan
  0 siblings, 0 replies; 22+ messages in thread
From: Ramana Radhakrishnan @ 2017-10-04 13:41 UTC (permalink / raw)
  To: Alexander Monakov
  Cc: Wilco Dijkstra, Christophe Lyon, GCC Patches, nd, richard.sandiford

On Wed, Oct 4, 2017 at 2:39 PM, Alexander Monakov <amonakov@ispras.ru> wrote:
> On Wed, 4 Oct 2017, Ramana Radhakrishnan wrote:
>> However we need a scheduler maintainer or global reviewer to please
>> help review this patch or help come up with an alternative patch. A
>> primary platform broken for 5 days with a commit and no public
>> response from the original poster is really not appropriate behaviour
>> in this community. If not, the original patch should have been
>> considered for reverting under the 48 hour rule .
>
> I responded within 30 minutes of the original report by Christophe:
> https://gcc.gnu.org/ml/gcc-patches/2017-09/msg01984.html
>

My apologies.

I tried looking in my theading in my gmail and was surprised not to
see a response and somehow I missed this in the archives.

Ramana

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output
  2017-10-04 13:12       ` Ramana Radhakrishnan
  2017-10-04 13:39         ` Alexander Monakov
@ 2017-10-04 13:49         ` Jakub Jelinek
  1 sibling, 0 replies; 22+ messages in thread
From: Jakub Jelinek @ 2017-10-04 13:49 UTC (permalink / raw)
  To: Ramana Radhakrishnan
  Cc: Wilco Dijkstra, Christophe Lyon, GCC Patches, nd, richard.sandiford

On Wed, Oct 04, 2017 at 02:12:11PM +0100, Ramana Radhakrishnan wrote:
> On Wed, Oct 4, 2017 at 12:01 PM, Richard Sandiford
> <richard.sandiford@linaro.org> wrote:
> > Wilco Dijkstra <Wilco.Dijkstra@arm.com> writes:
> >> Christophe Lyon wrote:
> >>
> >>> FWIW, I confirm that this patch fixes the build failure I reported at:
> >>> https://gcc.gnu.org/ml/gcc-patches/2017-09/msg01980.html
> >>
> >> Thanks, now committed as r253399.
> >
> > I don't think it's reasonable to commit this as obvious.  You said
> > yourself in the covering message that "it doesn't at all restore
> > the original behaviour since we no longer compare the base address".
> > So even with the bootstrap failure, I think the patch needed review
> > before going in.
> 
> I agree that this patch shouldn't have gone in without review from a
> maintainer of the appropriate area regardless of whether this fixes a
> bootstrap failure or not.
> 
> However we need a scheduler maintainer or global reviewer to please
> help review this patch or help come up with an alternative patch. A
> primary platform broken for 5 days with a commit and no public
> response from the original poster is really not appropriate behaviour
> in this community. If not, the original patch should have been
> considered for reverting under the 48 hour rule .

A workaround that could be committed as obvious would be wrapping
some qsort call affected by this into (), i.e. instead of doing
qsort (...); do (qsort) (...);
That way you revert to previous behavior for the problematic qsort and not
change anything else.
The committed patch isn't in any way obvious and really needs to be reviewed
by a scheduler maintainer.

	Jakub

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output
  2017-10-04 13:24       ` Wilco Dijkstra
  2017-10-04 13:26         ` Jakub Jelinek
@ 2017-10-04 14:54         ` Eric Botcazou
  2017-10-05 17:20         ` Steve Ellcey
  2 siblings, 0 replies; 22+ messages in thread
From: Eric Botcazou @ 2017-10-04 14:54 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: gcc-patches, Richard Sandiford, Christophe Lyon, nd

> We have now had 5 days of no builds for a major target, which is a huge
> inconvenience. So I don't think it is reasonable to wait any longer.

well, to put things in perspective, you broke the SPARC compiler about one 
month ago and have done anything AFAIK since then to repair the breakage so...

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output
  2017-10-04 13:24       ` Wilco Dijkstra
  2017-10-04 13:26         ` Jakub Jelinek
  2017-10-04 14:54         ` Eric Botcazou
@ 2017-10-05 17:20         ` Steve Ellcey
  2017-10-05 17:34           ` Maxim Kuvyrkov
  2 siblings, 1 reply; 22+ messages in thread
From: Steve Ellcey @ 2017-10-05 17:20 UTC (permalink / raw)
  To: Wilco Dijkstra, Richard Sandiford
  Cc: Christophe Lyon, GCC Patches, nd, wilson, gnu, law, vmakarov

On Wed, 2017-10-04 at 13:24 +0000, Wilco Dijkstra wrote:
> Richard Sandiford wrote:
> > 
> > 
> > I don't think it's reasonable to commit this as obvious.  You said
> > yourself in the covering message that "it doesn't at all restore
> > the original behaviour since we no longer compare the base address".
> > So even with the bootstrap failure, I think the patch needed review
> > before going in.
> > 
> > Christophe's message doesn't change anything because you knew when you
> > posted the patch that it fixed the failure.
> Well my understanding was that it is OK to fix a bootstrap failure. I believe my
> patch is trivial since it mostly removes redundant code. Also I took Jakub's
> review as an OK as there were no technical objections. However since you
> seem to disagree, I will revert it.
> 
> We have now had 5 days of no builds for a major target, which is a huge
> inconvenience. So I don't think it is reasonable to wait any longer.
> The alternative is to revert the original patch that caused the bootstrap failure
> plus the patch(es) that unexpectedly changed the behaviour of the scheduler
> (I don't think there was any testing as to what effect those had on the schedule).
> 
> So the question is who will do that and when?
> 
> Wilco

The aarch64 bootstrap is still broken.  I am adding the scheduler
maintainers to the CC list on this email to see if one on them can
review/approve Wilco's patch which was applied and then reverted.  If
not, can one of the global maintainers revert the original patch that
broke the bootstrap?

Steve Ellcey
sellcey@cavium.com

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output
  2017-10-05 17:20         ` Steve Ellcey
@ 2017-10-05 17:34           ` Maxim Kuvyrkov
  2017-10-05 17:46             ` Steve Ellcey
                               ` (2 more replies)
  0 siblings, 3 replies; 22+ messages in thread
From: Maxim Kuvyrkov @ 2017-10-05 17:34 UTC (permalink / raw)
  To: Wilco Dijkstra
  Cc: Richard Sandiford, Christophe Lyon, GCC Patches, nd, Jim Wilson,
	gnu, Jeff Law, Vladimir Makarov, Steve Ellcey, Alexander Monakov

[-- Attachment #1: Type: text/plain, Size: 2470 bytes --]

> On Oct 5, 2017, at 8:20 PM, Steve Ellcey <sellcey@cavium.com> wrote:
> 
> On Wed, 2017-10-04 at 13:24 +0000, Wilco Dijkstra wrote:
>> Richard Sandiford wrote:
>>> 
>>> 
>>> I don't think it's reasonable to commit this as obvious.  You said
>>> yourself in the covering message that "it doesn't at all restore
>>> the original behaviour since we no longer compare the base address".
>>> So even with the bootstrap failure, I think the patch needed review
>>> before going in.
>>> 
>>> Christophe's message doesn't change anything because you knew when you
>>> posted the patch that it fixed the failure.
>> Well my understanding was that it is OK to fix a bootstrap failure. I believe my
>> patch is trivial since it mostly removes redundant code. Also I took Jakub's
>> review as an OK as there were no technical objections. However since you
>> seem to disagree, I will revert it.
>> 
>> We have now had 5 days of no builds for a major target, which is a huge
>> inconvenience. So I don't think it is reasonable to wait any longer.
>> The alternative is to revert the original patch that caused the bootstrap failure
>> plus the patch(es) that unexpectedly changed the behaviour of the scheduler
>> (I don't think there was any testing as to what effect those had on the schedule).
>> 
>> So the question is who will do that and when?
>> 
>> Wilco
> 
> The aarch64 bootstrap is still broken.  I am adding the scheduler
> maintainers to the CC list on this email to see if one on them can
> review/approve Wilco's patch which was applied and then reverted.  If
> not, can one of the global maintainers revert the original patch that
> broke the bootstrap?

What the heck, I'll jump in as well.

I'm still working on analysis, but it appears to me that Alexander's patch (current state of trunk) fails qsort check due to not being symmetric for load/store analysis (write == 0 or write == 1) in comparisons with "irrelevant" instructions.  Wilco's patch does not seem to address that, and, possibly, makes the failure latent (I may be wrong here, it's late and I didn't finish analysis yet).

I'm currently bootstrapping the following patch (on aarch64-linux-gnu, arm-linux-gnueabihf will follow tomorrow), which (like Wilco's patch) seems to unbreak bootstrap, but is less invasive and preserves handling of multi_mem_insns.  Would please interested parties help me test it?

Comments?

--
Maxim Kuvyrkov
www.linaro.org


[-- Attachment #2: 0001-Fix-autopref_rank_for_schedule.patch --]
[-- Type: application/octet-stream, Size: 2782 bytes --]

From 9d78200ef1cd58104ab0b814bfb406a853950a8c Mon Sep 17 00:00:00 2001
From: Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org>
Date: Thu, 5 Oct 2017 17:20:53 +0000
Subject: [PATCH] Fix autopref_rank_for_schedule

	* haifa-sched.c (autopref_rank_for_schedule): Rework comparator
	to be stable for comparisons with "non-relevant" instructions.
---
 gcc/haifa-sched.c | 38 +++++++++++++++++++++++++-------------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c
index 549e896..12ad8fc 100644
--- a/gcc/haifa-sched.c
+++ b/gcc/haifa-sched.c
@@ -5707,8 +5707,9 @@ autopref_rank_data (autopref_multipass_data_t data1,
 static int
 autopref_rank_for_schedule (const rtx_insn *insn1, const rtx_insn *insn2)
 {
-  int r = 0;
-  for (int write = 0; write < 2 && !r; ++write)
+  int irr1 = 0, irr2 = 0;
+
+  for (int write = 0; write < 2; ++write)
     {
       autopref_multipass_data_t data1
 	= &INSN_AUTOPREF_MULTIPASS_DATA (insn1)[write];
@@ -5717,20 +5718,31 @@ autopref_rank_for_schedule (const rtx_insn *insn1, const rtx_insn *insn2)
 
       if (data1->status == AUTOPREF_MULTIPASS_DATA_UNINITIALIZED)
 	autopref_multipass_init (insn1, write);
+      if (data1->status == AUTOPREF_MULTIPASS_DATA_IRRELEVANT)
+	++irr1;
 
       if (data2->status == AUTOPREF_MULTIPASS_DATA_UNINITIALIZED)
 	autopref_multipass_init (insn2, write);
-
-      int irrel1 = data1->status == AUTOPREF_MULTIPASS_DATA_IRRELEVANT;
-      int irrel2 = data2->status == AUTOPREF_MULTIPASS_DATA_IRRELEVANT;
-
-      if (!irrel1 && !irrel2)
-	r = autopref_rank_data (data1, data2);
-      else
-	r = irrel2 - irrel1;
-    }
-
-  return r;
+      if (data2->status == AUTOPREF_MULTIPASS_DATA_IRRELEVANT)
+	++irr2;
+
+      /* If both instructions are equivally relevant to auto-prefetcher,
+	 then compare them using autopref_rank_data.  We omit checking for
+	 same base register here because that would produce A < B < C < A
+	 comparisons when A and C have the same base, and B has different
+	 base.  This simplification is OK since we don't need a perfect
+	 result for autoprefetcher model during sorting -- lookahead_guard
+	 below is in charge of forcing correct ordering of memory
+	 instructions.  */
+      if (irr1 == write && irr2 == write)
+	return autopref_rank_data (data1, data2);
+    }
+
+  /* Prioritize instructions irrelevant to auto-prefetcher to the relevant
+     ones.  This gives a chance for other relevant instructions to get into
+     ready list (by resolving their dependencies from irrelevant instructions),
+     and be sorted appropriately among relevant instructions.  */
+  return irr2 - irr1;
 }
 
 /* True if header of debug dump was printed.  */
-- 
2.7.4


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output
  2017-10-05 17:34           ` Maxim Kuvyrkov
@ 2017-10-05 17:46             ` Steve Ellcey
  2017-10-05 18:51             ` Wilco Dijkstra
  2017-10-05 20:28             ` Alexander Monakov
  2 siblings, 0 replies; 22+ messages in thread
From: Steve Ellcey @ 2017-10-05 17:46 UTC (permalink / raw)
  To: Maxim Kuvyrkov, Wilco Dijkstra
  Cc: Richard Sandiford, Christophe Lyon, GCC Patches, nd, Jim Wilson,
	gnu, Jeff Law, Vladimir Makarov, Alexander Monakov

On Thu, 2017-10-05 at 20:33 +0300, Maxim Kuvyrkov wrote:

> I'm currently bootstrapping the following patch (on aarch64-linux-
> gnu, arm-linux-gnueabihf will follow tomorrow), which (like Wilco's
> patch) seems to unbreak bootstrap, but is less invasive and preserves
> handling of multi_mem_insns.  Would please interested parties help me
> test it?
> 
> Comments?

This patch is not applying to my ToT.  The code
in autopref_rank_for_schedule doesn't seem to match what I have in my
tree.

Steve Ellcey

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output
  2017-10-05 17:34           ` Maxim Kuvyrkov
  2017-10-05 17:46             ` Steve Ellcey
@ 2017-10-05 18:51             ` Wilco Dijkstra
  2017-10-05 20:28             ` Alexander Monakov
  2 siblings, 0 replies; 22+ messages in thread
From: Wilco Dijkstra @ 2017-10-05 18:51 UTC (permalink / raw)
  To: Maxim Kuvyrkov
  Cc: Richard Sandiford, Christophe Lyon, GCC Patches, nd, Jim Wilson,
	gnu, Jeff Law, Vladimir Makarov, Steve Ellcey, Alexander Monakov

Maxim Kuvyrkov wrote:

> What the heck, I'll jump in as well.

Why not - how many engineers does it take to sort?!?

> I'm still working on analysis, but it appears to me that Alexander's patch 
> (current state of trunk) fails qsort check due to not being symmetric for
> load/store analysis (write == 0 or write == 1) in comparisons with "irrelevant"
> instructions.  Wilco's patch  does not seem to address that, and, possibly,
> makes the failure latent (I may be wrong here, it's late and I didn't finish analysis yet).
>
> I'm currently bootstrapping the following patch (on aarch64-linux-gnu, 
> arm-linux-gnueabihf will follow tomorrow), which (like Wilco's patch) seems
> to unbreak bootstrap, but is less invasive and preserves handling of
> multi_mem_insns.  Would please interested  parties help me test it?

So what you're saying is that if we compare a load with a store (or a store
with a load), we should always return 0 and use the original instruction order?
This won't fix the issue of overlaps in autopref_rank_data, but I believe this
creates a new sorting order problem:

A. Load off 8
B. Store off 12
C. Load off 4

So A < B (instruction order), B < C (instruction order) but A > C (offset order)...

The key here is that sorting functions should only return zero if the two
objects being compared are really indistinguishable, not as a way to say they
are not comparable(!!!) and then defer to a different comparison order. It is
impossible to combine multiple different comparisons to create a total sorting
order that way. 

However it is feasible to divide things into several classes, order the classes
with respect to each other and use a different sort within each class. If you want
to treat loads/stores equally, that's fine, but then they end up in the same class
and thus you have to use a comparison that orders both loads and stores.

Wilco

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output
  2017-10-05 17:34           ` Maxim Kuvyrkov
  2017-10-05 17:46             ` Steve Ellcey
  2017-10-05 18:51             ` Wilco Dijkstra
@ 2017-10-05 20:28             ` Alexander Monakov
  2017-10-06  7:55               ` Christophe Lyon
  2017-10-06  8:37               ` Maxim Kuvyrkov
  2 siblings, 2 replies; 22+ messages in thread
From: Alexander Monakov @ 2017-10-05 20:28 UTC (permalink / raw)
  To: Maxim Kuvyrkov
  Cc: Wilco Dijkstra, Richard Sandiford, Christophe Lyon, GCC Patches,
	nd, Jim Wilson, gnu, Jeff Law, Vladimir Makarov, Steve Ellcey,
	Kyrill Tkachov

On Thu, 5 Oct 2017, Maxim Kuvyrkov wrote:
> I'm still working on analysis, but it appears to me that Alexander's patch
> (current state of trunk) fails qsort check due to not being symmetric for
> load/store analysis (write == 0 or write == 1) in comparisons with
> "irrelevant" instructions.  Wilco's patch does not seem to address that, and,
> possibly, makes the failure latent (I may be wrong here, it's late and I
> didn't finish analysis yet).

Yes, your analysis is incomplete, it should be easy to see that for always-false
multi_mem_insn_p, autopref_rank_for_schedule implements lexicographical order.
The problem is that when multi_mem_insn_p may be true, autopref_rank_data is
not guaranteed to be transitive.

I think your patch loses transitivity in autopref_rank_for_schedule, see Wilco's
response.

FWIW, this hunk from my patch posted back on Friday is sufficient to restore
bootstrap as confirmed (again, back on Friday) by Steve.  It avoids the fancy
non-transitive comparison for qsort (but autopref_rank_data is still used in
multipref_dfa_lookahead_guard).

(I'm surprised Kyrill wasn't Cc'ed - adding him now)

Alexander

	* haifa-sched.c (autopref_rank_for_schedule): Do not invoke
	autopref_rank_data, order by min_offset.

diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c
index 549e8961411..cea1242e1f1 100644
--- a/gcc/haifa-sched.c
+++ b/gcc/haifa-sched.c
@@ -5725,7 +5669,7 @@ autopref_rank_for_schedule (const rtx_insn *insn1, const rtx_insn *insn2)
       int irrel2 = data2->status == AUTOPREF_MULTIPASS_DATA_IRRELEVANT;
 
       if (!irrel1 && !irrel2)
-	r = autopref_rank_data (data1, data2);
+	r = data1->min_offset - data2->min_offset;
       else
 	r = irrel2 - irrel1;
     }

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output
  2017-10-05 20:28             ` Alexander Monakov
@ 2017-10-06  7:55               ` Christophe Lyon
  2017-10-06  8:37               ` Maxim Kuvyrkov
  1 sibling, 0 replies; 22+ messages in thread
From: Christophe Lyon @ 2017-10-06  7:55 UTC (permalink / raw)
  To: Alexander Monakov
  Cc: Maxim Kuvyrkov, Wilco Dijkstra, Richard Sandiford, GCC Patches,
	nd, Jim Wilson, gnu, Jeff Law, Vladimir Makarov, Steve Ellcey,
	Kyrill Tkachov

On 5 October 2017 at 22:28, Alexander Monakov <amonakov@ispras.ru> wrote:
> On Thu, 5 Oct 2017, Maxim Kuvyrkov wrote:
>> I'm still working on analysis, but it appears to me that Alexander's patch
>> (current state of trunk) fails qsort check due to not being symmetric for
>> load/store analysis (write == 0 or write == 1) in comparisons with
>> "irrelevant" instructions.  Wilco's patch does not seem to address that, and,
>> possibly, makes the failure latent (I may be wrong here, it's late and I
>> didn't finish analysis yet).
>
> Yes, your analysis is incomplete, it should be easy to see that for always-false
> multi_mem_insn_p, autopref_rank_for_schedule implements lexicographical order.
> The problem is that when multi_mem_insn_p may be true, autopref_rank_data is
> not guaranteed to be transitive.
>
> I think your patch loses transitivity in autopref_rank_for_schedule, see Wilco's
> response.
>
> FWIW, this hunk from my patch posted back on Friday is sufficient to restore
> bootstrap as confirmed (again, back on Friday) by Steve.  It avoids the fancy
> non-transitive comparison for qsort (but autopref_rank_data is still used in
> multipref_dfa_lookahead_guard).
>
> (I'm surprised Kyrill wasn't Cc'ed - adding him now)
>
> Alexander
>
>         * haifa-sched.c (autopref_rank_for_schedule): Do not invoke
>         autopref_rank_data, order by min_offset.
>
> diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c
> index 549e8961411..cea1242e1f1 100644
> --- a/gcc/haifa-sched.c
> +++ b/gcc/haifa-sched.c
> @@ -5725,7 +5669,7 @@ autopref_rank_for_schedule (const rtx_insn *insn1, const rtx_insn *insn2)
>        int irrel2 = data2->status == AUTOPREF_MULTIPASS_DATA_IRRELEVANT;
>
>        if (!irrel1 && !irrel2)
> -       r = autopref_rank_data (data1, data2);
> +       r = data1->min_offset - data2->min_offset;
>        else
>         r = irrel2 - irrel1;
>      }

Hi,

FWIW, I've ran validations with this small patch, and it fixes:
- the aarch64-linux-gnu build problem
- a few other regressions (ICEs) that had probably already been
reported (not sure I saw all the regression reports after r253295)

See http://people.linaro.org/~christophe.lyon/cross-validation/gcc-test-patches/253456-aarch64-bootstrap.patch/report-build-info.html

Where "REF-BUILDFAILED" for aarch64-linux-gnu means the reference
build failed and the patched build succeeded.
The "REGRESSED" cell for aarch64-none-elf with ilp32 only "restores" a
previous failure of gcc.target/aarch64/aapcs64/func-ret-3.c execution,
 -O3 -g

Thanks,

Christophe

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output
  2017-10-05 20:28             ` Alexander Monakov
  2017-10-06  7:55               ` Christophe Lyon
@ 2017-10-06  8:37               ` Maxim Kuvyrkov
  2017-10-06 13:24                 ` Wilco Dijkstra
  1 sibling, 1 reply; 22+ messages in thread
From: Maxim Kuvyrkov @ 2017-10-06  8:37 UTC (permalink / raw)
  To: Alexander Monakov
  Cc: Wilco Dijkstra, Richard Sandiford, Christophe Lyon, GCC Patches,
	nd, Jim Wilson, gnu, Jeff Law, Vladimir Makarov, Steve Ellcey,
	Kyrill Tkachov

> On Oct 5, 2017, at 11:28 PM, Alexander Monakov <amonakov@ispras.ru> wrote:
> 
> On Thu, 5 Oct 2017, Maxim Kuvyrkov wrote:
>> I'm still working on analysis, but it appears to me that Alexander's patch
>> (current state of trunk) fails qsort check due to not being symmetric for
>> load/store analysis (write == 0 or write == 1) in comparisons with
>> "irrelevant" instructions.  Wilco's patch does not seem to address that, and,
>> possibly, makes the failure latent (I may be wrong here, it's late and I
>> didn't finish analysis yet).
> 
> Yes, your analysis is incomplete, it should be easy to see that for always-false
> multi_mem_insn_p, autopref_rank_for_schedule implements lexicographical order.
> The problem is that when multi_mem_insn_p may be true, autopref_rank_data is
> not guaranteed to be transitive.

Agree.

> 
> I think your patch loses transitivity in autopref_rank_for_schedule, see Wilco's
> response.

Agree.

> 
> FWIW, this hunk from my patch posted back on Friday is sufficient to restore
> bootstrap as confirmed (again, back on Friday) by Steve.  It avoids the fancy
> non-transitive comparison for qsort (but autopref_rank_data is still used in
> multipref_dfa_lookahead_guard).
> 
> (I'm surprised Kyrill wasn't Cc'ed - adding him now)
> 
> Alexander
> 
> 	* haifa-sched.c (autopref_rank_for_schedule): Do not invoke
> 	autopref_rank_data, order by min_offset.
> 
> diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c
> index 549e8961411..cea1242e1f1 100644
> --- a/gcc/haifa-sched.c
> +++ b/gcc/haifa-sched.c
> @@ -5725,7 +5669,7 @@ autopref_rank_for_schedule (const rtx_insn *insn1, const rtx_insn *insn2)
>       int irrel2 = data2->status == AUTOPREF_MULTIPASS_DATA_IRRELEVANT;
> 
>       if (!irrel1 && !irrel2)
> -	r = autopref_rank_data (data1, data2);
> +	r = data1->min_offset - data2->min_offset;
>       else
> 	r = irrel2 - irrel1;
>     }

I think that this is the best solution so far.  Could you add a comment like the following?
==
Ideally, we would call autopref_rank_data() here, but we can't since it is not guaranteed to return transitive results fro multi_mem_insns.  We use an approximation here and rely on lookahead_guard below to force instruction order according to autopref_rank_data().
==

I think that the above patch qualifies as obvious to unbreak the bootstrap.  Wilco, any objection to the above fix?

Regards,

--
Maxim Kuvyrkov
www.linaro.org





^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output
  2017-10-06  8:37               ` Maxim Kuvyrkov
@ 2017-10-06 13:24                 ` Wilco Dijkstra
  0 siblings, 0 replies; 22+ messages in thread
From: Wilco Dijkstra @ 2017-10-06 13:24 UTC (permalink / raw)
  To: Maxim Kuvyrkov, Alexander Monakov
  Cc: Richard Sandiford, Christophe Lyon, GCC Patches, nd, Jim Wilson,
	gnu, Jeff Law, Vladimir Makarov, Steve Ellcey, Kyrill Tkachov

Maxim Kuvyrkov wrote:

Note I've committed: https://gcc.gnu.org/ml/gcc-patches/2017-10/msg00309.html which does
change qsort to (qsort) like Jakub proposed.

> I think that this is the best solution so far.  Could you add a comment like the following?
> ==
> Ideally, we would call autopref_rank_data() here, but we can't since it is not guaranteed
> to return transitive results fro multi_mem_insns.  We use an approximation here and rely
> on lookahead_guard below to force instruction order according to autopref_rank_data().
> ==

The issue is that autopref_rank_data() doesn't do anything useful. It checks for overlaps
which shouldn't happen much, if at all. And as discussed declaring a comparison as
"unordered" is simply not possible. lookahead_guard can't fix things up either, so there is
really no point in keeping this function. Similarly all the min/max offset calculations are
redundant even if we assume the offsets in a LDP/STP instruction are completely random.

Wilco





    

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH] Fix PR82396: qsort comparator non-negative on sorted output
  2017-10-03 16:34 [PATCH] Fix PR82396: qsort comparator non-negative on sorted output Wilco Dijkstra
  2017-10-04  8:18 ` Christophe Lyon
@ 2017-10-16 10:57 ` Wilco Dijkstra
  2017-10-16 11:58   ` Maxim Kuvyrkov
  2017-10-24  2:43   ` Jeff Law
  1 sibling, 2 replies; 22+ messages in thread
From: Wilco Dijkstra @ 2017-10-16 10:57 UTC (permalink / raw)
  To: GCC Patches; +Cc: nd

This patch cleans up autopref scheduling.

The code is greatly simplified.  Sort accesses on the offset first, and
only if the offsets are the same fall back to other comparisons in
rank_for_schedule.  This doesn't at all restore the original behaviour
since we no longer compare the base address, but it now defines a total
sorting order.  More work will be required to improve the sorting so
that only loads/stores with the same base are affected.

AArch64 bootstrap completes.

OK for commit?

ChangeLog:
2017-10-03  Wilco Dijkstra  <wdijkstr@arm.com>

        PR rtl-optimization/82396
        * gcc/haifa-sched.c (autopref_multipass_init): Simplify
        initialization.
        (autopref_rank_data): Simplify sort order.
        * gcc/sched-int.h (autopref_multipass_data_): Remove
        multi_mem_insn_p, min_offset and max_offset.
--

diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c
index 549e8961411ecd0a04ac3b24ba78b5d53e63258a..77ba8c5292a0801bbbcb35ed61ab3040015f6677 100644
--- a/gcc/haifa-sched.c
+++ b/gcc/haifa-sched.c
@@ -5568,9 +5568,7 @@ autopref_multipass_init (const rtx_insn *insn, int write)
 
   gcc_assert (data->status == AUTOPREF_MULTIPASS_DATA_UNINITIALIZED);
   data->base = NULL_RTX;
-  data->min_offset = 0;
-  data->max_offset = 0;
-  data->multi_mem_insn_p = false;
+  data->offset = 0;
   /* Set insn entry initialized, but not relevant for auto-prefetcher.  */
   data->status = AUTOPREF_MULTIPASS_DATA_IRRELEVANT;
 
@@ -5585,10 +5583,9 @@ autopref_multipass_init (const rtx_insn *insn, int write)
     {
       int n_elems = XVECLEN (pat, 0);
 
-      int i = 0;
-      rtx prev_base = NULL_RTX;
-      int min_offset = 0;
-      int max_offset = 0;
+      int i, offset;
+      rtx base, prev_base = NULL_RTX;
+      int min_offset = INT_MAX;
 
       for (i = 0; i < n_elems; i++)
         {
@@ -5596,38 +5593,23 @@ autopref_multipass_init (const rtx_insn *insn, int write)
           if (GET_CODE (set) != SET)
             return;
 
-         rtx base = NULL_RTX;
-         int offset = 0;
           if (!analyze_set_insn_for_autopref (set, write, &base, &offset))
             return;
 
-         if (i == 0)
-           {
-             prev_base = base;
-             min_offset = offset;
-             max_offset = offset;
-           }
           /* Ensure that all memory operations in the PARALLEL use the same
              base register.  */
-         else if (REGNO (base) != REGNO (prev_base))
+         if (i > 0 && REGNO (base) != REGNO (prev_base))
             return;
-         else
-           {
-             min_offset = MIN (min_offset, offset);
-             max_offset = MAX (max_offset, offset);
-           }
+         prev_base = base;
+         min_offset = MIN (min_offset, offset);
         }
 
-      /* If we reached here then we have a valid PARALLEL of multiple memory
-        ops with prev_base as the base and min_offset and max_offset
-        containing the offsets range.  */
+      /* If we reached here then we have a valid PARALLEL of multiple memory ops
+        with prev_base as the base and min_offset containing the offset.  */
       gcc_assert (prev_base);
       data->base = prev_base;
-      data->min_offset = min_offset;
-      data->max_offset = max_offset;
-      data->multi_mem_insn_p = true;
+      data->offset = min_offset;
       data->status = AUTOPREF_MULTIPASS_DATA_NORMAL;
-
       return;
     }
 
@@ -5637,7 +5619,7 @@ autopref_multipass_init (const rtx_insn *insn, int write)
     return;
 
   if (!analyze_set_insn_for_autopref (set, write, &data->base,
-                                      &data->min_offset))
+                                      &data->offset))
     return;
 
   /* This insn is relevant for the auto-prefetcher.
@@ -5646,63 +5628,6 @@ autopref_multipass_init (const rtx_insn *insn, int write)
   data->status = AUTOPREF_MULTIPASS_DATA_NORMAL;
 }
 
-
-/* Helper for autopref_rank_for_schedule.  Given the data of two
-   insns relevant to the auto-prefetcher modelling code DATA1 and DATA2
-   return their comparison result.  Return 0 if there is no sensible
-   ranking order for the two insns.  */
-
-static int
-autopref_rank_data (autopref_multipass_data_t data1,
-                    autopref_multipass_data_t data2)
-{
-  /* Simple case when both insns are simple single memory ops.  */
-  if (!data1->multi_mem_insn_p && !data2->multi_mem_insn_p)
-    return data1->min_offset - data2->min_offset;
-
-  /* Two load/store multiple insns.  Return 0 if the offset ranges
-     overlap and the difference between the minimum offsets otherwise.  */
-  else if (data1->multi_mem_insn_p && data2->multi_mem_insn_p)
-    {
-      int min1 = data1->min_offset;
-      int max1 = data1->max_offset;
-      int min2 = data2->min_offset;
-      int max2 = data2->max_offset;
-
-      if (max1 < min2 || min1 > max2)
-       return min1 - min2;
-      else
-       return 0;
-    }
-
-  /* The other two cases is a pair of a load/store multiple and
-     a simple memory op.  Return 0 if the single op's offset is within the
-     range of the multi-op insn and the difference between the single offset
-     and the minimum offset of the multi-set insn otherwise.  */
-  else if (data1->multi_mem_insn_p && !data2->multi_mem_insn_p)
-    {
-      int max1 = data1->max_offset;
-      int min1 = data1->min_offset;
-
-      if (data2->min_offset >= min1
-         && data2->min_offset <= max1)
-       return 0;
-      else
-       return min1 - data2->min_offset;
-    }
-  else
-    {
-      int max2 = data2->max_offset;
-      int min2 = data2->min_offset;
-
-      if (data1->min_offset >= min2
-         && data1->min_offset <= max2)
-       return 0;
-      else
-       return data1->min_offset - min2;
-    }
-}
-
 /* Helper function for rank_for_schedule sorting.  */
 static int
 autopref_rank_for_schedule (const rtx_insn *insn1, const rtx_insn *insn2)
@@ -5725,7 +5650,7 @@ autopref_rank_for_schedule (const rtx_insn *insn1, const rtx_insn *insn2)
       int irrel2 = data2->status == AUTOPREF_MULTIPASS_DATA_IRRELEVANT;
 
       if (!irrel1 && !irrel2)
-       r = autopref_rank_data (data1, data2);
+       r = data1->offset - data2->offset;
       else
         r = irrel2 - irrel1;
     }
@@ -5753,7 +5678,7 @@ autopref_multipass_dfa_lookahead_guard_1 (const rtx_insn *insn1,
     return 0;
 
   if (rtx_equal_p (data1->base, data2->base)
-      && autopref_rank_data (data1, data2) > 0)
+      && data1->offset > data2->offset)
     {
       if (sched_verbose >= 2)
         {
diff --git a/gcc/sched-int.h b/gcc/sched-int.h
index 2af8f9fc32c0085c18511bbc83ad52c6ec31f671..6832589e3d0ad9ed5937ab3e81f3573c2560fe67 100644
--- a/gcc/sched-int.h
+++ b/gcc/sched-int.h
@@ -819,15 +819,8 @@ struct autopref_multipass_data_
   /* Base part of memory address.  */
   rtx base;
 
-  /* Memory offsets from the base.  For single simple sets
-     only min_offset is valid.  For multi-set insns min_offset
-     and max_offset record the minimum and maximum offsets from the same
-     base among the sets inside the PARALLEL.  */
-  int min_offset;
-  int max_offset;
-
-  /* True if this is a load/store-multiple instruction.  */
-  bool multi_mem_insn_p;
+  /* Memory offsets from the base.  */
+  int offset;
 
   /* Entry status.  */
   enum autopref_multipass_data_status status;
    

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output
  2017-10-16 10:57 ` Wilco Dijkstra
@ 2017-10-16 11:58   ` Maxim Kuvyrkov
  2017-10-24  2:43   ` Jeff Law
  1 sibling, 0 replies; 22+ messages in thread
From: Maxim Kuvyrkov @ 2017-10-16 11:58 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: GCC Patches, nd

> On Oct 16, 2017, at 1:56 PM, Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote:
> 
> This patch cleans up autopref scheduling.
> 
> The code is greatly simplified.  Sort accesses on the offset first, and
> only if the offsets are the same fall back to other comparisons in
> rank_for_schedule.  This doesn't at all restore the original behaviour
> since we no longer compare the base address, but it now defines a total
> sorting order.  More work will be required to improve the sorting so
> that only loads/stores with the same base are affected.
> 
> AArch64 bootstrap completes.
> 
> OK for commit?

Looks good to me.  I'm not an official scheduler maintainer, so wait for an ack from a maintainer.

--
Maxim Kuvyrkov
www.linaro.org


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] Fix PR82396: qsort comparator non-negative on sorted output
  2017-10-16 10:57 ` Wilco Dijkstra
  2017-10-16 11:58   ` Maxim Kuvyrkov
@ 2017-10-24  2:43   ` Jeff Law
  1 sibling, 0 replies; 22+ messages in thread
From: Jeff Law @ 2017-10-24  2:43 UTC (permalink / raw)
  To: Wilco Dijkstra, GCC Patches; +Cc: nd

On 10/16/2017 04:56 AM, Wilco Dijkstra wrote:
> This patch cleans up autopref scheduling.
> 
> The code is greatly simplified.  Sort accesses on the offset first, and
> only if the offsets are the same fall back to other comparisons in
> rank_for_schedule.  This doesn't at all restore the original behaviour
> since we no longer compare the base address, but it now defines a total
> sorting order.  More work will be required to improve the sorting so
> that only loads/stores with the same base are affected.
> 
> AArch64 bootstrap completes.
> 
> OK for commit?
> 
> ChangeLog:
> 2017-10-03  Wilco Dijkstra  <wdijkstr@arm.com>
> 
>         PR rtl-optimization/82396
>         * gcc/haifa-sched.c (autopref_multipass_init): Simplify
>         initialization.
>         (autopref_rank_data): Simplify sort order.
>         * gcc/sched-int.h (autopref_multipass_data_): Remove
>         multi_mem_insn_p, min_offset and max_offset.
OK. Sorry for the delay.

jeff

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2017-10-23 23:22 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-03 16:34 [PATCH] Fix PR82396: qsort comparator non-negative on sorted output Wilco Dijkstra
2017-10-04  8:18 ` Christophe Lyon
2017-10-04 10:29   ` Wilco Dijkstra
2017-10-04 11:02     ` Richard Sandiford
2017-10-04 13:12       ` Ramana Radhakrishnan
2017-10-04 13:39         ` Alexander Monakov
2017-10-04 13:41           ` Ramana Radhakrishnan
2017-10-04 13:49         ` Jakub Jelinek
2017-10-04 13:24       ` Wilco Dijkstra
2017-10-04 13:26         ` Jakub Jelinek
2017-10-04 14:54         ` Eric Botcazou
2017-10-05 17:20         ` Steve Ellcey
2017-10-05 17:34           ` Maxim Kuvyrkov
2017-10-05 17:46             ` Steve Ellcey
2017-10-05 18:51             ` Wilco Dijkstra
2017-10-05 20:28             ` Alexander Monakov
2017-10-06  7:55               ` Christophe Lyon
2017-10-06  8:37               ` Maxim Kuvyrkov
2017-10-06 13:24                 ` Wilco Dijkstra
2017-10-16 10:57 ` Wilco Dijkstra
2017-10-16 11:58   ` Maxim Kuvyrkov
2017-10-24  2:43   ` Jeff Law

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).