public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Tree SRA and atomicity/volatility
@ 2007-01-06 13:19 Eric Botcazou
  2007-01-06 13:31 ` Richard Guenther
                   ` (2 more replies)
  0 siblings, 3 replies; 49+ messages in thread
From: Eric Botcazou @ 2007-01-06 13:19 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 3538 bytes --]

Hi,

The Ada language has the concept of atomicity, for example:

   type Word is
      record
         First, Second, Third, Fourth : Byte;
      end record;

   External : Word := (others => 0);
   pragma Atomic (External);

Quoting the Ada RM: "For an atomic object (including an atomic component) all 
reads and updates of the object as a whole are indivisible."

The GNAT compiler implements it by (among other things) declaring the object 
as being volatile.  While this works well in 3.x, this breaks in 4.x because 
the SRA may decompose accesses to volatile aggregates:

eric@linux:~/build/gcc/native> cat t.c
struct S
{
  char a;
  char b;
  char c;
  char d;
};

void foo(void)
{
  volatile struct S dev;
  struct S tmp = dev;
  tmp.a = 0;
  dev = tmp;
}

In .sra at -O:

;; Function foo (foo)

Cannot scalarize variable dev because it is declared volatile

[...]

foo ()
{
  char tmp$d;
  char tmp$c;
  char tmp$b;
  char tmp$a;
  struct S tmp;
  struct S dev;

<bb 2>:
  tmp$d_6 = dev.d;
  tmp$c_7 = dev.c;
  tmp$b_8 = dev.b;
  tmp$a_9 = dev.a;
  tmp$a_10 = 0;
  dev.d = tmp$d_6;
  dev.c = tmp$c_7;
  dev.b = tmp$b_8;
  dev.a = tmp$a_10;
  return;

}

but accesses to the volatile object are nevertheless decomposed.  On x86-64:

foo:
.LFB2:
        movzbl  -21(%rsp), %eax
        movzbl  -22(%rsp), %edx
        movzbl  -23(%rsp), %ecx
        movzbl  -24(%rsp), %esi
        movb    %al, -21(%rsp)
        movb    %dl, -22(%rsp)
        movb    %cl, -23(%rsp)
        movb    $0, -24(%rsp)
        ret


While this may or may not be a problem for the C language, I think that it's a 
real bug in the Tree SRA pass, which doesn't check whether the object to be 
decomposed has side-effects.  More specifically, this is the LDST case:

  /* Invoked when we have a copy between one scalarizable reference ELT
     and one non-scalarizable reference OTHER.  IS_OUTPUT is true if ELT
     is on the left-hand side.  */
  void (*ldst) (struct sra_elt *elt, tree other,
		block_stmt_iterator *bsi, bool is_output);

/* Scalarize a LDST.  To recap, this is an assignment between one scalarizable
   reference ELT and one non-scalarizable reference OTHER.  IS_OUTPUT is true
   if ELT is on the left-hand side.  */

static void
scalarize_ldst (struct sra_elt *elt, tree other,
		block_stmt_iterator *bsi, bool is_output)
{
  /* Shouldn't have gotten called for a scalar.  */
  gcc_assert (!elt->replacement);

  if (elt->use_block_copy)
    {
      /* Since ELT is not fully instantiated, we have to leave the
	 block copy in place.  Treat this as a USE.  */
      scalarize_use (elt, NULL, bsi, is_output, false);
    }
  else
    {
      /* The interesting case is when ELT is fully instantiated.  In this
	 case we can have each element stored/loaded directly to/from the
	 corresponding slot in OTHER.  This avoids a block copy.  */

The last comment is wrong if OTHER has side-effects.  Hence the attached 
patch, tested on x86_64-suse-linux, which yields on x86-64:

foo:
.LFB2:
        movl    -24(%rsp), %eax
        movb    $0, %al
        movl    %eax, -24(%rsp)
        ret


This is a regression from the 3.x series for the Ada compiler so I'm 
requesting approval for mainline/4.2/4.1 branches.


2007-01-06  Eric Botcazou  <ebotcazou@adacore.com>

	* tree-sra.c (sra_walk_fns) <ldst>: Document new restriction.
	(sra_walk_modify_expr) <rhs_elt>: Treat the reference as a use
	if the lhs has side-effects.
	<lhs_elt>: Treat the reference as a use if the rhs has side-effects.



:ADDPATCH Tree-SRA:

-- 
Eric Botcazou

[-- Attachment #2: fc04-007_fsf.diff --]
[-- Type: text/x-diff, Size: 1606 bytes --]

Index: tree-sra.c
===================================================================
--- tree-sra.c	(revision 120483)
+++ tree-sra.c	(working copy)
@@ -670,8 +670,8 @@ struct sra_walk_fns
   void (*init) (struct sra_elt *elt, tree value, block_stmt_iterator *bsi);
 
   /* Invoked when we have a copy between one scalarizable reference ELT
-     and one non-scalarizable reference OTHER.  IS_OUTPUT is true if ELT
-     is on the left-hand side.  */
+     and one non-scalarizable reference OTHER without side-effects. 
+     IS_OUTPUT is true if ELT is on the left-hand side.  */
   void (*ldst) (struct sra_elt *elt, tree other,
 		block_stmt_iterator *bsi, bool is_output);
 
@@ -887,7 +887,7 @@ sra_walk_gimple_modify_stmt (tree expr, 
   /* If the RHS is scalarizable, handle it.  There are only two cases.  */
   if (rhs_elt)
     {
-      if (!rhs_elt->is_scalar)
+      if (!rhs_elt->is_scalar && !TREE_SIDE_EFFECTS (lhs))
 	fns->ldst (rhs_elt, lhs, bsi, false);
       else
 	fns->use (rhs_elt, &GIMPLE_STMT_OPERAND (expr, 1), bsi, false, false);
@@ -930,7 +930,8 @@ sra_walk_gimple_modify_stmt (tree expr, 
 	 The lvalue requirement prevents us from trying to directly scalarize
 	 the result of a function call.  Which would result in trying to call
 	 the function multiple times, and other evil things.  */
-      else if (!lhs_elt->is_scalar && is_gimple_addressable (rhs))
+      else if (!lhs_elt->is_scalar
+	       && !TREE_SIDE_EFFECTS (rhs) && is_gimple_addressable (rhs))
 	fns->ldst (lhs_elt, rhs, bsi, true);
 
       /* Otherwise we're being used in some context that requires the

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-06 13:19 [PATCH] Tree SRA and atomicity/volatility Eric Botcazou
@ 2007-01-06 13:31 ` Richard Guenther
  2007-01-06 13:47   ` Richard Kenner
  2007-01-06 13:49   ` Eric Botcazou
  2007-01-23 16:58 ` Mark Mitchell
  2007-03-02 14:55 ` Eric Botcazou
  2 siblings, 2 replies; 49+ messages in thread
From: Richard Guenther @ 2007-01-06 13:31 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: gcc-patches

On 1/6/07, Eric Botcazou <ebotcazou@adacore.com> wrote:
> Hi,
>
> The Ada language has the concept of atomicity, for example:
>
>    type Word is
>       record
>          First, Second, Third, Fourth : Byte;
>       end record;
>
>    External : Word := (others => 0);
>    pragma Atomic (External);
>
> Quoting the Ada RM: "For an atomic object (including an atomic component) all
> reads and updates of the object as a whole are indivisible."
>
> The GNAT compiler implements it by (among other things) declaring the object
> as being volatile.  While this works well in 3.x, this breaks in 4.x because
> the SRA may decompose accesses to volatile aggregates:
>
> eric@linux:~/build/gcc/native> cat t.c
> struct S
> {
>   char a;
>   char b;
>   char c;
>   char d;
> };
>
> void foo(void)
> {
>   volatile struct S dev;
>   struct S tmp = dev;
>   tmp.a = 0;
>   dev = tmp;
> }
>
> In .sra at -O:
>
> ;; Function foo (foo)
>
> Cannot scalarize variable dev because it is declared volatile
>
> [...]
>
> foo ()
> {
>   char tmp$d;
>   char tmp$c;
>   char tmp$b;
>   char tmp$a;
>   struct S tmp;
>   struct S dev;
>
> <bb 2>:
>   tmp$d_6 = dev.d;
>   tmp$c_7 = dev.c;
>   tmp$b_8 = dev.b;
>   tmp$a_9 = dev.a;
>   tmp$a_10 = 0;
>   dev.d = tmp$d_6;
>   dev.c = tmp$c_7;
>   dev.b = tmp$b_8;
>   dev.a = tmp$a_10;
>   return;
>
> }

It doesn't scalarize dev - it scalarizes tmp, which is not volatile.  Then
it uses by-element copy, which is maybe not a good idea here
(but not necessarily wrong).

The problem is really that the middle-end doesn't have a good definition
of volatile semantics on a structure.  At least not what you want from
it.

It looks to me that you want either

struct dev {
  volatile unsigned int data;
};

and "decompose" data explicitly.

What is the required semantics if I only access dev.a?  Do you
require a full-word load of the data and an extraction of a there?

> but accesses to the volatile object are nevertheless decomposed.  On x86-64:
>
> foo:
> .LFB2:
>         movzbl  -21(%rsp), %eax
>         movzbl  -22(%rsp), %edx
>         movzbl  -23(%rsp), %ecx
>         movzbl  -24(%rsp), %esi
>         movb    %al, -21(%rsp)
>         movb    %dl, -22(%rsp)
>         movb    %cl, -23(%rsp)
>         movb    $0, -24(%rsp)
>         ret
>
>
> While this may or may not be a problem for the C language, I think that it's a
> real bug in the Tree SRA pass, which doesn't check whether the object to be
> decomposed has side-effects.  More specifically, this is the LDST case:
>
>   /* Invoked when we have a copy between one scalarizable reference ELT
>      and one non-scalarizable reference OTHER.  IS_OUTPUT is true if ELT
>      is on the left-hand side.  */
>   void (*ldst) (struct sra_elt *elt, tree other,
>                 block_stmt_iterator *bsi, bool is_output);
>
> /* Scalarize a LDST.  To recap, this is an assignment between one scalarizable
>    reference ELT and one non-scalarizable reference OTHER.  IS_OUTPUT is true
>    if ELT is on the left-hand side.  */
>
> static void
> scalarize_ldst (struct sra_elt *elt, tree other,
>                 block_stmt_iterator *bsi, bool is_output)
> {
>   /* Shouldn't have gotten called for a scalar.  */
>   gcc_assert (!elt->replacement);
>
>   if (elt->use_block_copy)
>     {
>       /* Since ELT is not fully instantiated, we have to leave the
>          block copy in place.  Treat this as a USE.  */
>       scalarize_use (elt, NULL, bsi, is_output, false);
>     }
>   else
>     {
>       /* The interesting case is when ELT is fully instantiated.  In this
>          case we can have each element stored/loaded directly to/from the
>          corresponding slot in OTHER.  This avoids a block copy.  */
>
> The last comment is wrong if OTHER has side-effects.  Hence the attached
> patch, tested on x86_64-suse-linux, which yields on x86-64:
>
> foo:
> .LFB2:
>         movl    -24(%rsp), %eax
>         movb    $0, %al
>         movl    %eax, -24(%rsp)
>         ret
>
>
> This is a regression from the 3.x series for the Ada compiler so I'm
> requesting approval for mainline/4.2/4.1 branches.

No.  This is not the correct solution for your problem.

>
> 2007-01-06  Eric Botcazou  <ebotcazou@adacore.com>
>
>         * tree-sra.c (sra_walk_fns) <ldst>: Document new restriction.
>         (sra_walk_modify_expr) <rhs_elt>: Treat the reference as a use
>         if the lhs has side-effects.
>         <lhs_elt>: Treat the reference as a use if the rhs has side-effects.
>
>
>
> :ADDPATCH Tree-SRA:
>
> --
> Eric Botcazou
>
>
>

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-06 13:31 ` Richard Guenther
@ 2007-01-06 13:47   ` Richard Kenner
  2007-01-06 13:49   ` Eric Botcazou
  1 sibling, 0 replies; 49+ messages in thread
From: Richard Kenner @ 2007-01-06 13:47 UTC (permalink / raw)
  To: richard.guenther; +Cc: ebotcazou, gcc-patches

> The problem is really that the middle-end doesn't have a good
> definition of volatile semantics on a structure.  At least not what
> you want from it.

As was discussed in another thread a while ago, the middle-end doesn't wreally
have a good definition of volatile semantics AT ALL: a lot of it is just "do
what we used to do".

My feeling is that the proper semantics of volatile are that the access
pattern in the generated code should match, as closely as the underlying
machine allows, the access pattern in the source.  Unfortunately, this is an
informal, and hence necessarily imprecise, definition, but I think it might
be the best we can do.  It's certainly consistent with past practice.

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-06 13:31 ` Richard Guenther
  2007-01-06 13:47   ` Richard Kenner
@ 2007-01-06 13:49   ` Eric Botcazou
  2007-01-07 11:23     ` Richard Guenther
  1 sibling, 1 reply; 49+ messages in thread
From: Eric Botcazou @ 2007-01-06 13:49 UTC (permalink / raw)
  To: Richard Guenther; +Cc: gcc-patches

> It doesn't scalarize dev - it scalarizes tmp, which is not volatile.

Right, this is why I took care of not saying "it scalarizes dev", but "it 
decomposes accesses to dev", which is precisely what happens.

> Then it uses by-element copy, which is maybe not a good idea here
> (but not necessarily wrong).

I agree that it's not necessarily wrong for the C testcase, but I think it's 
nevertheless a bug in the Tree SRA pass, which happens to generate wrong code 
for the equivalent Ada testcase.  It should check whether the [LR]HS has 
side-effects before turning a single access into multiple ones.

> What is the required semantics if I only access dev.a?  Do you
> require a full-word load of the data and an extraction of a there?

Exactly.  Gigi makes sure that a full-word load is always possible, but it 
relies on the side-effectness for keeping it together.

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-06 13:49   ` Eric Botcazou
@ 2007-01-07 11:23     ` Richard Guenther
  2007-01-08 11:30       ` Eric Botcazou
  0 siblings, 1 reply; 49+ messages in thread
From: Richard Guenther @ 2007-01-07 11:23 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: gcc-patches

On 1/6/07, Eric Botcazou <ebotcazou@adacore.com> wrote:
> > It doesn't scalarize dev - it scalarizes tmp, which is not volatile.
>
> Right, this is why I took care of not saying "it scalarizes dev", but "it
> decomposes accesses to dev", which is precisely what happens.
>
> > Then it uses by-element copy, which is maybe not a good idea here
> > (but not necessarily wrong).
>
> I agree that it's not necessarily wrong for the C testcase, but I think it's
> nevertheless a bug in the Tree SRA pass, which happens to generate wrong code
> for the equivalent Ada testcase.  It should check whether the [LR]HS has
> side-effects before turning a single access into multiple ones.
>
> > What is the required semantics if I only access dev.a?  Do you
> > require a full-word load of the data and an extraction of a there?
>
> Exactly.  Gigi makes sure that a full-word load is always possible, but it
> relies on the side-effectness for keeping it together.

I don't see where the tree-ssa part of the middle-end guarantees this and
also expand does not seem to honor this as then we'd get a full load
for every component load.  It looks to me that for a component write
you then need a read-modify-write cycle as we get for targets that
don't support sub-word loads/stores and/or strict-alignment targets.

Does this all work?  If so it would be nice to have this documented
(that setting TYPE_VOLATILE on a record type forces full record
access) and also warn/error if this is not possible (because for example
the record has a size that is bigger than the largest mode we can
atomically load/store).  We should also have testcases that verify
this works for your copying case, for loads and for stores.

The patch as is is certainly simple and safe enough for the branches
if it fixes a regression there (with some testcases there as well, please).
For mainline I'd see us starting to define the middle-end type system
and this is certainly one part of it.  I'm just curious if a patch to
tree-sra.c can really fix all the issues we have with this.

So, the patch is ok for the branches if you add two testcases, one for
the copying and one to check we get a proper read-modify-write
cycle for storing into a part of the structure (just choose a popular
non-strict-align target for that one - this needs to be a asm test).

For mainline you should document this effect in tree.h and tree-ssa.texi
(I realize we don't have a section on the type-system there).  And maybe
in extend.texi if we want to expose this behavior to users for POD types.

All of the above given that nobody objects to it within a reasonable time.

Thanks,
Richard.

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-07 11:23     ` Richard Guenther
@ 2007-01-08 11:30       ` Eric Botcazou
  2007-01-08 11:52         ` Richard Guenther
  0 siblings, 1 reply; 49+ messages in thread
From: Eric Botcazou @ 2007-01-08 11:30 UTC (permalink / raw)
  To: Richard Guenther; +Cc: gcc-patches

> > Exactly.  Gigi makes sure that a full-word load is always possible, but
> > it relies on the side-effectness for keeping it together.
>
> I don't see where the tree-ssa part of the middle-end guarantees this and
> also expand does not seem to honor this as then we'd get a full load
> for every component load.  It looks to me that for a component write
> you then need a read-modify-write cycle as we get for targets that
> don't support sub-word loads/stores and/or strict-alignment targets.

I'm not sure I understand, maybe my wording was ambiguous.  Gigi has all the 
magic to support atomicity (type wrapping, alignment tweaking, countermeasure 
for problematic targets, e.g. Alpha, and so on) and generates the full-word 
access on its own.  The only requirement is for the middle-end to fully honor 
the TREE_THIS_VOLATILE and TREE_SIDE_EFFECTS flags.

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-08 11:30       ` Eric Botcazou
@ 2007-01-08 11:52         ` Richard Guenther
  2007-01-08 12:43           ` Eric Botcazou
                             ` (2 more replies)
  0 siblings, 3 replies; 49+ messages in thread
From: Richard Guenther @ 2007-01-08 11:52 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: gcc-patches

On 1/8/07, Eric Botcazou <ebotcazou@adacore.com> wrote:
> > > Exactly.  Gigi makes sure that a full-word load is always possible, but
> > > it relies on the side-effectness for keeping it together.
> >
> > I don't see where the tree-ssa part of the middle-end guarantees this and
> > also expand does not seem to honor this as then we'd get a full load
> > for every component load.  It looks to me that for a component write
> > you then need a read-modify-write cycle as we get for targets that
> > don't support sub-word loads/stores and/or strict-alignment targets.
>
> I'm not sure I understand, maybe my wording was ambiguous.  Gigi has all the
> magic to support atomicity (type wrapping, alignment tweaking, countermeasure
> for problematic targets, e.g. Alpha, and so on) and generates the full-word
> access on its own.  The only requirement is for the middle-end to fully honor
> the TREE_THIS_VOLATILE and TREE_SIDE_EFFECTS flags.

Oh, so the only problem you see are for structure copies that are
decomposed by SRA?  Still, middle-end support for this kind of stuff
could be useful - maybe using volatile bitfields instead of a volatile
structure type.

struct val {
  volatile unsigned val1 : 8;
  volatile unsigned val2 : 8;
};

(at the moment accesses to the above are also decomposed by expand).

Richard.

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-08 11:52         ` Richard Guenther
@ 2007-01-08 12:43           ` Eric Botcazou
  2007-01-08 13:12           ` Richard Kenner
  2007-01-08 14:55           ` Richard Guenther
  2 siblings, 0 replies; 49+ messages in thread
From: Eric Botcazou @ 2007-01-08 12:43 UTC (permalink / raw)
  To: Richard Guenther; +Cc: gcc-patches

> Oh, so the only problem you see are for structure copies that are
> decomposed by SRA?

At the moment, yes.

> Still, middle-end support for this kind of stuff could be useful - maybe
> using volatile bitfields instead of a volatile structure type.
>
> struct val {
>   volatile unsigned val1 : 8;
>   volatile unsigned val2 : 8;
> };
>
> (at the moment accesses to the above are also decomposed by expand).

Why would we prevent the decomposition in this case?

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-08 11:52         ` Richard Guenther
  2007-01-08 12:43           ` Eric Botcazou
@ 2007-01-08 13:12           ` Richard Kenner
  2007-01-08 13:40             ` Richard Guenther
  2007-01-08 14:55           ` Richard Guenther
  2 siblings, 1 reply; 49+ messages in thread
From: Richard Kenner @ 2007-01-08 13:12 UTC (permalink / raw)
  To: richard.guenther; +Cc: ebotcazou, gcc-patches

> Still, middle-end support for this kind of stuff could be useful -
> maybe using volatile bitfields instead of a volatile structure type.

Could you say what kinds of semantics a volatile bitfield might have?  To me,
that sounds somewhat self-contradictory since bitfields often do things
like cross byte boundaries.


^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-08 13:12           ` Richard Kenner
@ 2007-01-08 13:40             ` Richard Guenther
  0 siblings, 0 replies; 49+ messages in thread
From: Richard Guenther @ 2007-01-08 13:40 UTC (permalink / raw)
  To: Richard Kenner; +Cc: ebotcazou, gcc-patches

On 1/8/07, Richard Kenner <kenner@vlsi1.ultra.nyu.edu> wrote:
> > Still, middle-end support for this kind of stuff could be useful -
> > maybe using volatile bitfields instead of a volatile structure type.
>
> Could you say what kinds of semantics a volatile bitfield might have?  To me,
> that sounds somewhat self-contradictory since bitfields often do things
> like cross byte boundaries.

It would require "atomic" (in the sense of this thread - loads and stores only
of the full object) access of the underlying object.  But it's just
random ideas -
the volatile qualifier isn't really appropriate for this.

Richard.

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-08 11:52         ` Richard Guenther
  2007-01-08 12:43           ` Eric Botcazou
  2007-01-08 13:12           ` Richard Kenner
@ 2007-01-08 14:55           ` Richard Guenther
  2007-01-12 13:57             ` Eric Botcazou
  2 siblings, 1 reply; 49+ messages in thread
From: Richard Guenther @ 2007-01-08 14:55 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: gcc-patches

On 1/8/07, Richard Guenther <richard.guenther@gmail.com> wrote:
> On 1/8/07, Eric Botcazou <ebotcazou@adacore.com> wrote:
> > > > Exactly.  Gigi makes sure that a full-word load is always possible, but
> > > > it relies on the side-effectness for keeping it together.
> > >
> > > I don't see where the tree-ssa part of the middle-end guarantees this and
> > > also expand does not seem to honor this as then we'd get a full load
> > > for every component load.  It looks to me that for a component write
> > > you then need a read-modify-write cycle as we get for targets that
> > > don't support sub-word loads/stores and/or strict-alignment targets.
> >
> > I'm not sure I understand, maybe my wording was ambiguous.  Gigi has all the
> > magic to support atomicity (type wrapping, alignment tweaking, countermeasure
> > for problematic targets, e.g. Alpha, and so on) and generates the full-word
> > access on its own.  The only requirement is for the middle-end to fully honor
> > the TREE_THIS_VOLATILE and TREE_SIDE_EFFECTS flags.
>
> Oh, so the only problem you see are for structure copies that are
> decomposed by SRA?

On a second thought this cannot be true.  If you have "all the magic
to support atomicity" and generate "the full-word access on" your own --
why do you expose the individual fields to the middle-end at all??
I claim you _cannot_ generate full-word access this way (unless
using memcpy, but even that is nowadays lowered).

So get us a testcase that shows copy/read/write on elements of such
a type.

Thanks,
Richard.

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-08 14:55           ` Richard Guenther
@ 2007-01-12 13:57             ` Eric Botcazou
  2007-01-12 16:36               ` Richard Guenther
  0 siblings, 1 reply; 49+ messages in thread
From: Eric Botcazou @ 2007-01-12 13:57 UTC (permalink / raw)
  To: Richard Guenther; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1272 bytes --]

[Sorry for the delay, pretty hot week]

> On a second thought this cannot be true.  If you have "all the magic
> to support atomicity" and generate "the full-word access on" your own --
> why do you expose the individual fields to the middle-end at all??

Well, the type has fields and you can access these fields individually in the 
source so they must be present in the IL to generate debug info for them.

> I claim you _cannot_ generate full-word access this way (unless
> using memcpy, but even that is nowadays lowered).

Do not underestimate the cleverness of Gigi. :-)

> So get us a testcase that shows copy/read/write on elements of such
> a type.

Direct translation of the C testcase attached.  At -O2 -fno-tree-sra on i686:

_ada_q:
	pushl	%ebp
	movl	%esp, %ebp
	subl	$16, %esp
	movl	$0, -4(%ebp)
	movl	-4(%ebp), %eax
	xorb	%al, %al
	movl	%eax, -4(%ebp)
	leave
	ret

At -O2 on i686:

_ada_q:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx
	subl	$16, %esp
	movb	$0, -5(%ebp)
	movb	$0, -6(%ebp)
	movb	$0, -7(%ebp)
	movb	$0, -8(%ebp)
	movzbl	-5(%ebp), %eax
	movzbl	-6(%ebp), %edx
	movzbl	-7(%ebp), %ecx
	movzbl	-8(%ebp), %ebx
	movb	%al, -5(%ebp)
	movb	%dl, -6(%ebp)
	movb	%cl, -7(%ebp)
	movb	$0, -8(%ebp)
	addl	$16, %esp
	popl	%ebx
	popl	%ebp
	ret

-- 
Eric Botcazou

[-- Attachment #2: q.adb --]
[-- Type: text/x-adasrc, Size: 314 bytes --]

procedure Q is

   type Byte is mod 2**8;
   for Byte'Size use 8;

   type Word is
      record
         First, Second, Third, Fourth : Byte;
      end record;

   External : Word := (others => 0);
   pragma Atomic (External);

   Tmp : Word;

begin
   Tmp := External;
   Tmp.First := 0;
   External := Tmp;
end;

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-12 13:57             ` Eric Botcazou
@ 2007-01-12 16:36               ` Richard Guenther
  2007-01-12 17:03                 ` Richard Guenther
  2007-01-14  7:47                 ` Eric Botcazou
  0 siblings, 2 replies; 49+ messages in thread
From: Richard Guenther @ 2007-01-12 16:36 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: gcc-patches

On 1/12/07, Eric Botcazou <ebotcazou@adacore.com> wrote:
> [Sorry for the delay, pretty hot week]
>
> > On a second thought this cannot be true.  If you have "all the magic
> > to support atomicity" and generate "the full-word access on" your own --
> > why do you expose the individual fields to the middle-end at all??
>
> Well, the type has fields and you can access these fields individually in the
> source so they must be present in the IL to generate debug info for them.
>
> > I claim you _cannot_ generate full-word access this way (unless
> > using memcpy, but even that is nowadays lowered).
>
> Do not underestimate the cleverness of Gigi. :-)

I don't see anything "clever" in

Q ()
{
  typedef q__byte q__byte;
  typedef struct q__word q__word;
  struct q__word T1b = {.first=0, .second=0, .third=0, .fourth=0};
  struct q__word tmp;
  struct q__word external = VIEW_CONVERT_EXPR<struct q__word>(SAVE_EXPR <T1b>);

  tmp = VIEW_CONVERT_EXPR<struct q__word>(external);
  tmp.first = 0;
  external = VIEW_CONVERT_EXPR<struct q__word>(tmp);
  return;
}

for this testcase (t02.original dump, gcc 4.1.2) .  Now if I change the
testcase to something I requested (direct write to a component), I get

Q ()
{
  typedef q__byte q__byte;
  typedef struct q__word q__word;
  struct q__word T1b = {.first=0, .second=0, .third=0, .fourth=0};
  struct q__word external = VIEW_CONVERT_EXPR<struct q__word>(SAVE_EXPR <T1b>);

  VIEW_CONVERT_EXPR<struct q__word>(external).first = 0;
  return;
}

and after gimplification we can see all it broken already (that is, you
exposed the component write to the middle-end - no cleverness
prevented this):

Q ()
{
  struct q__word T1b.0;
  typedef q__byte q__byte;
  typedef struct q__word q__word;
  struct q__word T1b;
  struct q__word external;

  T1b.first = 0;
  T1b.second = 0;
  T1b.third = 0;
  T1b.fourth = 0;
  T1b.0 = T1b;
  external = T1b.0;
  external.first = 0;
  return;
}

The testcase now looks like:

procedure Q is

   type Byte is mod 2**8;
   for Byte'Size use 8;

   type Word is
      record
         First, Second, Third, Fourth : Byte;
      end record;

   External : Word := (others => 0);
   pragma Atomic (External);

begin
   External.First := 0;
end;


> > So get us a testcase that shows copy/read/write on elements of such
> > a type.
>
> Direct translation of the C testcase attached.  At -O2 -fno-tree-sra on i686:

Assembler is of course not interesting - interesting is what gigi feeds the
middle-end with - and that part certainly ensures no such thing as
atomic access.

Richard.

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-12 16:36               ` Richard Guenther
@ 2007-01-12 17:03                 ` Richard Guenther
  2007-01-14  7:47                 ` Eric Botcazou
  1 sibling, 0 replies; 49+ messages in thread
From: Richard Guenther @ 2007-01-12 17:03 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: gcc-patches

On 1/12/07, Richard Guenther <richard.guenther@gmail.com> wrote:
> On 1/12/07, Eric Botcazou <ebotcazou@adacore.com> wrote:
> > [Sorry for the delay, pretty hot week]
> >
> > > On a second thought this cannot be true.  If you have "all the magic
> > > to support atomicity" and generate "the full-word access on" your own --
> > > why do you expose the individual fields to the middle-end at all??
> >
> > Well, the type has fields and you can access these fields individually in the
> > source so they must be present in the IL to generate debug info for them.
> >
> > > I claim you _cannot_ generate full-word access this way (unless
> > > using memcpy, but even that is nowadays lowered).
> >
> > Do not underestimate the cleverness of Gigi. :-)
>
> I don't see anything "clever" in
>
> Q ()
> {
>   typedef q__byte q__byte;
>   typedef struct q__word q__word;
>   struct q__word T1b = {.first=0, .second=0, .third=0, .fourth=0};
>   struct q__word tmp;
>   struct q__word external = VIEW_CONVERT_EXPR<struct q__word>(SAVE_EXPR <T1b>);
>
>   tmp = VIEW_CONVERT_EXPR<struct q__word>(external);
>   tmp.first = 0;
>   external = VIEW_CONVERT_EXPR<struct q__word>(tmp);
>   return;
> }
>
> for this testcase (t02.original dump, gcc 4.1.2) .  Now if I change the
> testcase to something I requested (direct write to a component), I get
>
> Q ()
> {
>   typedef q__byte q__byte;
>   typedef struct q__word q__word;
>   struct q__word T1b = {.first=0, .second=0, .third=0, .fourth=0};
>   struct q__word external = VIEW_CONVERT_EXPR<struct q__word>(SAVE_EXPR <T1b>);
>
>   VIEW_CONVERT_EXPR<struct q__word>(external).first = 0;
>   return;
> }
>
> and after gimplification we can see all it broken already (that is, you
> exposed the component write to the middle-end - no cleverness
> prevented this):
>
> Q ()
> {
>   struct q__word T1b.0;
>   typedef q__byte q__byte;
>   typedef struct q__word q__word;
>   struct q__word T1b;
>   struct q__word external;
>
>   T1b.first = 0;
>   T1b.second = 0;
>   T1b.third = 0;
>   T1b.fourth = 0;
>   T1b.0 = T1b;
>   external = T1b.0;
>   external.first = 0;
>   return;
> }
>
> The testcase now looks like:
>
> procedure Q is
>
>    type Byte is mod 2**8;
>    for Byte'Size use 8;
>
>    type Word is
>       record
>          First, Second, Third, Fourth : Byte;
>       end record;
>
>    External : Word := (others => 0);
>    pragma Atomic (External);
>
> begin
>    External.First := 0;
> end;
>

Oh, and of course wrong (?) assembly is created for this:

_ada_q:
.LFB3:
        pushl   %ebp
.LCFI0:
        movl    %esp, %ebp
.LCFI1:
        subl    $16, %esp
.LCFI2:
        movl    $0, -4(%ebp)
        movb    $0, -4(%ebp)
        leave
        ret

Richard.

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-12 16:36               ` Richard Guenther
  2007-01-12 17:03                 ` Richard Guenther
@ 2007-01-14  7:47                 ` Eric Botcazou
  2007-01-14 14:57                   ` Richard Guenther
  1 sibling, 1 reply; 49+ messages in thread
From: Eric Botcazou @ 2007-01-14  7:47 UTC (permalink / raw)
  To: Richard Guenther; +Cc: gcc-patches

> for this testcase (t02.original dump, gcc 4.1.2) .  Now if I change the
> testcase to something I requested (direct write to a component), I get
>
> Q ()
> {
>   typedef q__byte q__byte;
>   typedef struct q__word q__word;
>   struct q__word T1b = {.first=0, .second=0, .third=0, .fourth=0};
>   struct q__word external = VIEW_CONVERT_EXPR<struct q__word>(SAVE_EXPR
> <T1b>);
>
>   VIEW_CONVERT_EXPR<struct q__word>(external).first = 0;
>   return;
> }

You mean this I presume?

External.First = 0;

Then it is legal for the compiler to emit

        movb    $0, -4(%ebp)

because the object is not accessed "as a whole" in the source code (see the RM 
quote in my first message).

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-14  7:47                 ` Eric Botcazou
@ 2007-01-14 14:57                   ` Richard Guenther
  2007-01-19 13:58                     ` Eric Botcazou
  0 siblings, 1 reply; 49+ messages in thread
From: Richard Guenther @ 2007-01-14 14:57 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: gcc-patches

On 1/14/07, Eric Botcazou <ebotcazou@adacore.com> wrote:
> > for this testcase (t02.original dump, gcc 4.1.2) .  Now if I change the
> > testcase to something I requested (direct write to a component), I get
> >
> > Q ()
> > {
> >   typedef q__byte q__byte;
> >   typedef struct q__word q__word;
> >   struct q__word T1b = {.first=0, .second=0, .third=0, .fourth=0};
> >   struct q__word external = VIEW_CONVERT_EXPR<struct q__word>(SAVE_EXPR
> > <T1b>);
> >
> >   VIEW_CONVERT_EXPR<struct q__word>(external).first = 0;
> >   return;
> > }
>
> You mean this I presume?
>
> External.First = 0;
>
> Then it is legal for the compiler to emit
>
>         movb    $0, -4(%ebp)
>
> because the object is not accessed "as a whole" in the source code (see the RM
> quote in my first message).

So my Ada ignorance shows here - so "atomicity" of External is only
guaranteed for copies.  Still there is nothing "clever" that gigi is doing -
it's simply doing structure assignment (with some useless
VIEW_CONVERT_EXPRs).  I think you should represent this atomic
access via a union.  But I wonder for what this form of "atomicity" is
used or useful.

Richard.

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-14 14:57                   ` Richard Guenther
@ 2007-01-19 13:58                     ` Eric Botcazou
  0 siblings, 0 replies; 49+ messages in thread
From: Eric Botcazou @ 2007-01-19 13:58 UTC (permalink / raw)
  To: Richard Guenther; +Cc: gcc-patches

> So my Ada ignorance shows here - so "atomicity" of External is only
> guaranteed for copies.  Still there is nothing "clever" that gigi is doing
> - it's simply doing structure assignment (with some useless
> VIEW_CONVERT_EXPRs).

No, the VIEW_CONVERT_EXPRs are not useless, they are the crux of the matter, 
not everything is visible in the logs.

> But I wonder for what this form of "atomicity" is used or useful.

Mapping hardware, I guess.

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-06 13:19 [PATCH] Tree SRA and atomicity/volatility Eric Botcazou
  2007-01-06 13:31 ` Richard Guenther
@ 2007-01-23 16:58 ` Mark Mitchell
  2007-01-23 17:15   ` Daniel Berlin
  2007-01-23 17:24   ` Richard Guenther
  2007-03-02 14:55 ` Eric Botcazou
  2 siblings, 2 replies; 49+ messages in thread
From: Mark Mitchell @ 2007-01-23 16:58 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: gcc-patches

Eric Botcazou wrote:

> 2007-01-06  Eric Botcazou  <ebotcazou@adacore.com>
> 
> 	* tree-sra.c (sra_walk_fns) <ldst>: Document new restriction.
> 	(sra_walk_modify_expr) <rhs_elt>: Treat the reference as a use
> 	if the lhs has side-effects.
> 	<lhs_elt>: Treat the reference as a use if the rhs has side-effects.
>
> :ADDPATCH Tree-SRA:

I've read through this thread, and I think Eric's patch makes sense.  (I
think Richard G. eventually came to the same conclusion, so I'm not
claiming this is some brilliant insight on my part.)  Even in C, keeping
volatile accesses "more atomic" seems like a good thing.  And,
certainly, any possible pessimization by not doing individual accesses
to move data out of volatile structures is going to be insignificant, to
overall program runtime.

As the previous thread about volatile (involving differences, if any,
between scalars and BLKmode objects) shows, it's hard to simultaneously
(a) precisely define volatile and (b) hew to existing
practice/expectations.  But, we can bias the compiler towards more
atomicity, and, in practice, people will be happier with GCC if we don't
try to get too clever in the presence of volatile.

-- 
Mark Mitchell
CodeSourcery
mark@codesourcery.com
(650) 331-3385 x713

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-23 16:58 ` Mark Mitchell
@ 2007-01-23 17:15   ` Daniel Berlin
  2007-01-23 17:24   ` Richard Guenther
  1 sibling, 0 replies; 49+ messages in thread
From: Daniel Berlin @ 2007-01-23 17:15 UTC (permalink / raw)
  To: Mark Mitchell; +Cc: Eric Botcazou, gcc-patches

On 1/23/07, Mark Mitchell <mark@codesourcery.com> wrote:
> Eric Botcazou wrote:
>
> > 2007-01-06  Eric Botcazou  <ebotcazou@adacore.com>
> >
> >       * tree-sra.c (sra_walk_fns) <ldst>: Document new restriction.
> >       (sra_walk_modify_expr) <rhs_elt>: Treat the reference as a use
> >       if the lhs has side-effects.
> >       <lhs_elt>: Treat the reference as a use if the rhs has side-effects.
> >
> > :ADDPATCH Tree-SRA:
>
> I've read through this thread, and I think Eric's patch makes sense.

> think Richard G. eventually came to the same conclusion, so I'm not
> claiming this is some brilliant insight on my part.)  Even in C, keeping
> volatile accesses "more atomic" seems like a good thing.  And,
> certainly, any possible pessimization by not doing individual accesses
> to move data out of volatile structures is going to be insignificant, to
> overall program runtime.
I agree the principle of his patch makes sense, but i'm not sure of
the implementation.
If it's really checking for volatility, we have
stmt->has_volatile_operands, and this should be checked, rather than
every tree single operand.
We simply don't touch statements with volatile operands.

That appears not to be the case here (he's checking for some property
we don't expose in the middle end), in which case I guess testing
TREE_SIDE_EFFECTS make sense, but i'm very worried about the idea that
we are going to need to test every tree operand before making a
transformation, which is pretty expensive.
That is one of the reasons we have stmt_ann->has_volatile_ops in the
first place - so optimizers could make quick decisions about whether
they should be touching something or not.

If the only place this will ever be tested is SRA, i'm okay with it.
I have a nagging feeling it's not, particularly as we get into more
loop opts (predictive commoning, etc) and i don't want to see us say
"well, everyone was fine with us testing every tree operand then".

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-23 16:58 ` Mark Mitchell
  2007-01-23 17:15   ` Daniel Berlin
@ 2007-01-23 17:24   ` Richard Guenther
  2007-01-23 19:38     ` Mark Mitchell
  2007-01-24  0:53     ` Richard Kenner
  1 sibling, 2 replies; 49+ messages in thread
From: Richard Guenther @ 2007-01-23 17:24 UTC (permalink / raw)
  To: Mark Mitchell; +Cc: Eric Botcazou, gcc-patches

On 1/23/07, Mark Mitchell <mark@codesourcery.com> wrote:
> Eric Botcazou wrote:
>
> > 2007-01-06  Eric Botcazou  <ebotcazou@adacore.com>
> >
> >       * tree-sra.c (sra_walk_fns) <ldst>: Document new restriction.
> >       (sra_walk_modify_expr) <rhs_elt>: Treat the reference as a use
> >       if the lhs has side-effects.
> >       <lhs_elt>: Treat the reference as a use if the rhs has side-effects.
> >
> > :ADDPATCH Tree-SRA:
>
> I've read through this thread, and I think Eric's patch makes sense.  (I
> think Richard G. eventually came to the same conclusion, so I'm not
> claiming this is some brilliant insight on my part.)  Even in C, keeping
> volatile accesses "more atomic" seems like a good thing.  And,
> certainly, any possible pessimization by not doing individual accesses
> to move data out of volatile structures is going to be insignificant, to
> overall program runtime.

I agreed to fix the (possible) regression on the branches with Eric's patch.
I didn't agree to put the same fix on the mainline until we first had a
precise definition of what constraints the Ada frontend (seems to) put on
the middle-end.  And with the further conversation it looks to me what
the Ada frontend does/wants isn't what it communicates to the middle-end.

So I'm against applying the patch to the mainline without proper texi
documentation of what Ada does here (it should make little sense, too,
of course).

Richard.

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-23 17:24   ` Richard Guenther
@ 2007-01-23 19:38     ` Mark Mitchell
  2007-01-23 20:57       ` Richard Guenther
  2007-01-24  0:53     ` Richard Kenner
  1 sibling, 1 reply; 49+ messages in thread
From: Mark Mitchell @ 2007-01-23 19:38 UTC (permalink / raw)
  To: Richard Guenther; +Cc: Eric Botcazou, gcc-patches

Richard Guenther wrote:
> On 1/23/07, Mark Mitchell <mark@codesourcery.com> wrote:
>> Eric Botcazou wrote:
>>
>> > 2007-01-06  Eric Botcazou  <ebotcazou@adacore.com>
>> >
>> >       * tree-sra.c (sra_walk_fns) <ldst>: Document new restriction.
>> >       (sra_walk_modify_expr) <rhs_elt>: Treat the reference as a use
>> >       if the lhs has side-effects.
>> >       <lhs_elt>: Treat the reference as a use if the rhs has
>> side-effects.
>> >
>> > :ADDPATCH Tree-SRA:
>>
>> I've read through this thread, and I think Eric's patch makes sense.  (I
>> think Richard G. eventually came to the same conclusion, so I'm not
>> claiming this is some brilliant insight on my part.)  Even in C, keeping
>> volatile accesses "more atomic" seems like a good thing.  And,
>> certainly, any possible pessimization by not doing individual accesses
>> to move data out of volatile structures is going to be insignificant, to
>> overall program runtime.
> 
> I agreed to fix the (possible) regression on the branches with Eric's
> patch.
> I didn't agree to put the same fix on the mainline until we first had a
> precise definition of what constraints the Ada frontend (seems to) put on
> the middle-end.  And with the further conversation it looks to me what
> the Ada frontend does/wants isn't what it communicates to the middle-end.
> 
> So I'm against applying the patch to the mainline without proper texi
> documentation of what Ada does here (it should make little sense, too,
> of course).

If we don't want the patch on the mainline, then we may not want it on
the branches either.  It's a bad idea to fix a bug on a release branch
without fixing it on mainline too, as it's creating a future regression.

I understand Daniel's comments about wanting to make sure that we're
checking the right flags, but the basic issue seems entirely independent
of Ada.  If there's an assignment of a volatile object to a non-volatile
object, I think we should avoid splitting the copy, in order to reduce
the chances that we're accessing volatile words multiple times.

-- 
Mark Mitchell
CodeSourcery
mark@codesourcery.com
(650) 331-3385 x713

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-23 19:38     ` Mark Mitchell
@ 2007-01-23 20:57       ` Richard Guenther
  2007-01-23 22:07         ` Mark Mitchell
  2007-01-24  1:31         ` Richard Kenner
  0 siblings, 2 replies; 49+ messages in thread
From: Richard Guenther @ 2007-01-23 20:57 UTC (permalink / raw)
  To: Mark Mitchell; +Cc: Eric Botcazou, gcc-patches

On 1/23/07, Mark Mitchell <mark@codesourcery.com> wrote:
> Richard Guenther wrote:
> > On 1/23/07, Mark Mitchell <mark@codesourcery.com> wrote:
> >> Eric Botcazou wrote:
> >>
> >> > 2007-01-06  Eric Botcazou  <ebotcazou@adacore.com>
> >> >
> >> >       * tree-sra.c (sra_walk_fns) <ldst>: Document new restriction.
> >> >       (sra_walk_modify_expr) <rhs_elt>: Treat the reference as a use
> >> >       if the lhs has side-effects.
> >> >       <lhs_elt>: Treat the reference as a use if the rhs has
> >> side-effects.
> >> >
> >> > :ADDPATCH Tree-SRA:
> >>
> >> I've read through this thread, and I think Eric's patch makes sense.  (I
> >> think Richard G. eventually came to the same conclusion, so I'm not
> >> claiming this is some brilliant insight on my part.)  Even in C, keeping
> >> volatile accesses "more atomic" seems like a good thing.  And,
> >> certainly, any possible pessimization by not doing individual accesses
> >> to move data out of volatile structures is going to be insignificant, to
> >> overall program runtime.
> >
> > I agreed to fix the (possible) regression on the branches with Eric's
> > patch.
> > I didn't agree to put the same fix on the mainline until we first had a
> > precise definition of what constraints the Ada frontend (seems to) put on
> > the middle-end.  And with the further conversation it looks to me what
> > the Ada frontend does/wants isn't what it communicates to the middle-end.
> >
> > So I'm against applying the patch to the mainline without proper texi
> > documentation of what Ada does here (it should make little sense, too,
> > of course).
>
> If we don't want the patch on the mainline, then we may not want it on
> the branches either.  It's a bad idea to fix a bug on a release branch
> without fixing it on mainline too, as it's creating a future regression.
>
> I understand Daniel's comments about wanting to make sure that we're
> checking the right flags, but the basic issue seems entirely independent
> of Ada.  If there's an assignment of a volatile object to a non-volatile
> object, I think we should avoid splitting the copy, in order to reduce
> the chances that we're accessing volatile words multiple times.

We don't have defined what TYPE_VOLATILE on a structure type means.
And it certainly won't work the way the Ada frontend seems to thing for
random structures and/or targets.  I'm just requesting that we define what
it means and that the Ada frontend maybe re-considers the way it
want's to guarantee its 'atomicity' constraints on whole-aggregate access.

Of course fixing SRA with Erics patch is not wrong - but it will make us
not fix the bug or missing feature on the mainline, it's only papering over
at one specific place.  And without a set of sophisticated testcases we're
going to regress again without noticing.

Richard.

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-23 20:57       ` Richard Guenther
@ 2007-01-23 22:07         ` Mark Mitchell
  2007-01-24  1:39           ` Richard Kenner
  2007-01-24 13:33           ` Eric Botcazou
  2007-01-24  1:31         ` Richard Kenner
  1 sibling, 2 replies; 49+ messages in thread
From: Mark Mitchell @ 2007-01-23 22:07 UTC (permalink / raw)
  To: Richard Guenther; +Cc: Eric Botcazou, gcc-patches

Richard Guenther wrote:

>> I understand Daniel's comments about wanting to make sure that we're
>> checking the right flags, but the basic issue seems entirely independent
>> of Ada.  If there's an assignment of a volatile object to a non-volatile
>> object, I think we should avoid splitting the copy, in order to reduce
>> the chances that we're accessing volatile words multiple times.
> 
> We don't have defined what TYPE_VOLATILE on a structure type means.

In general, GCC's middle-end implements C, unless otherwise specified.
Now, it's true that ISO C also doesn't completely say what volatile
means on a struct, as discussed in the previous thread.  (For example,
if the struct is 8 bytes, and the machine only has 4 byte loads, it is
not guaranteed that the accesses are ordered in any particular way.)

But, I think your statement is overly broad.  We certainly know some
things about volatile.  For example, it's a bad thing to generate two
references to the same byte of a volatile object when only one appears
in the source.  So, we should prefer word accesses to byte accesses,
when we think we can.

I'm a big fan of specifications, but volatile is something where, in
practice, best-effort and semi-formal rules are probably the best we can
do, in the context of a portable compiler with a big legacy codebase.
There are enough defect reports about volatile in the C and C++
standards to make me think that there's no clear consensus about exactly
what ought to be done, but, clearly, volatile is a useful concept, and
we should try to do our best.

> Of course fixing SRA with Erics patch is not wrong - but it will make us
> not fix the bug or missing feature on the mainline, it's only papering over
> at one specific place.

I don't think it's papering over anything.  It just may not be the
complete solution.  The fact that it's not complete doesn't mean it's
not a good start.  Is there some more generic, over-arching mechanism
that you think will solve the whole problem?  That's a serious question,
but my expectation is that there's nothing for it but to handle this
case-by-case, in which case Eric's approach is a fine step.

> And without a set of sophisticated testcases we're
> going to regress again without noticing.

Yes, test cases are good.  If Eric's patch didn't have a test case, then
one should be added.

My concern is that we not block incremental progress just because we
don't have a 100% solution.

-- 
Mark Mitchell
CodeSourcery
mark@codesourcery.com
(650) 331-3385 x713

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-23 17:24   ` Richard Guenther
  2007-01-23 19:38     ` Mark Mitchell
@ 2007-01-24  0:53     ` Richard Kenner
  1 sibling, 0 replies; 49+ messages in thread
From: Richard Kenner @ 2007-01-24  0:53 UTC (permalink / raw)
  To: richard.guenther; +Cc: ebotcazou, gcc-patches, mark

> I agreed to fix the (possible) regression on the branches with Eric's patch.
> I didn't agree to put the same fix on the mainline until we first had a
> precise definition of what constraints the Ada frontend (seems to) put on
> the middle-end.  And with the further conversation it looks to me what
> the Ada frontend does/wants isn't what it communicates to the middle-end.

I'm missing something.  I don't see this as language-specific: if you say
"volatile", you mean "do as little mucking with these accesses as possible".
Sure, that isn't defined in a formal matter, but it really can't be and
we all know what it means in a pragmatic sense.

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-23 20:57       ` Richard Guenther
  2007-01-23 22:07         ` Mark Mitchell
@ 2007-01-24  1:31         ` Richard Kenner
  2007-01-24  9:27           ` Richard Guenther
  1 sibling, 1 reply; 49+ messages in thread
From: Richard Kenner @ 2007-01-24  1:31 UTC (permalink / raw)
  To: richard.guenther; +Cc: ebotcazou, gcc-patches, mark

> We don't have defined what TYPE_VOLATILE on a structure type means.

It means exactly the same as defined everywhere else: do as little as
possible (in a pragmatic sense) in changing the accesses to objects of
that type from the way they were originally written.

> And it certainly won't work the way the Ada frontend seems to thing for
> random structures and/or targets.  I'm just requesting that we define what
> it means and that the Ada frontend maybe re-considers the way it
> want's to guarantee its 'atomicity' constraints on whole-aggregate access.

If there's a problem here, please be specific as to what it is because
I'm having trouble understanding this point.

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-23 22:07         ` Mark Mitchell
@ 2007-01-24  1:39           ` Richard Kenner
  2007-01-24 13:33           ` Eric Botcazou
  1 sibling, 0 replies; 49+ messages in thread
From: Richard Kenner @ 2007-01-24  1:39 UTC (permalink / raw)
  To: mark; +Cc: ebotcazou, gcc-patches, richard.guenther

> I'm a big fan of specifications, but volatile is something where, in
> practice, best-effort and semi-formal rules are probably the best we can
> do, in the context of a portable compiler with a big legacy codebase.

I agree, but also think it's more than that.  Things at the level of
what machine code is generated is something that's very hard to fit
into a specification at a language level.  You usually can't do that
without resorting to a "you know what I mean" type of language.

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-24  1:31         ` Richard Kenner
@ 2007-01-24  9:27           ` Richard Guenther
  2007-01-24 13:02             ` Richard Kenner
  0 siblings, 1 reply; 49+ messages in thread
From: Richard Guenther @ 2007-01-24  9:27 UTC (permalink / raw)
  To: Richard Kenner; +Cc: ebotcazou, gcc-patches, mark

On 1/24/07, Richard Kenner <kenner@vlsi1.ultra.nyu.edu> wrote:
> > We don't have defined what TYPE_VOLATILE on a structure type means.
>
> It means exactly the same as defined everywhere else: do as little as
> possible (in a pragmatic sense) in changing the accesses to objects of
> that type from the way they were originally written.
>
> > And it certainly won't work the way the Ada frontend seems to thing for
> > random structures and/or targets.  I'm just requesting that we define what
> > it means and that the Ada frontend maybe re-considers the way it
> > want's to guarantee its 'atomicity' constraints on whole-aggregate access.
>
> If there's a problem here, please be specific as to what it is because
> I'm having trouble understanding this point.

I would suggest specifying TYPE_VOLATILE on a structure type as matching
behavior with TYPE_VOLATILE on a bitfield with the same machine mode.
So, if you can set TYPE_MODE of the structure to SImode (as in Erics original
example) volatile on the strucutre should behave as if it were an SImode
bitfield (or integer).  So if you can only use BLKmode on the
structure it's either
invalid or undefined what happens.

This would of course force more "whole aggregate" access than appearantly
required by the language, for example forcing read-modify-write cycles for
structure component access rather than just forcing whole aggregate access
for whole aggregate copy (which doesn't sound too useful on its own).

Richard.

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-24  9:27           ` Richard Guenther
@ 2007-01-24 13:02             ` Richard Kenner
  2007-01-24 13:33               ` Richard Guenther
  0 siblings, 1 reply; 49+ messages in thread
From: Richard Kenner @ 2007-01-24 13:02 UTC (permalink / raw)
  To: richard.guenther; +Cc: ebotcazou, gcc-patches, mark

> I would suggest specifying TYPE_VOLATILE on a structure type as matching
> behavior with TYPE_VOLATILE on a bitfield with the same machine mode.

I don't follow.  TYPE_VOLATILE on a bitfield seems to be the *least*
well-defined to me.  For a non-bitfield, we all understand what the "natural"
access to that field or variable should be, so volatile says "always do that
access".  But what's the "natural" access to a bitfield that we are to
preserve?

> So, if you can set TYPE_MODE of the structure to SImode (as in Erics original
> example) volatile on the strucutre should behave as if it were an SImode
> bitfield (or integer).  

Sure: that's the same as for a scalar.

> So if you can only use BLKmode on the structure it's either invalid
> or undefined what happens.

I see *all* of these semantics of volatile as undefined from a formal point
of view.  When you set it on BLKmode, obviously you're asking for less, but
the generic principle of doing the least changes still applies.  For example,
a copy of an entire structure should access each byte of memory exactly once
(which rules out field-by-field copies of structures that contain bitfields).

> This would of course force more "whole aggregate" access than appearantly
> required by the language, for example forcing read-modify-write cycles for
> structure component access rather than just forcing whole aggregate access
> for whole aggregate copy (which doesn't sound too useful on its own).

I don't follow.

I still don't understand why it's necessary or useful to try to define these
semantics more precisely.  Traditionally, "volatile" has simply meant to do
as little mucking around with accesses to volatile objects as possible.
That's also the easiest implementation.  I don't understand what problem you
are trying to solve.

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-24 13:02             ` Richard Kenner
@ 2007-01-24 13:33               ` Richard Guenther
  2007-01-24 13:57                 ` Richard Kenner
  2007-01-24 18:31                 ` Mark Mitchell
  0 siblings, 2 replies; 49+ messages in thread
From: Richard Guenther @ 2007-01-24 13:33 UTC (permalink / raw)
  To: Richard Kenner; +Cc: ebotcazou, gcc-patches, mark

On 1/24/07, Richard Kenner <kenner@vlsi1.ultra.nyu.edu> wrote:
> > I would suggest specifying TYPE_VOLATILE on a structure type as matching
> > behavior with TYPE_VOLATILE on a bitfield with the same machine mode.
>
> I don't follow.  TYPE_VOLATILE on a bitfield seems to be the *least*
> well-defined to me.  For a non-bitfield, we all understand what the "natural"
> access to that field or variable should be, so volatile says "always do that
> access".  But what's the "natural" access to a bitfield that we are to
> preserve?

The choice of wording using "bitfield" was wrong then.  Disregard it
and substitute
"integer of the specified mode".

> > So, if you can set TYPE_MODE of the structure to SImode (as in Erics original
> > example) volatile on the strucutre should behave as if it were an SImode
> > bitfield (or integer).
>
> Sure: that's the same as for a scalar.
>
> > So if you can only use BLKmode on the structure it's either invalid
> > or undefined what happens.
>
> I see *all* of these semantics of volatile as undefined from a formal point
> of view.

Sorry - but we lived in an undefined middle-end world for too long.  "Undefined"
doesn't work there.  It is what gives you what we have now - different behavior
than you expect(ed).

>  When you set it on BLKmode, obviously you're asking for less, but
> the generic principle of doing the least changes still applies.  For example,
> a copy of an entire structure should access each byte of memory exactly once
> (which rules out field-by-field copies of structures that contain bitfields).

I'm asking for a specification, for constraints -- if you want to guarantee what
you said you should specify it.  I'm for leaving it as undefined (which is what
we have now).  Like if you have a large "volatile" structure gcc might still
use memcpy to copy it?  The memcpy implementation might touch parts
of the structure twice to, for example, prime the cache.  So I say you cannot
expect anything for structures that don't fit in a machine word.  (Like, forget
TImode on x86 without proper alignment and forcing SSE register loads)

> > This would of course force more "whole aggregate" access than appearantly
> > required by the language, for example forcing read-modify-write cycles for
> > structure component access rather than just forcing whole aggregate access
> > for whole aggregate copy (which doesn't sound too useful on its own).
>
> I don't follow.
>
> I still don't understand why it's necessary or useful to try to define these
> semantics more precisely.  Traditionally, "volatile" has simply meant to do
> as little mucking around with accesses to volatile objects as possible.
> That's also the easiest implementation.  I don't understand what problem you
> are trying to solve.

Eh?  We are currently perfectly conforming to the undefinedness of volatile
on a structure -- yet you want to "fix" our implementation.  I'm
asking for a reason,
and you say you don't want to give me any other than "as little mucking around
with accesses as possible"?  _That_ sounds like a useful definition to adhere
to the Ada language standard requirements of 'atomicity'.

I'll stop arguing with you here - it doesn't make sense.

Richard.

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-23 22:07         ` Mark Mitchell
  2007-01-24  1:39           ` Richard Kenner
@ 2007-01-24 13:33           ` Eric Botcazou
  1 sibling, 0 replies; 49+ messages in thread
From: Eric Botcazou @ 2007-01-24 13:33 UTC (permalink / raw)
  To: Mark Mitchell; +Cc: Richard Guenther, gcc-patches

> I don't think it's papering over anything.  It just may not be the
> complete solution.  The fact that it's not complete doesn't mean it's
> not a good start.  Is there some more generic, over-arching mechanism
> that you think will solve the whole problem?  That's a serious question,
> but my expectation is that there's nothing for it but to handle this
> case-by-case, in which case Eric's approach is a fine step.

Thanks for stepping in, especially on the right side. ;-) ;-)

> Yes, test cases are good.  If Eric's patch didn't have a test case, then
> one should be added.

I posted an Ada testcase somewhere in the thread, I'll add it to gnat.dg if 
the patch is formally approved.

> My concern is that we not block incremental progress just because we
> don't have a 100% solution.

For the time being we haven't detected any other regressions pertaining to 
atomicity support in the 4.x series of Ada compilers.  Of course that may 
change in the future, but I don't expect problems as serious as this one.

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-24 13:33               ` Richard Guenther
@ 2007-01-24 13:57                 ` Richard Kenner
  2007-01-24 18:31                 ` Mark Mitchell
  1 sibling, 0 replies; 49+ messages in thread
From: Richard Kenner @ 2007-01-24 13:57 UTC (permalink / raw)
  To: richard.guenther; +Cc: ebotcazou, gcc-patches, mark

> Sorry - but we lived in an undefined middle-end world for too long.
> "Undefined" doesn't work there.  It is what gives you what we have
> now - different behavior than you expect(ed).

I see that different behavior as simply a BUG, not due to insufficient
specification.  It might even be possible to find a test case not involving
volatile (such as a call) that fails due to that bug.

SRA certainly respects volatile in every other place: it just forgot one.

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-24 13:33               ` Richard Guenther
  2007-01-24 13:57                 ` Richard Kenner
@ 2007-01-24 18:31                 ` Mark Mitchell
  2007-01-24 23:57                   ` Richard Kenner
  2007-01-25  9:38                   ` Richard Guenther
  1 sibling, 2 replies; 49+ messages in thread
From: Mark Mitchell @ 2007-01-24 18:31 UTC (permalink / raw)
  To: Richard Guenther; +Cc: Richard Kenner, ebotcazou, gcc-patches

Richard Guenther wrote:

> I'm asking for a specification, for constraints -- if you want to
> guarantee what
> you said you should specify it.  I'm for leaving it as undefined (which
> is what
> we have now).  Like if you have a large "volatile" structure gcc might
> still
> use memcpy to copy it?  The memcpy implementation might touch parts
> of the structure twice to, for example, prime the cache. 

Yes, I would consider it a bug to use memcpy to implement structure
assignment from/to a volatile structure, for exactly the reason you
state.  I think you can get that directly from the C standard, which
says that the number of accesses to volatile memory is exactly the
number that appear in the program.

One might also argue that it would be a bug for the compiler to generate
a byte-wise copy inline (rather than a word-wise) copy, since that, too,
will result in additional accesses on most hardware.  I'd be more
forgiving there, but it's a question of degree.

I think the issue that you're struggling with is that volatile is not
entirely well-defined.  It's a specification at the language level of
something that has machine-specific semantics.  There's a big huge
"implementation-defined" cloud hanging over it.  If we were building a
compiler designed only for us on (say) ARM chips, then maybe we could go
further in saying exactly what it means.  But, with GCC's full
generality, we can't say exactly.  That doesn't mean that volatile isn't
useful, or that we shouldn't make efforts to make it meaningful.

As a programmer, I would try hard to minimize my reliance on volatile
for exactly this reason.  On a 32-bit machine, I'd expect the compiler
to handle "volatile int" by using 32-bit word-aligned loads/stores.
But, I would try to avoid "struct S { volatile char a; volatile char b;
}", if reading/writing "a" and "b" had side-effects, because I wouldn't
know exactly what that might do.  I'd use inline assembly if I needed
that fine level of control.

As a compiler developer, I would try to make the compiler be "as atomic
as possible".  So, for example, when copying from a volatile structure,
I'd try to do a block copy (rather than call memcpy), and I would avoid
SRA-ing a volatile structure.

In other words, I would try to be defensive -- whether acting as
programmer or compiler developer -- and hope that then most programs
using volatile would behave as I expect -- because programmers don't
expect too much and because compilers do as well as possible.

-- 
Mark Mitchell
CodeSourcery
mark@codesourcery.com
(650) 331-3385 x713

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-24 18:31                 ` Mark Mitchell
@ 2007-01-24 23:57                   ` Richard Kenner
  2007-01-25  9:38                   ` Richard Guenther
  1 sibling, 0 replies; 49+ messages in thread
From: Richard Kenner @ 2007-01-24 23:57 UTC (permalink / raw)
  To: mark; +Cc: ebotcazou, gcc-patches, richard.guenther

> As a programmer, I would try hard to minimize my reliance on volatile
> for exactly this reason.  On a 32-bit machine, I'd expect the compiler
> to handle "volatile int" by using 32-bit word-aligned loads/stores.
> But, I would try to avoid "struct S { volatile char a; volatile char b;
> }", if reading/writing "a" and "b" had side-effects, because I wouldn't
> know exactly what that might do.  I'd use inline assembly if I needed
> that fine level of control.

I know you and most people contributing to this thread know this, but just
for others who might not be thinking about it, there's *another* purpose of
volatile that all of us agree on.  If there's an object X of struct S and you
have

	X.a = 5;
	if (X.a == 5) ...

That second comparison must *not* be optimized away.  That's the core of the
meaning of volatile: that two separate reads must always be done separately.
We can argue about what it means in a *single* read, but it always means
*something* to declare anything volatile, so we can never make it invalid.

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-24 18:31                 ` Mark Mitchell
  2007-01-24 23:57                   ` Richard Kenner
@ 2007-01-25  9:38                   ` Richard Guenther
  2007-01-25 11:38                     ` Richard Kenner
  1 sibling, 1 reply; 49+ messages in thread
From: Richard Guenther @ 2007-01-25  9:38 UTC (permalink / raw)
  To: Mark Mitchell; +Cc: Richard Kenner, ebotcazou, gcc-patches

On 1/24/07, Mark Mitchell <mark@codesourcery.com> wrote:
> Richard Guenther wrote:
>
> > I'm asking for a specification, for constraints -- if you want to
> > guarantee what
> > you said you should specify it.  I'm for leaving it as undefined (which
> > is what
> > we have now).  Like if you have a large "volatile" structure gcc might
> > still
> > use memcpy to copy it?  The memcpy implementation might touch parts
> > of the structure twice to, for example, prime the cache.
>
> Yes, I would consider it a bug to use memcpy to implement structure
> assignment from/to a volatile structure, for exactly the reason you
> state.  I think you can get that directly from the C standard, which
> says that the number of accesses to volatile memory is exactly the
> number that appear in the program.

I'm not talking about the C language here (after all, this thread resulted
from a Ada regression), but about the middle-end and what it means
to set TYPE_VOLATILE on a structure type.  _That_ we should define -
we obviously cannot "define" what volatile means in the context of the
C language at our will.

So, please, if a frontend assumes that setting TYPE_VOLATILE on
a structure type has defined semantics, then please let's _define_
those.  Or if it doesn't fit the "middle-end is like C" then _don't_ use
TYPE_VOLATILE but use whatever TYPE_FOOBAR flag we can
come up to mark the middle-end structure type as adhering to some
well defined constraints.

After all I thought we need a middle-end type-system for LTO - finally.

(I'm disappointed we're still actively refusing to define that).

Richard.

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-25  9:38                   ` Richard Guenther
@ 2007-01-25 11:38                     ` Richard Kenner
  2007-01-25 16:32                       ` Mark Mitchell
  2007-01-25 22:03                       ` Mike Stump
  0 siblings, 2 replies; 49+ messages in thread
From: Richard Kenner @ 2007-01-25 11:38 UTC (permalink / raw)
  To: richard.guenther; +Cc: ebotcazou, gcc-patches, mark

> So, please, if a frontend assumes that setting TYPE_VOLATILE on
> a structure type has defined semantics, then please let's _define_ those.

As both Mark and I have been trying to say, that's not possible.  The
middle-end defines things at roughly the level that a language
specification would and these sorts of behaviors of volatile affect
lower-level things, which cannot be expressed in such a manner.  The
best we can do is say "don't change things".  And indeed in tree parts
of the middle-end, that's a quite reasonable operational definition
since it means don't change the way the reference looks in the tree.
But we still can't try to express it in any semantically-meaningful way.

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-25 11:38                     ` Richard Kenner
@ 2007-01-25 16:32                       ` Mark Mitchell
  2007-01-25 16:41                         ` Richard Guenther
  2007-01-25 22:03                       ` Mike Stump
  1 sibling, 1 reply; 49+ messages in thread
From: Mark Mitchell @ 2007-01-25 16:32 UTC (permalink / raw)
  To: Richard Kenner; +Cc: richard.guenther, ebotcazou, gcc-patches

Richard Kenner wrote:
>> So, please, if a frontend assumes that setting TYPE_VOLATILE on
>> a structure type has defined semantics, then please let's _define_ those.
> 
> As both Mark and I have been trying to say, that's not possible.  The
> middle-end defines things at roughly the level that a language
> specification would and these sorts of behaviors of volatile affect
> lower-level things, which cannot be expressed in such a manner. 

I agree.  I think TYPE_VOLATILE is the right flag, and that any other
flag would be essentially equivalent.  Or, rather, we might be able to
specify some particular behavior with additional flags, like
TYPE_DO_NOT_SRA_ME, but even several such flags would not capture the
totality of the C "volatile" keyword, so we would still need TYPE_VOLATILE.

Of course, I'm all for specifying whatever we can and writing that down.
 You may recall that I gave Eric B. a lot of grief about documentation
for the last round of volatile changes, and there is now some
documentation about them.  (It's a little fuzzy, and I argued for less
fuzzineess than we adopted, but it's certainly much less fuzzy than what
was there before, and I consider it reasonable progress.)

-- 
Mark Mitchell
CodeSourcery
mark@codesourcery.com
(650) 331-3385 x713

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-25 16:32                       ` Mark Mitchell
@ 2007-01-25 16:41                         ` Richard Guenther
  2007-01-25 18:29                           ` Richard Kenner
  0 siblings, 1 reply; 49+ messages in thread
From: Richard Guenther @ 2007-01-25 16:41 UTC (permalink / raw)
  To: Mark Mitchell; +Cc: Richard Kenner, ebotcazou, gcc-patches

On 1/25/07, Mark Mitchell <mark@codesourcery.com> wrote:
> Richard Kenner wrote:
> >> So, please, if a frontend assumes that setting TYPE_VOLATILE on
> >> a structure type has defined semantics, then please let's _define_ those.
> >
> > As both Mark and I have been trying to say, that's not possible.  The
> > middle-end defines things at roughly the level that a language
> > specification would and these sorts of behaviors of volatile affect
> > lower-level things, which cannot be expressed in such a manner.
>
> I agree.  I think TYPE_VOLATILE is the right flag, and that any other
> flag would be essentially equivalent.  Or, rather, we might be able to
> specify some particular behavior with additional flags, like
> TYPE_DO_NOT_SRA_ME, but even several such flags would not capture the
> totality of the C "volatile" keyword, so we would still need TYPE_VOLATILE.
>
> Of course, I'm all for specifying whatever we can and writing that down.
>  You may recall that I gave Eric B. a lot of grief about documentation
> for the last round of volatile changes, and there is now some
> documentation about them.  (It's a little fuzzy, and I argued for less
> fuzzineess than we adopted, but it's certainly much less fuzzy than what
> was there before, and I consider it reasonable progress.)

Ok, so may be it then.  I guess the Ada specification for pragma Atomic
is equally vague and fuzzy and allows for an undefined implementation.

Richard.

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-25 16:41                         ` Richard Guenther
@ 2007-01-25 18:29                           ` Richard Kenner
  0 siblings, 0 replies; 49+ messages in thread
From: Richard Kenner @ 2007-01-25 18:29 UTC (permalink / raw)
  To: richard.guenther; +Cc: ebotcazou, gcc-patches, mark

> Ok, so may be it then.  I guess the Ada specification for pragma Atomic
> is equally vague and fuzzy and allows for an undefined implementation.

No, Atomic actually has a bit more of a defined implemenation because
of "C.6(10): it is illegal to apply ... an Atomic ... pragma to an
object or type if the implementation cannot support the indivisible
reads and updates required by the pragma".

Under "Dynamic Semantics", C.6(15) says "For an atomic object ... all reads
and updates of the object as a whole are indivisible".

And under "Implemention Requirements", C.6(20) says "The external effect of
a program is defined to include each read and update of a volatile or atomic
object.  The implementation shall not generate any memory reads or updates
of atomic and volatile objects other than those specified by the program".

Note that the Ada front end doesn't allow pragma Atomic for BLKmode
objects or types.

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-25 11:38                     ` Richard Kenner
  2007-01-25 16:32                       ` Mark Mitchell
@ 2007-01-25 22:03                       ` Mike Stump
  2007-01-26  2:37                         ` Mark Mitchell
  1 sibling, 1 reply; 49+ messages in thread
From: Mike Stump @ 2007-01-25 22:03 UTC (permalink / raw)
  To: Richard Kenner; +Cc: richard.guenther, ebotcazou, gcc-patches, mark

On Jan 25, 2007, at 3:44 AM, Richard Kenner wrote:
> But we still can't try to express it in any semantically-meaningful  
> way.

:-(  I disagree fundamentally with this notion.  gcc, being an  
implementation, is free to talk about opcodes produced or not on i686,  
if it wants.  It can talk about signal handlers, exceptions, delay  
slots...  It can talk about chipset bus interlocks on various  
processors.  I think many people probably posses enough knowledge of  
what volatile means to attempt a definition.  They might not get a few  
of the corner cases right, but, those can be hammered out over time.

Someone on the C++ committee wanted to do this and nail things down,  
but, they didn't have the background and the deep understanding  
required, so we shot it down.  Better to have less than a language  
spec that is just wrong.

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-25 22:03                       ` Mike Stump
@ 2007-01-26  2:37                         ` Mark Mitchell
  2007-01-26  2:44                           ` Mike Stump
  0 siblings, 1 reply; 49+ messages in thread
From: Mark Mitchell @ 2007-01-26  2:37 UTC (permalink / raw)
  To: Mike Stump; +Cc: Richard Kenner, richard.guenther, ebotcazou, gcc-patches

Mike Stump wrote:
> On Jan 25, 2007, at 3:44 AM, Richard Kenner wrote:
>> But we still can't try to express it in any semantically-meaningful way.
> 
> :-(  I disagree fundamentally with this notion.  gcc, being an
> implementation, is free to talk about opcodes produced or not on i686,
> if it wants.

Sure, that's doable in principle.

But, once you write it down, you get to figure out how to make sure that
the entire compiler honors it, including the machine-independent
TREE-SSA bits.  Since the sensible choices may be different for each
CPU, how to enforce the right constraints (either separately for each
CPU, or some superset of all of them) may not be easy.

-- 
Mark Mitchell
CodeSourcery
mark@codesourcery.com
(650) 331-3385 x713

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-26  2:37                         ` Mark Mitchell
@ 2007-01-26  2:44                           ` Mike Stump
  2007-01-26  2:54                             ` Mark Mitchell
  2007-01-26 13:13                             ` Richard Kenner
  0 siblings, 2 replies; 49+ messages in thread
From: Mike Stump @ 2007-01-26  2:44 UTC (permalink / raw)
  To: Mark Mitchell; +Cc: Richard Kenner, richard.guenther, ebotcazou, gcc-patches

On Jan 25, 2007, at 6:37 PM, Mark Mitchell wrote:
> But, once you write it down, you get to figure out how to make sure  
> that
> the entire compiler honors it, including the machine-independent
> TREE-SSA bits.  Since the sensible choices may be different for each
> CPU, how to enforce the right constraints (either separately for each
> CPU, or some superset of all of them) may not be easy.

Well, you know not writing it down doesn't absolve us from having to  
get it right anyway.  I'd further say that writing it does should not  
constrain us from fixing the wording if the wording is broken, but I  
agree, if we write it down, it would be better to have the right  
theoretic answer and leave unspecified those bits that should be.

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-26  2:44                           ` Mike Stump
@ 2007-01-26  2:54                             ` Mark Mitchell
  2007-01-26  9:17                               ` Richard Guenther
  2007-01-26 13:13                             ` Richard Kenner
  1 sibling, 1 reply; 49+ messages in thread
From: Mark Mitchell @ 2007-01-26  2:54 UTC (permalink / raw)
  To: Mike Stump; +Cc: Richard Kenner, richard.guenther, ebotcazou, gcc-patches

Mike Stump wrote:
> On Jan 25, 2007, at 6:37 PM, Mark Mitchell wrote:
>> But, once you write it down, you get to figure out how to make sure that
>> the entire compiler honors it, including the machine-independent
>> TREE-SSA bits.  Since the sensible choices may be different for each
>> CPU, how to enforce the right constraints (either separately for each
>> CPU, or some superset of all of them) may not be easy.
> 
> Well, you know not writing it down doesn't absolve us from having to get
> it right anyway.  I'd further say that writing it does should not
> constrain us from fixing the wording if the wording is broken, but I
> agree, if we write it down, it would be better to have the right
> theoretic answer and leave unspecified those bits that should be.

If we make promises and don't keep them, users might be angry.  If we
don't make promises, they may be disappointed by the fuzziness, but at
least we're not tricking them.

I'm in no way opposed to specifying exactly what "volatile" means on an
i686 CPU, but I think it's hard to specify and hard to implement.  Prove
me wrong, by all means!

I would hope that we could all agree that (a) specifying the behavior in
detail would be nice, but (b) absent a specification, fixing issues on a
case-by-case basis is reasonable.  It's a fallacy to argue that just
because I'm arguing for (b) that I'm opposed to a middle-end type
system, don't want to specify behavior.

I just want to make the compiler better, step by step, and the step
right in front of us is Eric's patch, which everyone agrees improves
things, modulo, perhaps, exactly how it's implement.  So, let's get the
patch in, and move on.  When someone wants to invest the effort in the
full specification, implementation, test cases, etc., we can do that,
but that shouldn't stop us fixing the current problem.

-- 
Mark Mitchell
CodeSourcery
mark@codesourcery.com
(650) 331-3385 x713

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-26  2:54                             ` Mark Mitchell
@ 2007-01-26  9:17                               ` Richard Guenther
  2007-01-26 10:12                                 ` Eric Botcazou
  2007-01-26 13:40                                 ` Richard Kenner
  0 siblings, 2 replies; 49+ messages in thread
From: Richard Guenther @ 2007-01-26  9:17 UTC (permalink / raw)
  To: Mark Mitchell; +Cc: Mike Stump, Richard Kenner, ebotcazou, gcc-patches

On 1/26/07, Mark Mitchell <mark@codesourcery.com> wrote:
> Mike Stump wrote:
> > On Jan 25, 2007, at 6:37 PM, Mark Mitchell wrote:
> >> But, once you write it down, you get to figure out how to make sure that
> >> the entire compiler honors it, including the machine-independent
> >> TREE-SSA bits.  Since the sensible choices may be different for each
> >> CPU, how to enforce the right constraints (either separately for each
> >> CPU, or some superset of all of them) may not be easy.
> >
> > Well, you know not writing it down doesn't absolve us from having to get
> > it right anyway.  I'd further say that writing it does should not
> > constrain us from fixing the wording if the wording is broken, but I
> > agree, if we write it down, it would be better to have the right
> > theoretic answer and leave unspecified those bits that should be.
>
> If we make promises and don't keep them, users might be angry.  If we
> don't make promises, they may be disappointed by the fuzziness, but at
> least we're not tricking them.

I see that Ada _does_ make promises to the user.  And I see the Ada
frontend tries to fulfill these promises by relying on the middle-end not
mucking up with TYPE_VOLATILE structs.  (And to my surprise it
somehow works to some degree)

I'd like to have those bits that the middle-end can reasonably guarantee
specified - this will be a stricter better situation than what we have now.
I _don't_ suggest to put this anywhere in user documentation to suggest
we're promising anything on a C volatile struct type.  But I want to nail
down as much as possible of the GCC frontend - middle-end interface.

As we happen to have (at least) one language that GCC supports that
has a somewhat strict and reasonable specification about a sort-of
volatileness we better make sure we can map this somehow to the
middle-end.

> I'm in no way opposed to specifying exactly what "volatile" means on an
> i686 CPU, but I think it's hard to specify and hard to implement.  Prove
> me wrong, by all means!
>
> I would hope that we could all agree that (a) specifying the behavior in
> detail would be nice, but (b) absent a specification, fixing issues on a
> case-by-case basis is reasonable.  It's a fallacy to argue that just
> because I'm arguing for (b) that I'm opposed to a middle-end type
> system, don't want to specify behavior.
>
> I just want to make the compiler better, step by step, and the step
> right in front of us is Eric's patch, which everyone agrees improves
> things, modulo, perhaps, exactly how it's implement.  So, let's get the
> patch in, and move on.  When someone wants to invest the effort in the
> full specification, implementation, test cases, etc., we can do that,
> but that shouldn't stop us fixing the current problem.

I disagree.  We have a defect for Ada and Ada happens to require certain
semantics on a for now unspecified middle-end construct.  It is perfectly
reasonable to require, that upon fixing this defect, the interface between
the frontend and middle-end is specified where it relates to that defect.

And I still request that the Ada folks come up with a sentence or two for
that.  It would have been all done in the time we're discussing here.

Richard.

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-26  9:17                               ` Richard Guenther
@ 2007-01-26 10:12                                 ` Eric Botcazou
  2007-01-26 13:40                                 ` Richard Kenner
  1 sibling, 0 replies; 49+ messages in thread
From: Eric Botcazou @ 2007-01-26 10:12 UTC (permalink / raw)
  To: Richard Guenther; +Cc: gcc-patches, Mark Mitchell, Mike Stump, Richard Kenner

> I disagree.  We have a defect for Ada and Ada happens to require certain
> semantics on a for now unspecified middle-end construct.  It is perfectly
> reasonable to require, that upon fixing this defect, the interface between
> the frontend and middle-end is specified where it relates to that defect.

I disagree with your disagreement. :-)  I took great care of not dragging the 
discussion towards the problem of the semantics of "volatile", my patch 
doesn't rely on it, the bug I pointed out in the Tree-SRA pass is not 
directly related to it, but rather to how TREE_SIDE_EFFECTS is honored.

To recap, we have:

/* Scalarize a LDST.  To recap, this is an assignment between one scalarizable
   reference ELT and one non-scalarizable reference OTHER.  IS_OUTPUT is true
   if ELT is on the left-hand side.  */

static void
scalarize_ldst (struct sra_elt *elt, tree other,
                block_stmt_iterator *bsi, bool is_output)
{
  /* Shouldn't have gotten called for a scalar.  */
  gcc_assert (!elt->replacement);

  if (elt->use_block_copy)
    {
      /* Since ELT is not fully instantiated, we have to leave the
         block copy in place.  Treat this as a USE.  */
      scalarize_use (elt, NULL, bsi, is_output, false);
    }
  else
    {
      /* The interesting case is when ELT is fully instantiated.  In this
         case we can have each element stored/loaded directly to/from the
         corresponding slot in OTHER.  This avoids a block copy.  */


I claim that the last comment (and the code below it) violates the perfectly 
well defined TREE_SIDE_EFFECTS flag of the middle-end if it happens to be set
on OTHER because it will generate multiple instances of OTHER out of 1.

That's all.  That the discussion has drifted towards the problem of the 
semantics of "volatile" is unfortunate and should not blur the issue.

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-26  2:44                           ` Mike Stump
  2007-01-26  2:54                             ` Mark Mitchell
@ 2007-01-26 13:13                             ` Richard Kenner
  2007-01-26 19:21                               ` Mike Stump
  1 sibling, 1 reply; 49+ messages in thread
From: Richard Kenner @ 2007-01-26 13:13 UTC (permalink / raw)
  To: mrs; +Cc: ebotcazou, gcc-patches, mark, richard.guenther

> On Jan 25, 2007, at 6:37 PM, Mark Mitchell wrote:
> > But, once you write it down, you get to figure out how to make sure  
> > that
> > the entire compiler honors it, including the machine-independent
> > TREE-SSA bits.  Since the sensible choices may be different for each
> > CPU, how to enforce the right constraints (either separately for each
> > CPU, or some superset of all of them) may not be easy.
> 
> Well, you know not writing it down doesn't absolve us from having to  
> get it right anyway.  I'd further say that writing it does should not  
> constrain us from fixing the wording if the wording is broken, but I  
> agree, if we write it down, it would be better to have the right  
> theoretic answer and leave unspecified those bits that should be.

I think the point here is that writing it down at the level you suggested
(opcode choices) *overspecifies* things and creates an unrealistic burden
on the implementation, especially because if you do it that way, you have
to do it for all targets.

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-26  9:17                               ` Richard Guenther
  2007-01-26 10:12                                 ` Eric Botcazou
@ 2007-01-26 13:40                                 ` Richard Kenner
  1 sibling, 0 replies; 49+ messages in thread
From: Richard Kenner @ 2007-01-26 13:40 UTC (permalink / raw)
  To: richard.guenther; +Cc: ebotcazou, gcc-patches, mark, mrs

> And I still request that the Ada folks come up with a sentence or two for
> that.  It would have been all done in the time we're discussing here.

But both "Ada folks" and non-"Ada folks" have done this many times.  The
requirement is to avoid transformations that the change nature of the access
to volatile objects.  Yes, that's an operational specification rather than a
formal specification, but that's the best we can do without greatly
overspecifying.

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-26 13:13                             ` Richard Kenner
@ 2007-01-26 19:21                               ` Mike Stump
  0 siblings, 0 replies; 49+ messages in thread
From: Mike Stump @ 2007-01-26 19:21 UTC (permalink / raw)
  To: Richard Kenner; +Cc: ebotcazou, gcc-patches, mark, richard.guenther

On Jan 26, 2007, at 5:19 AM, Richard Kenner wrote:
> I think the point here is that writing it down at the level you  
> suggested

You read way to much into it, if you look again, I didn't state that  
we should overspecify it, I said that we had the freedom to talk  
about chipset, if we waned to.  A language standard cannot.  A  
compiler implementation can, if it _wants_ to, if it _needs_ to.   
Again, let me repeat, I'm not arguing that it needs to.

Let me give you an example, let's say that we have two volatile  
objects and an operation to perform on them.  Let's say the machine  
we're generating code for doesn't have that operation, but that it  
can be decomposed into smaller operations.  We are free to document  
the backend must not decompose the operation and leave the volatile  
objects as operands.  Or, we are free to leave it unspecified.  Ada  
might even have a mandate here.  C might as well.  I don't think I've  
seen it argued here if decomposition is allowed or not.  Now, this  
might be so trivial to those in the know that can't believe anyone  
would get something so basic wrong, and yet, I can see an expander  
for an operation split things up, fail to check for volatile.  The  
documentation would be so that people that might not know the answer  
to read it once, and then have a firm, specific grasp on the subject  
matter so that to them it is then perfectly clear decomposition is  
wrong, obvious if you will.

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-01-06 13:19 [PATCH] Tree SRA and atomicity/volatility Eric Botcazou
  2007-01-06 13:31 ` Richard Guenther
  2007-01-23 16:58 ` Mark Mitchell
@ 2007-03-02 14:55 ` Eric Botcazou
  2007-03-02 15:21   ` Diego Novillo
  2 siblings, 1 reply; 49+ messages in thread
From: Eric Botcazou @ 2007-03-02 14:55 UTC (permalink / raw)
  To: gcc-patches

> This is a regression from the 3.x series for the Ada compiler so I'm
> requesting approval for mainline/4.2/4.1 branches.
>
>
> 2007-01-06  Eric Botcazou  <ebotcazou@adacore.com>
>
> 	* tree-sra.c (sra_walk_fns) <ldst>: Document new restriction.
> 	(sra_walk_modify_expr) <rhs_elt>: Treat the reference as a use
> 	if the lhs has side-effects.
> 	<lhs_elt>: Treat the reference as a use if the rhs has side-effects.

Could I have an nth opinion from a maintainer? :-)  The thread starts at
  http://gcc.gnu.org/ml/gcc-patches/2007-01/msg00457.html

It's a relatively minor feature of Ada so I'm only requesting approval for 
mainline now.  Thanks in advance.

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 49+ messages in thread

* Re: [PATCH] Tree SRA and atomicity/volatility
  2007-03-02 14:55 ` Eric Botcazou
@ 2007-03-02 15:21   ` Diego Novillo
  0 siblings, 0 replies; 49+ messages in thread
From: Diego Novillo @ 2007-03-02 15:21 UTC (permalink / raw)
  To: Eric Botcazou; +Cc: gcc-patches

Eric Botcazou wrote on 03/02/07 09:58:

>> 2007-01-06  Eric Botcazou  <ebotcazou@adacore.com>
>>
>> 	* tree-sra.c (sra_walk_fns) <ldst>: Document new restriction.
>> 	(sra_walk_modify_expr) <rhs_elt>: Treat the reference as a use
>> 	if the lhs has side-effects.
>> 	<lhs_elt>: Treat the reference as a use if the rhs has side-effects.

OK everywhere.

^ permalink raw reply	[flat|nested] 49+ messages in thread

end of thread, other threads:[~2007-03-02 15:21 UTC | newest]

Thread overview: 49+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-01-06 13:19 [PATCH] Tree SRA and atomicity/volatility Eric Botcazou
2007-01-06 13:31 ` Richard Guenther
2007-01-06 13:47   ` Richard Kenner
2007-01-06 13:49   ` Eric Botcazou
2007-01-07 11:23     ` Richard Guenther
2007-01-08 11:30       ` Eric Botcazou
2007-01-08 11:52         ` Richard Guenther
2007-01-08 12:43           ` Eric Botcazou
2007-01-08 13:12           ` Richard Kenner
2007-01-08 13:40             ` Richard Guenther
2007-01-08 14:55           ` Richard Guenther
2007-01-12 13:57             ` Eric Botcazou
2007-01-12 16:36               ` Richard Guenther
2007-01-12 17:03                 ` Richard Guenther
2007-01-14  7:47                 ` Eric Botcazou
2007-01-14 14:57                   ` Richard Guenther
2007-01-19 13:58                     ` Eric Botcazou
2007-01-23 16:58 ` Mark Mitchell
2007-01-23 17:15   ` Daniel Berlin
2007-01-23 17:24   ` Richard Guenther
2007-01-23 19:38     ` Mark Mitchell
2007-01-23 20:57       ` Richard Guenther
2007-01-23 22:07         ` Mark Mitchell
2007-01-24  1:39           ` Richard Kenner
2007-01-24 13:33           ` Eric Botcazou
2007-01-24  1:31         ` Richard Kenner
2007-01-24  9:27           ` Richard Guenther
2007-01-24 13:02             ` Richard Kenner
2007-01-24 13:33               ` Richard Guenther
2007-01-24 13:57                 ` Richard Kenner
2007-01-24 18:31                 ` Mark Mitchell
2007-01-24 23:57                   ` Richard Kenner
2007-01-25  9:38                   ` Richard Guenther
2007-01-25 11:38                     ` Richard Kenner
2007-01-25 16:32                       ` Mark Mitchell
2007-01-25 16:41                         ` Richard Guenther
2007-01-25 18:29                           ` Richard Kenner
2007-01-25 22:03                       ` Mike Stump
2007-01-26  2:37                         ` Mark Mitchell
2007-01-26  2:44                           ` Mike Stump
2007-01-26  2:54                             ` Mark Mitchell
2007-01-26  9:17                               ` Richard Guenther
2007-01-26 10:12                                 ` Eric Botcazou
2007-01-26 13:40                                 ` Richard Kenner
2007-01-26 13:13                             ` Richard Kenner
2007-01-26 19:21                               ` Mike Stump
2007-01-24  0:53     ` Richard Kenner
2007-03-02 14:55 ` Eric Botcazou
2007-03-02 15:21   ` Diego Novillo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).