public inbox for gcc@gcc.gnu.org
 help / color / mirror / Atom feed
* Endianess attribute
@ 2009-07-02  9:59 Paul Chavent
  2009-07-02 13:37 ` Ken Raeburn
  2009-07-02 20:44 ` Michael Meissner
  0 siblings, 2 replies; 5+ messages in thread
From: Paul Chavent @ 2009-07-02  9:59 UTC (permalink / raw)
  To: gcc

Hi.

I have already posted about the endianness attribute (http://gcc.gnu.org/ml/gcc/2008-11/threads.html#00146).

For some years, I have really needed this feature on C projects.

Today I would like to dig into the internals of gcc, and I would like to implement this feature as an exercise.

You already warned me that it would be a hard task (aliasing, etc.), but I would like to begin with basic specs.


The spec could be :

- add an attribute (this description could change to be compatible with existing ones (diabdata for example))

   __attribute__ ((endian("big")))
   __attribute__ ((endian("lil")))

- this attribute only applies to ints

- this attribute only applies to variable declarations

- a pointer to this variable doesn't inherit the attribute (this behavior could change later, I don't know...)

- the test case is

   uint32_t x __attribute__ ((endian("big")));
   uint32_t * ptr_x = &x;

   x = 0xDEADBEEF;

   if(plf_is_little)
     {
       assert((*ptr_x == 0xEFBEADDE));
     }
   else if(plf_is_big)
     {
       assert((*ptr_x == 0xDEADBEEF));
     }




My first work is the patch below.

So my questions to the mailing list are :

- is it a good starting point ?

- how can I get the endianness of the target ?


Thanks for your help and suggestions.


8<------------------------------------------------------------------------

diff -abBruN gcc-4.4.0.orig/gcc/c-common.c gcc-4.4.0.mod/gcc/c-common.c
--- gcc-4.4.0.orig/gcc/c-common.c	2009-03-30 19:42:27.000000000 +0200
+++ gcc-4.4.0.mod/gcc/c-common.c	2009-07-02 11:10:28.000000000 +0200
@@ -522,6 +522,7 @@
  static bool check_case_bounds (tree, tree, tree *, tree *);

  static tree handle_packed_attribute (tree *, tree, tree, int, bool *);
+static tree handle_endian_attribute (tree *, tree, tree, int, bool *);
  static tree handle_nocommon_attribute (tree *, tree, tree, int, bool *);
  static tree handle_common_attribute (tree *, tree, tree, int, bool *);
  static tree handle_noreturn_attribute (tree *, tree, tree, int, bool *);
@@ -761,6 +762,8 @@
    /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
    { "packed",                 0, 0, false, false, false,
  			      handle_packed_attribute },
+  { "endian",                 1, 1, false, false, false,
+			      handle_endian_attribute },
    { "nocommon",               0, 0, true,  false, false,
  			      handle_nocommon_attribute },
    { "common",                 0, 0, true,  false, false,
@@ -5155,6 +5158,58 @@
    return NULL_TREE;
  }

+/* Handle an "endian" attribute; arguments as in
+   struct attribute_spec.handler.  The single argument must be the
+   string "little" or "big".  When the requested byte order differs
+   from the target's (BYTES_BIG_ENDIAN), DECL_SWAP is set on the decl.
+   The attribute itself is never kept: the flag carries the result.  */
+
+static tree
+handle_endian_attribute (tree *node, tree name, tree args,
+			 int ARG_UNUSED (flags), bool *no_add_attrs)
+{
+  tree arg = TREE_VALUE (args);
+  bool want_big;
+
+  /* All information is recorded in DECL_SWAP, so drop the attribute
+     on every path, including the error ones.  */
+  *no_add_attrs = true;
+
+  if (TREE_CODE (arg) != STRING_CST)
+    {
+      error ("argument of %qE attribute should be a string", name);
+      return NULL_TREE;
+    }
+
+  if (TREE_CODE (*node) != FIELD_DECL
+      && TREE_CODE (*node) != VAR_DECL
+      && TREE_CODE (*node) != TYPE_DECL)
+    {
+      error ("%qE attribute applies only to fields, variables and types",
+	     name);
+      return NULL_TREE;
+    }
+
+  if (strcmp (TREE_STRING_POINTER (arg), "big") == 0)
+    want_big = true;
+  else if (strcmp (TREE_STRING_POINTER (arg), "little") == 0)
+    want_big = false;
+  else
+    {
+      error ("argument of %qE attribute should be 'little' or 'big'",
+	     name);
+      return NULL_TREE;
+    }
+
+  /* BYTES_BIG_ENDIAN is defined by every target, unlike the
+     port-specific TARGET_BIG_ENDIAN / TARGET_LITTLE_ENDIAN.  Only
+     flag the decl when its byte order differs from the target's.  */
+  if (want_big != (BYTES_BIG_ENDIAN != 0))
+    DECL_SWAP (*node) = 1;
+
+  return NULL_TREE;
+}
+
  /* Handle a "nocommon" attribute; arguments as in
     struct attribute_spec.handler.  */

diff -abBruN gcc-4.4.0.orig/gcc/tree.h gcc-4.4.0.mod/gcc/tree.h
--- gcc-4.4.0.orig/gcc/tree.h	2009-03-23 17:29:33.000000000 +0100
+++ gcc-4.4.0.mod/gcc/tree.h	2009-07-02 11:10:28.000000000 +0200
@@ -2721,13 +2721,15 @@
    /* In FIELD_DECL, this is DECL_NONADDRESSABLE_P
       In VAR_DECL and PARM_DECL, this is DECL_HAS_VALUE_EXPR.  */
    unsigned decl_flag_3 : 1;
+  /* In FIELD_DECL, VAR_DECL and TYPE_DECL this is DECL_SWAP.  */
+  unsigned decl_flag_4 : 1;
    /* Logically, these two would go in a theoretical base shared by var and
       parm decl. */
    unsigned gimple_reg_flag : 1;
    /* In a DECL with pointer type, set if no TBAA should be done.  */
    unsigned no_tbaa_flag : 1;
    /* Padding so that 'align' can be on a 32-bit boundary.  */
-  unsigned decl_common_unused : 2;
+  unsigned decl_common_unused : 1;

    unsigned int align : 24;
    /* DECL_OFFSET_ALIGN, used only for FIELD_DECLs.  */
@@ -2854,6 +2856,10 @@
  #define DECL_NONADDRESSABLE_P(NODE) \
    (FIELD_DECL_CHECK (NODE)->decl_common.decl_flag_3)

+/* In a FIELD_DECL, VAR_DECL or TYPE_DECL, indicates it should be swapped.  */
+#define DECL_SWAP(NODE) \
+  (TREE_CHECK3 (NODE, FIELD_DECL, VAR_DECL, TYPE_DECL)->decl_common.decl_flag_4)
+
  struct tree_field_decl GTY(())
  {
    struct tree_decl_common common;


8<------------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: Endianess attribute
  2009-07-02  9:59 Endianess attribute Paul Chavent
@ 2009-07-02 13:37 ` Ken Raeburn
  2009-07-02 20:44 ` Michael Meissner
  1 sibling, 0 replies; 5+ messages in thread
From: Ken Raeburn @ 2009-07-02 13:37 UTC (permalink / raw)
  To: Paul Chavent; +Cc: gcc

On Jul 2, 2009, at 06:02, Paul Chavent wrote:
> Hi.
>
> I already have posted about the endianess attribute (http://gcc.gnu.org/ml/gcc/2008-11/threads.html#00146 
> ).
>
> For some year, i really need this feature on c projects.
>
> Today i would like to go inside the internals of gcc, and i would  
> like to implement this feature as an exercise.
>
> You already prevent me that it would be a hard task (aliasing,  
> etc.), but i would like to begin with basic specs.

As another gcc user (and, once upon a time, developer) who's had to  
deal with occasional byte ordering issues (mainly in network  
protocols), I can imagine some uses for something like this.  But...

> The spec could be :
>
> - add an attribute (this description could change to be compatible  
> with existing ones (diabdata for example))
>
>  __attribute__ ((endian("big")))
>  __attribute__ ((endian("lil")))

I would use "little" spelled out, rather than trying to use some cute  
abbreviation.  Whether it should be a string vs a C token like little  
or __little__, I don't know, or particularly care.

> - this attribute only apply to ints

It should at least be any integral type -- short to long long or  
whatever TImode is.  (Technically maybe char/QImode could be allowed  
but it wouldn't have any effect on code generation.)  I wouldn't jump  
to the conclusion that it would be useless for pointers or floating  
point values, but I don't know what the use cases for those would be  
like.  However, I think that's a case where you could limit the  
implementation initially, then expand the support later if needed,  
unlike the pointer issue below.

> - this attribute only apply to variables declaration
>
> - a pointer to this variable don't inherit the attribute (this  
> behavior could change later, i don't know...)

This seems like a poor idea -- for one thing, my use cases would  
probably involve something like pointers to unaligned big-endian  
integers in allocated buffers, or maybe integer fields in packed  
structures, again via pointers.  (It looks like you may be trying to  
handle the latter but not the former in the code you've got so far.)   
For another, one operation that may be used in code refactoring  
involves taking a bunch of code accessing some variable x (and  
presumably similar blocks of code elsewhere that may use different  
variables), and pulling it out into a separate function that takes the  
address of the thing to be modified, passed in at the call sites to  
the new function; if direct access to x and access via &x behave  
differently under this attribute, suddenly this formerly reasonable  
transformation is unsafe -- and perhaps worst of all, the behavior  
change would be silent, since the compiler would have nothing to  
complain about.

Also, changing the behavior later means changing the interpretation of  
some code after deploying a compiler using one interpretation.   
Consider this on a 32-bit little-endian machine:

   unsigned int x __attribute__((endian("big")));
   *&x = 0x12345678;

In normal C code without this attribute, reading and writing "*&x" is  
the same as reading and writing x.  In your proposed version, "*&x"  
would use the little-endian interpretation, and "x" would use the big- 
endian interpretation, with nothing at the site of the executable code  
to indicate that the two should be different.  But an expression like  
this can come up naturally when dealing with macro expansions.  Or,  
someone using this attribute may write code depending on that  
different handling of "*&x" to deal with a selected byte order in some  
cases and native byte order in other cases.  Then if you update the  
compiler so that the attribute is passed along to the pointer type, in  
the next release, suddenly the two cases behave the same -- breaking  
the user's code when it worked under the previous compiler release.   
If you support taking the address of specified-endianness variables at  
all, you need to get the pointer handling right the first time around.

I would suggest that if you implement something like this, the  
attribute should be associated with the data type, not the variable  
decl; so in the declaration above, x wouldn't be treated specially,  
but its type would be "big-endian unsigned int", a distinct type from  
"int" (even on a big-endian machine, probably).

The one advantage I see to associating the attribute with the decl  
rather than the type is that I could write:

   uint32_t thing __attribute__((endian("big")));

rather than needing to figure out what uint32_t is in fundamental C  
types and create a new typedef incorporating the underlying type plus  
the attribute, kind of like how you can't write a declaration using  
"signed size_t".  But that's a long-standing issue in C, and I don't  
think making the language inconsistent so you can fix the problem in  
some cases but not others is a very good idea.

> - the test case is
>
>  uint32_t x __attribute__ ((endian("big")));
>  uint32_t * ptr_x = x;

Related to my suggestions above, I think this assignment should get a  
warning about incompatible pointer types.

Though, it brings up an interesting additional question -- should  
pointers to big-endian int and "normal" int be compatible on big- 
endian machines?  Under C, "char", "unsigned char" and "signed char"  
are three distinct types, even though "char" must functionally be the  
same as one of the others.  I'd suggest that probably the normal type  
should be incompatible with both of the explicit-endian types, to help  
make the code type-safe and not dependent on the target machine's byte  
order.

Ken

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: Endianess attribute
  2009-07-02  9:59 Endianess attribute Paul Chavent
  2009-07-02 13:37 ` Ken Raeburn
@ 2009-07-02 20:44 ` Michael Meissner
  2009-07-02 23:55   ` Ken Raeburn
  1 sibling, 1 reply; 5+ messages in thread
From: Michael Meissner @ 2009-07-02 20:44 UTC (permalink / raw)
  To: Paul Chavent; +Cc: gcc

On Thu, Jul 02, 2009 at 12:02:29PM +0200, Paul Chavent wrote:
> Hi.
> 
> I already have posted about the endianess attribute 
> (http://gcc.gnu.org/ml/gcc/2008-11/threads.html#00146).
> 
> For some year, i really need this feature on c projects.
> 
> Today i would like to go inside the internals of gcc, and i would like to 
> implement this feature as an exercise.
> 
> You already prevent me that it would be a hard task (aliasing, etc.), but i 
> would like to begin with basic specs.

Well actually, if we can ever get the named address space patches checked in,
it provides the framework for different address spaces, where pointers might be
different sizes or encodings from standard pointers.  Non-native endian would
be handled by a different named address space, and the compiler would not let
you convert different endian pointers.  I suspect there are still holes, but
those will only be fixed when it gets more mainstream testing and use.  Tree
level aliasing might be one such case.

During the recent GCC summit, I gave a talk about the named address space
support that I had worked on last year before being transferred to a different
group within IBM.  Unfortunately all of my focus is getting the powerpc changes
in the current release, and I no longer officially work on the named address
space stuff.

Anyway I had some time during the summit, and I decided to see how hard it
would be to add explicit big/little endian support to the powerpc port.  It
only took a few hours to add the support for __little and __big qualifier
keywords, and in fact more time to get the byte swap instructions nailed down
(bear in mind, since I've written a lot of the named address space stuff, I
knew exactly where to add stuff, so it might take somewhat longer for somebody
else to add the support).

So for example, with my patches:

	__little int foo;

would declare foo to be little endian (there are restrictions that named
address space variables can only be global/static or referenced through a
pointer).

Then you can declare:

	int *__little bar = &foo;

would declare bar to be a normal pointer, which points to a little endian
item.  The following would be illegal, since bletch and bar point to different
named address spaces, and the backend says you can't convert them.

	int *bletch = bar;

-- 
Michael Meissner, IBM
4 Technology Place Drive, MS 2203A, Westford, MA, 01886, USA
meissner@linux.vnet.ibm.com

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: Endianess attribute
  2009-07-02 20:44 ` Michael Meissner
@ 2009-07-02 23:55   ` Ken Raeburn
  2009-07-03 13:36     ` Michael Meissner
  0 siblings, 1 reply; 5+ messages in thread
From: Ken Raeburn @ 2009-07-02 23:55 UTC (permalink / raw)
  To: Michael Meissner; +Cc: Paul Chavent, gcc

On Jul 2, 2009, at 16:44, Michael Meissner wrote:
> Anyway I had some time during the summit, and I decided to see how  
> hard it
> would be to add explicit big/little endian support to the powerpc  
> port.  It
> only took a few hours to add the support for __little and __big  
> qualifier
> keywords, and in fact more time to get the byte swap instructions  
> nailed down

That sounds great!

>  (there are restrictions that named
> address space variables can only be global/static or referenced  
> through a
> pointer).

That sounds like a potential problem, depending on the use cases.  No  
structure field members with explicit byte order?  That could be  
annoying for dealing with network protocols or file formats with  
explicit byte ordering.

On the other hand, if we're talking about address spaces... I would  
guess you could apply it to a structure?  That would be good for  
memory-mapped devices accepting only one byte order that may not be  
that of the main CPU.  For that use case, it would be unfortunate to  
have to tag every integer field.

I don't think Paul indicated what his use case was...

Ken

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: Endianess attribute
  2009-07-02 23:55   ` Ken Raeburn
@ 2009-07-03 13:36     ` Michael Meissner
  0 siblings, 0 replies; 5+ messages in thread
From: Michael Meissner @ 2009-07-03 13:36 UTC (permalink / raw)
  To: Ken Raeburn; +Cc: Michael Meissner, Paul Chavent, gcc

On Thu, Jul 02, 2009 at 06:54:52PM -0400, Ken Raeburn wrote:
> On Jul 2, 2009, at 16:44, Michael Meissner wrote:
>> Anyway I had some time during the summit, and I decided to see how  
>> hard it
>> would be to add explicit big/little endian support to the powerpc  
>> port.  It
>> only took a few hours to add the support for __little and __big  
>> qualifier
>> keywords, and in fact more time to get the byte swap instructions  
>> nailed down
>
> That sounds great!
>
>>  (there are restrictions that named
>> address space variables can only be global/static or referenced  
>> through a
>> pointer).
>
> That sounds like a potential problem, depending on the use cases.  No  
> structure field members with explicit byte order?  That could be  
> annoying for dealing with network protocols or file formats with  
> explicit byte ordering.

The technical report that named address spaces is based on does not allow you
to mix address spaces within a structure (pointers to different address spaces
can be mixed, just not the data themselves).  For example, stack variables are
always in the generic address space.  I do tend to think it would be better to
have little/big be more official than a target-specific thing.  However, most of
the places you need to modify for named address spaces are the places you need
to modify for mixed endian.

> On the other hand, if we're talking about address spaces... I would  
> guess you could apply it to a structure?  That would be good for  
> memory-mapped devices accepting only one byte order that may not be that 
> of the main CPU.  For that use case, it would be unfortunate to have to 
> tag every integer field.
>
> I don't think Paul indicated what his use case was...

-- 
Michael Meissner, IBM
4 Technology Place Drive, MS 2203A, Westford, MA, 01886, USA
meissner@linux.vnet.ibm.com

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2009-07-03 13:36 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-07-02  9:59 Endianess attribute Paul Chavent
2009-07-02 13:37 ` Ken Raeburn
2009-07-02 20:44 ` Michael Meissner
2009-07-02 23:55   ` Ken Raeburn
2009-07-03 13:36     ` Michael Meissner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).