public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* Host endian independence
@ 2019-08-24  4:14 Damien Zammit
  2019-08-27 15:54 ` Joseph Myers
  0 siblings, 1 reply; 10+ messages in thread
From: Damien Zammit @ 2019-08-24  4:14 UTC (permalink / raw)
  To: libc-alpha

[-- Attachment #1: Type: text/plain, Size: 913 bytes --]

Hi there,

Given that GCC is smart enough to optimize bitshifts of bytes to casts
followed by bswaps if necessary on machines that have such instructions,
I propose to remove most (if not all) dependence on BYTE_ORDER in glibc
so that the same code can be written for any host - this will fix some
edge cases for middle endian machines and make the code easier to read.

There does not seem to be any performance hit (at least on x86
achitectures) by making the code host endian independent by choosing a
particular endian for streams and interpreting all streams in the
correct endian using bitshifts when streams are 4-byte aligned.
(0x11 << 8) | 0x22 = 0x1122 on any host endian machine.

Please see attached patches for proof of concept, which remove
dependence on BYTE_ORDER for two subfolders of glibc.

If this is a desirable change, please let me know and I can continue
this work.

Thanks,
Damien Zammit

[-- Attachment #2: 0001-include-endian-helpers.h-Add-endian-specific-stream-.patch --]
[-- Type: text/x-patch, Size: 4975 bytes --]

From bad1f44dd866f0dbc18df00aac2e22e9e216455a Mon Sep 17 00:00:00 2001
From: Damien Zammit <damien@zamaudio.com>
Date: Thu, 22 Aug 2019 20:10:19 +1000
Subject: [PATCH 1/3] include/endian-helpers.h: Add endian specific stream
 handling helpers

---
 include/endian-helpers.h | 193 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 193 insertions(+)
 create mode 100644 include/endian-helpers.h

diff --git a/include/endian-helpers.h b/include/endian-helpers.h
new file mode 100644
index 00000000..c421cae7
--- /dev/null
+++ b/include/endian-helpers.h
@@ -0,0 +1,193 @@
+#ifndef ENDIAN_HELPERS_H_
+#define ENDIAN_HELPERS_H_
+#include <inttypes.h>
+#include <stddef.h>
+
+/* LE routines */
+
+static inline uint16_t
+__attribute ((always_inline))
+read_le16(void *src)
+{
+	uint8_t *buf = (uint8_t *)src;
+	return ((uint16_t)(buf[1]) << 8) | (uint16_t)(buf[0]);
+}
+
+static inline void
+__attribute ((always_inline))
+write_le16(void *dest, uint16_t val)
+{
+	uint8_t *buf = (uint8_t *)dest;
+	buf[0] = (uint8_t)val;
+	buf[1] = (uint8_t)(val >> 8);
+}
+
+static inline uint32_t
+__attribute ((always_inline))
+read_le32(void *src)
+{
+	uint8_t *buf = (uint8_t *)src;
+	return ((uint32_t)(buf[3]) << 24) |
+		((uint32_t)(buf[2]) << 16) |
+		((uint32_t)(buf[1]) << 8) |
+		(uint32_t)(buf[0]);
+}
+
+static inline void
+__attribute ((always_inline))
+write_le32(void *dest, uint32_t val)
+{
+	uint8_t *buf = (uint8_t *)dest;
+	buf[0] = (uint8_t)val;
+	buf[1] = (uint8_t)(val >> 8);
+	buf[2] = (uint8_t)(val >> 16);
+	buf[3] = (uint8_t)(val >> 24);
+}
+
+static inline uint64_t
+__attribute ((always_inline))
+read_le64(void *src)
+{
+	uint8_t *buf = (uint8_t *)src;
+	return ((uint64_t)(buf[7]) << 56) |
+		((uint64_t)(buf[6]) << 48) |
+		((uint64_t)(buf[5]) << 40) |
+		((uint64_t)(buf[4]) << 32) |
+		((uint64_t)(buf[3]) << 24) |
+		((uint64_t)(buf[2]) << 16) |
+		((uint64_t)(buf[1]) << 8) |
+		(uint64_t)(buf[0]);
+}
+
+static inline void
+__attribute ((always_inline))
+write_le64(void *dest, uint64_t val)
+{
+	uint8_t *buf = (uint8_t *)dest;
+	buf[0] = (uint8_t)val;
+	buf[1] = (uint8_t)(val >> 8);
+	buf[2] = (uint8_t)(val >> 16);
+	buf[3] = (uint8_t)(val >> 24);
+	buf[4] = (uint8_t)(val >> 32);
+	buf[5] = (uint8_t)(val >> 40);
+	buf[6] = (uint8_t)(val >> 48);
+	buf[7] = (uint8_t)(val >> 56);
+}
+
+static inline uintptr_t
+__attribute ((always_inline))
+read_leptr(void *src)
+{
+	if (sizeof (size_t) == 8) {
+		return (uintptr_t)read_le64 (src);
+	} else if (sizeof (size_t) == 4) {
+		return (uintptr_t)read_le32 (src);
+	}
+	return (uintptr_t)0;
+}
+
+static inline void
+__attribute ((always_inline))
+write_leptr(void *dest, uintptr_t ptr)
+{
+	if (sizeof (size_t) == 8) {
+		write_le64 (dest, (uint64_t)ptr);
+	} else if (sizeof (size_t) == 4) {
+		write_le32 (dest, (uint32_t)ptr);
+	}
+}
+
+/* BE routines */
+
+static inline uint16_t
+__attribute ((always_inline))
+read_be16(void *src)
+{
+	uint8_t *buf = (uint8_t *)src;
+	return ((uint16_t)(buf[0]) << 8) | (uint16_t)(buf[1]);
+}
+
+static inline void
+__attribute ((always_inline))
+write_be16(void *dest, uint16_t val)
+{
+	uint8_t *buf = (uint8_t *)dest;
+	buf[0] = (uint8_t)(val >> 8);
+	buf[1] = (uint8_t)val;
+}
+
+static inline uint32_t
+__attribute ((always_inline))
+read_be32(void *src)
+{
+	uint8_t *buf = (uint8_t *)src;
+	return ((uint32_t)(buf[0]) << 24) |
+		((uint32_t)(buf[1]) << 16) |
+		((uint32_t)(buf[2]) << 8) |
+		(uint32_t)(buf[3]);
+}
+
+static inline void
+__attribute ((always_inline))
+write_be32(void *dest, uint32_t val)
+{
+	uint8_t *buf = (uint8_t *)dest;
+	buf[0] = (uint8_t)(val >> 24);
+	buf[1] = (uint8_t)(val >> 16);
+	buf[2] = (uint8_t)(val >> 8);
+	buf[3] = (uint8_t)val;
+}
+
+static inline uint64_t
+__attribute ((always_inline))
+read_be64(void *src)
+{
+	uint8_t *buf = (uint8_t *)src;
+	return ((uint64_t)(buf[0]) << 56) |
+		((uint64_t)(buf[1]) << 48) |
+		((uint64_t)(buf[2]) << 40) |
+		((uint64_t)(buf[3]) << 32) |
+		((uint64_t)(buf[4]) << 24) |
+		((uint64_t)(buf[5]) << 16) |
+		((uint64_t)(buf[6]) << 8) |
+		(uint64_t)(buf[7]);
+}
+
+static inline void
+__attribute ((always_inline))
+write_be64(void *dest, uint64_t val)
+{
+	uint8_t *buf = (uint8_t *)dest;
+	buf[0] = (uint8_t)(val >> 56);
+	buf[1] = (uint8_t)(val >> 48);
+	buf[2] = (uint8_t)(val >> 40);
+	buf[3] = (uint8_t)(val >> 32);
+	buf[4] = (uint8_t)(val >> 24);
+	buf[5] = (uint8_t)(val >> 16);
+	buf[6] = (uint8_t)(val >> 8);
+	buf[7] = (uint8_t)val;
+}
+
+static inline uintptr_t
+__attribute ((always_inline))
+read_beptr(void *src)
+{
+	if (sizeof (size_t) == 8) {
+		return (uintptr_t)read_be64 (src);
+	} else if (sizeof (size_t) == 4) {
+		return (uintptr_t)read_be32 (src);
+	}
+	return (uintptr_t)0;
+}
+
+static inline void
+__attribute ((always_inline))
+write_beptr(void *dest, uintptr_t ptr)
+{
+	if (sizeof (size_t) == 8) {
+		write_be64 (dest, (uint64_t)ptr);
+	} else if (sizeof (size_t) == 4) {
+		write_be32 (dest, (uint32_t)ptr);
+	}
+}
+#endif
-- 
2.13.1


[-- Attachment #3: 0002-time-tzfile.c-Remove-dependence-on-BYTE_ORDER.patch --]
[-- Type: text/x-patch, Size: 4081 bytes --]

From 2ef3824d2561987f08db5f3ef9be8b1660a86cd1 Mon Sep 17 00:00:00 2001
From: Damien Zammit <damien@zamaudio.com>
Date: Thu, 22 Aug 2019 20:11:46 +1000
Subject: [PATCH 2/3] time/tzfile.c: Remove dependence on BYTE_ORDER

---
 time/tzfile.c | 63 +++++++++++++----------------------------------------------
 1 file changed, 14 insertions(+), 49 deletions(-)

diff --git a/time/tzfile.c b/time/tzfile.c
index a7d05e2d..f55bed98 100644
--- a/time/tzfile.c
+++ b/time/tzfile.c
@@ -26,6 +26,7 @@
 #include <sys/stat.h>
 #include <stdint.h>
 #include <alloc_buffer.h>
+#include <endian-helpers.h>
 
 #include <timezone/tzfile.h>
 
@@ -61,42 +62,6 @@ static size_t num_leaps;
 static struct leap *leaps;
 static char *tzspec;
 
-#include <endian.h>
-#include <byteswap.h>
-
-/* Decode the four bytes at PTR as a signed integer in network byte order.  */
-static inline int
-__attribute ((always_inline))
-decode (const void *ptr)
-{
-  if (BYTE_ORDER == BIG_ENDIAN && sizeof (int) == 4)
-    return *(const int *) ptr;
-  if (sizeof (int) == 4)
-    return bswap_32 (*(const int *) ptr);
-
-  const unsigned char *p = ptr;
-  int result = *p & (1 << (CHAR_BIT - 1)) ? ~0 : 0;
-
-  result = (result << 8) | *p++;
-  result = (result << 8) | *p++;
-  result = (result << 8) | *p++;
-  result = (result << 8) | *p++;
-
-  return result;
-}
-
-
-static inline int64_t
-__attribute ((always_inline))
-decode64 (const void *ptr)
-{
-  if ((BYTE_ORDER == BIG_ENDIAN))
-    return *(const int64_t *) ptr;
-
-  return bswap_64 (*(const int64_t *) ptr);
-}
-
-
 void
 __tzfile_read (const char *file, size_t extra, char **extrap)
 {
@@ -184,12 +149,12 @@ __tzfile_read (const char *file, size_t extra, char **extrap)
       || memcmp (tzhead.tzh_magic, TZ_MAGIC, sizeof (tzhead.tzh_magic)) != 0)
     goto lose;
 
-  num_transitions = (size_t) decode (tzhead.tzh_timecnt);
-  num_types = (size_t) decode (tzhead.tzh_typecnt);
-  chars = (size_t) decode (tzhead.tzh_charcnt);
-  num_leaps = (size_t) decode (tzhead.tzh_leapcnt);
-  num_isstd = (size_t) decode (tzhead.tzh_ttisstdcnt);
-  num_isgmt = (size_t) decode (tzhead.tzh_ttisgmtcnt);
+  num_transitions = (size_t) read_be32 (tzhead.tzh_timecnt);
+  num_types = (size_t) read_be32 (tzhead.tzh_typecnt);
+  chars = (size_t) read_be32 (tzhead.tzh_charcnt);
+  num_leaps = (size_t) read_be32 (tzhead.tzh_leapcnt);
+  num_isstd = (size_t) read_be32 (tzhead.tzh_ttisstdcnt);
+  num_isgmt = (size_t) read_be32 (tzhead.tzh_ttisgmtcnt);
 
   if (__glibc_unlikely (num_isstd > num_types || num_isgmt > num_types))
     goto lose;
@@ -315,14 +280,14 @@ __tzfile_read (const char *file, size_t extra, char **extrap)
 	 processed.  */
       i = num_transitions;
       while (i-- > 0)
-	transitions[i] = decode ((char *) transitions + i * 4);
+	transitions[i] = read_be32 ((char *) transitions + i * 4);
     }
-  else if (BYTE_ORDER != BIG_ENDIAN)
+  else
     {
       /* Decode the transition times, stored as 8-byte integers in
 	 network (big-endian) byte order.  */
       for (i = 0; i < num_transitions; ++i)
-	transitions[i] = decode64 ((char *) transitions + i * 8);
+	transitions[i] = read_be64 ((char *) transitions + i * 8);
     }
 
   for (i = 0; i < num_types; ++i)
@@ -342,7 +307,7 @@ __tzfile_read (const char *file, size_t extra, char **extrap)
 	/* Bogus index in data file.  */
 	goto lose;
       types[i].idx = c;
-      types[i].offset = decode (x);
+      types[i].offset = read_be32 (x);
     }
 
   if (__glibc_unlikely (__fread_unlocked (zone_names, 1, chars, f) != chars))
@@ -355,13 +320,13 @@ __tzfile_read (const char *file, size_t extra, char **extrap)
 			    != trans_width, 0))
 	goto lose;
       if (trans_width == 4)
-	leaps[i].transition = decode (x);
+	leaps[i].transition = read_be32 (x);
       else
-	leaps[i].transition = decode64 (x);
+	leaps[i].transition = read_be64 (x);
 
       if (__glibc_unlikely (__fread_unlocked (x, 1, 4, f) != 4))
 	goto lose;
-      leaps[i].change = (long int) decode (x);
+      leaps[i].change = (long int) read_be32 (x);
     }
 
   for (i = 0; i < num_isstd; ++i)
-- 
2.13.1


[-- Attachment #4: 0003-catgets-Remove-dependence-on-BYTE_ORDER.patch --]
[-- Type: text/x-patch, Size: 6006 bytes --]

From 32a3f800b3977bb7c0826e231c89c8a242cc5e69 Mon Sep 17 00:00:00 2001
From: Damien Zammit <damien@zamaudio.com>
Date: Thu, 22 Aug 2019 20:15:51 +1000
Subject: [PATCH 3/3] catgets: Remove dependence on BYTE_ORDER

---
 catgets/gencat.c       | 42 +++++++++++++++++++-----------------------
 catgets/open_catalog.c | 31 ++++++++++++-------------------
 2 files changed, 31 insertions(+), 42 deletions(-)

diff --git a/catgets/gencat.c b/catgets/gencat.c
index f0d47ae0..e4b73e5d 100644
--- a/catgets/gencat.c
+++ b/catgets/gencat.c
@@ -22,7 +22,7 @@
 #include <argp.h>
 #include <assert.h>
 #include <ctype.h>
-#include <endian.h>
+#include <endian-helpers.h>
 #include <errno.h>
 #include <error.h>
 #include <fcntl.h>
@@ -44,10 +44,6 @@
 
 #include "catgetsinfo.h"
 
-
-#define SWAPU32(w) \
-  (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
-
 struct message_list
 {
   int number;
@@ -852,7 +848,8 @@ write_out (struct catalog *catalog, const char *output_name,
   struct obstack string_pool;
   const char *strings;
   size_t strings_size;
-  uint32_t *array1, *array2;
+  uint32_t *array1;
+  uint8_t buf[8];
   size_t cnt;
   int fd;
 
@@ -932,8 +929,6 @@ write_out (struct catalog *catalog, const char *output_name,
   array1 =
     (uint32_t *) alloca (best_size * best_depth * sizeof (uint32_t) * 3);
   memset (array1, '\0', best_size * best_depth * sizeof (uint32_t) * 3);
-  array2
-    = (uint32_t *) alloca (best_size * best_depth * sizeof (uint32_t) * 3);
   obstack_init (&string_pool);
 
   set_run = catalog->all_sets;
@@ -969,10 +964,6 @@ write_out (struct catalog *catalog, const char *output_name,
   strings_size = obstack_object_size (&string_pool);
   strings = obstack_finish (&string_pool);
 
-  /* Compute ARRAY2 by changing the byte order.  */
-  for (cnt = 0; cnt < best_size * best_depth * 3; ++cnt)
-    array2[cnt] = SWAPU32 (array1[cnt]);
-
   /* Now we can write out the whole data.  */
   if (strcmp (output_name, "-") == 0
       || strcmp (output_name, "/dev/stdout") == 0)
@@ -986,19 +977,24 @@ write_out (struct catalog *catalog, const char *output_name,
     }
 
   /* Write out header.  */
-  write (fd, &obj, sizeof (obj));
+  write_le32 (buf, obj.magic);
+  write (fd, buf, sizeof (uint32_t));
+  write_le32 (buf, obj.plane_size);
+  write (fd, buf, sizeof (uint32_t));
+  write_le32 (buf, obj.plane_depth);
+  write (fd, buf, sizeof (uint32_t));
+  /* Nothing to write for obj.name_ptr - it is 0 bytes itself */
 
   /* We always write out the little endian version of the index
-     arrays.  */
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-  write (fd, array1, best_size * best_depth * sizeof (uint32_t) * 3);
-  write (fd, array2, best_size * best_depth * sizeof (uint32_t) * 3);
-#elif __BYTE_ORDER == __BIG_ENDIAN
-  write (fd, array2, best_size * best_depth * sizeof (uint32_t) * 3);
-  write (fd, array1, best_size * best_depth * sizeof (uint32_t) * 3);
-#else
-# error Cannot handle __BYTE_ORDER byte order
-#endif
+     array (LE then BE) */
+  for (cnt = 0; cnt < best_size * best_depth * 3; ++cnt) {
+    write_le32 (buf, array1[cnt]);
+    write (fd, buf, sizeof (uint32_t));
+  }
+  for (cnt = 0; cnt < best_size * best_depth * 3; ++cnt) {
+    write_be32 (buf, array1[cnt]);
+    write (fd, buf, sizeof (uint32_t));
+  }
 
   /* Finally write the strings.  */
   write (fd, strings, strings_size);
diff --git a/catgets/open_catalog.c b/catgets/open_catalog.c
index 9aa6a09f..5c124073 100644
--- a/catgets/open_catalog.c
+++ b/catgets/open_catalog.c
@@ -16,8 +16,7 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <byteswap.h>
-#include <endian.h>
+#include <endian-helpers.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <string.h>
@@ -32,9 +31,6 @@
 #include <not-cancel.h>
 
 
-#define SWAPU32(w) bswap_32 (w)
-
-
 int
 __open_catalog (const char *cat_name, const char *nlspath, const char *env_var,
 		__nl_catd catalog)
@@ -255,11 +251,11 @@ __open_catalog (const char *cat_name, const char *nlspath, const char *env_var,
     }
 
   /* Determine whether the file is a catalog file and if yes whether
-     it is written using the correct byte order.  Else we have to swap
-     the values.  */
-  if (__glibc_likely (catalog->file_ptr->magic == CATGETS_MAGIC))
+     it is written using the correct byte order.  Else we have to read
+     the values in opposite endian.  */
+  if (__glibc_likely (read_le32 (&catalog->file_ptr->magic) == CATGETS_MAGIC))
     swapping = 0;
-  else if (catalog->file_ptr->magic == SWAPU32 (CATGETS_MAGIC))
+  else if (read_be32 (&catalog->file_ptr->magic) == CATGETS_MAGIC)
     swapping = 1;
   else
     {
@@ -275,23 +271,20 @@ __open_catalog (const char *cat_name, const char *nlspath, const char *env_var,
       goto close_unlock_return;
     }
 
-#define SWAP(x) (swapping ? SWAPU32 (x) : (x))
+#define READ32(x) (swapping ? read_be32 (x) : read_le32 (x))
 
   /* Get dimensions of the used hashing table.  */
-  catalog->plane_size = SWAP (catalog->file_ptr->plane_size);
-  catalog->plane_depth = SWAP (catalog->file_ptr->plane_depth);
+  catalog->plane_size = READ32 (&catalog->file_ptr->plane_size);
+  catalog->plane_depth = READ32 (&catalog->file_ptr->plane_depth);
 
   /* The file contains two versions of the pointer tables.  Pick the
      right one for the local byte order.  */
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-  catalog->name_ptr = &catalog->file_ptr->name_ptr[0];
-#elif __BYTE_ORDER == __BIG_ENDIAN
-  catalog->name_ptr = &catalog->file_ptr->name_ptr[catalog->plane_size
+  if (__glibc_unlikely (swapping))
+    catalog->name_ptr = &catalog->file_ptr->name_ptr[catalog->plane_size
 						  * catalog->plane_depth
 						  * 3];
-#else
-# error Cannot handle __BYTE_ORDER byte order
-#endif
+  else
+    catalog->name_ptr = &catalog->file_ptr->name_ptr[0];
 
   /* The rest of the file contains all the strings.  They are
      addressed relative to the position of the first string.  */
-- 
2.13.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2019-09-02  7:14 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-24  4:14 Host endian independence Damien Zammit
2019-08-27 15:54 ` Joseph Myers
2019-08-29  4:45   ` Damien Zammit
2019-08-29 16:43     ` Joseph Myers
2019-08-30  9:16     ` Yann Droneaud
2019-08-30 10:19   ` Florian Weimer
2019-08-30 16:22     ` Joseph Myers
2019-09-01  9:26       ` Damien Zammit
2019-09-02  7:12         ` Florian Weimer
2019-09-02  7:14       ` Florian Weimer

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).