public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [patch][RFC] 160-bits bitmap_element
@ 2012-08-17 11:47 Steven Bosscher
  2012-08-17 11:54 ` Richard Guenther
  0 siblings, 1 reply; 8+ messages in thread
From: Steven Bosscher @ 2012-08-17 11:47 UTC (permalink / raw)
  To: GCC Patches; +Cc: Richard Guenther, Jakub Jelinek

[-- Attachment #1: Type: text/plain, Size: 1088 bytes --]

Hello,

On a 64-bits host we leave 32 bits of each bitmap_element unused. And
actually, on a 32-bits host we do that too for GGC-allocated bitmaps
(due to alignment).

With this patch, we use those 32 bits by making BITMAP_WORD an
unsigned int (instead of unsigned long) and having 5 elts per
bitmap_element (instead of 2 or 4).

The effect is that computing the bit position gets more expensive.
Currently this only needs a div/mod by a power-of-two, but with this
patch it's a div/mod 5. But because most bitmaps are sparse in blocks,
bitmaps with 160 bits bitmap_elements have fewer searches. And I think
that in general the call overhead to the bitmap functions
(bitmap_bit_p/bitmap_set_bit are not inline functions, they're
relatively large) and the pointer chasing is more expensive than
requiring div/mod 5. Compile time for a set of cc1-i files doesn't
change, but compile time for the small test case of PR54146 goes down
by ~10%, and peak memory goes down a bit too (harder to measure so I
don't know exactly by how much).

Looking for your thoughts and comments,

Ciao!
Steven

[-- Attachment #2: bitmap_160.diff --]
[-- Type: application/octet-stream, Size: 7341 bytes --]

Index: bitmap.c
===================================================================
--- bitmap.c	(revision 190474)
+++ bitmap.c	(working copy)
@@ -400,9 +400,6 @@ bitmap_obstack_free (bitmap map)
 static inline int
 bitmap_element_zerop (const bitmap_element *element)
 {
-#if BITMAP_ELEMENT_WORDS == 2
-  return (element->bits[0] | element->bits[1]) == 0;
-#else
   unsigned i;
 
   for (i = 0; i < BITMAP_ELEMENT_WORDS; i++)
@@ -410,7 +407,6 @@ bitmap_element_zerop (const bitmap_eleme
       return 0;
 
   return 1;
-#endif
 }
 \f
 /* Link the bitmap element into the current bitmap linked list.  */
@@ -689,10 +685,10 @@ static const unsigned char popcount_tabl
     3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8,
 };
 
-static unsigned long
+static unsigned int
 bitmap_popcount (BITMAP_WORD a)
 {
-  unsigned long ret = 0;
+  unsigned ret = 0;
   unsigned i;
 
   /* Just do this the table way for now  */
@@ -703,10 +699,10 @@ bitmap_popcount (BITMAP_WORD a)
 #endif
 /* Count the number of bits set in the bitmap, and return it.  */
 
-unsigned long
+unsigned int
 bitmap_count_bits (const_bitmap a)
 {
-  unsigned long count = 0;
+  unsigned count = 0;
   const bitmap_element *elt;
   unsigned ix;
 
@@ -732,7 +728,7 @@ bitmap_count_bits (const_bitmap a)
 bool
 bitmap_single_bit_set_p (const_bitmap a)
 {
-  unsigned long count = 0;
+  unsigned int count = 0;
   const bitmap_element *elt;
   unsigned ix;
 
@@ -748,9 +744,9 @@ bitmap_single_bit_set_p (const_bitmap a)
   for (ix = 0; ix != BITMAP_ELEMENT_WORDS; ix++)
     {
 #if GCC_VERSION >= 3400
-      /* Note that popcountl matches BITMAP_WORD in type, so the actual size
+      /* Note that popcount matches BITMAP_WORD in type, so the actual size
 	 of BITMAP_WORD is not material.  */
-      count += __builtin_popcountl (elt->bits[ix]);
+      count += __builtin_popcount (elt->bits[ix]);
 #else
       count += bitmap_popcount (elt->bits[ix]);
 #endif
@@ -786,8 +782,8 @@ bitmap_first_set_bit (const_bitmap a)
   bit_no += ix * BITMAP_WORD_BITS;
 
 #if GCC_VERSION >= 3004
-  gcc_assert (sizeof(long) == sizeof (word));
-  bit_no += __builtin_ctzl (word);
+  gcc_assert (sizeof(unsigned int) == sizeof(BITMAP_WORD));
+  bit_no += __builtin_ctz (word);
 #else
   /* Binary search for the first set bit.  */
 #if BITMAP_WORD_BITS > 64
@@ -840,8 +836,8 @@ bitmap_last_set_bit (const_bitmap a)
 
   /* Binary search for the last set bit.  */
 #if GCC_VERSION >= 3004
-  gcc_assert (sizeof(long) == sizeof (word));
-  bit_no += sizeof (long) * 8 - __builtin_ctzl (word);
+  gcc_assert (sizeof(unsigned int) == sizeof(BITMAP_WORD));
+  bit_no += sizeof (int) * 8 - __builtin_ctz (word);
 #else
 #if BITMAP_WORD_BITS > 64
 #error "Fill out the table."
@@ -2069,14 +2065,14 @@ debug_bitmap_file (FILE *file, const_bit
 	for (j = 0; j < BITMAP_WORD_BITS; j++)
 	  if ((ptr->bits[i] >> j) & 1)
 	    {
+	      unsigned bit = ptr->indx * BITMAP_ELEMENT_ALL_BITS
+		+ i * BITMAP_WORD_BITS + j;
 	      if (col > 70)
 		{
 		  fprintf (file, "\n\t\t\t");
 		  col = 24;
 		}
-
-	      fprintf (file, " %u", (ptr->indx * BITMAP_ELEMENT_ALL_BITS
-				     + i * BITMAP_WORD_BITS + j));
+	      fprintf (file, " %u", bit);
 	      col += 4;
 	    }
 
Index: bitmap.h
===================================================================
--- bitmap.h	(revision 190474)
+++ bitmap.h	(working copy)
@@ -131,21 +131,22 @@ along with GCC; see the file COPYING3.  
 #include "statistics.h"
 #include "obstack.h"
 
-/* Fundamental storage type for bitmap.  */
+/* Fundamental storage type for bitmap.  This is 'int' instead of long
+   because the element index is also an int.  */
+typedef unsigned int BITMAP_WORD;
 
-typedef unsigned long BITMAP_WORD;
 /* BITMAP_WORD_BITS needs to be unsigned, but cannot contain casts as
    it is used in preprocessor directives -- hence the 1u.  */
-#define BITMAP_WORD_BITS (CHAR_BIT * SIZEOF_LONG * 1u)
+#define BITMAP_WORD_BITS (CHAR_BIT * sizeof(BITMAP_WORD) * 1u)
 
-/* Number of words to use for each element in the linked list.  */
-
-#ifndef BITMAP_ELEMENT_WORDS
-#define BITMAP_ELEMENT_WORDS ((128 + BITMAP_WORD_BITS - 1) / BITMAP_WORD_BITS)
-#endif
+/* Number of words to use for each element in the linked list.  The 160
+   bits results in the most space-efficient bitmap_element.  This used
+   to be 128, but we don't want to waste any bits in a bitmap_element.
+   On almost all modern machines, div/mod by 5 is cheaper than chasing
+   pointer chains.  */
+#define BITMAP_ELEMENT_WORDS ((160 + BITMAP_WORD_BITS - 1) / BITMAP_WORD_BITS)
 
 /* Number of bits in each actual element of a bitmap.  */
-
 #define BITMAP_ELEMENT_ALL_BITS (BITMAP_ELEMENT_WORDS * BITMAP_WORD_BITS)
 
 /* Obstack for allocating bitmaps and elements from.  */
@@ -215,7 +216,7 @@ extern bool bitmap_intersect_compl_p (co
 extern bool bitmap_single_bit_set_p (const_bitmap);
 
 /* Count the number of bits set in the bitmap.  */
-extern unsigned long bitmap_count_bits (const_bitmap);
+extern unsigned int bitmap_count_bits (const_bitmap);
 
 /* Boolean operations on bitmaps.  The _into variants are two operand
    versions that modify the first source operand.  The other variants
@@ -499,8 +500,8 @@ bmp_iter_next_bit (bitmap_iterator * bi,
 {
 #if (GCC_VERSION >= 3004)
   {
-    unsigned int n = __builtin_ctzl (bi->bits);
-    gcc_assert (sizeof (unsigned long) == sizeof (BITMAP_WORD));
+    gcc_assert (sizeof (unsigned int) == sizeof (BITMAP_WORD));
+    unsigned int n = __builtin_ctz (bi->bits);
     bi->bits >>= n;
     *bit_no += n;
   }
Index: dse.c
===================================================================
--- dse.c	(revision 190475)
+++ dse.c	(working copy)
@@ -2930,9 +2930,9 @@ dse_step2_init (void)
       group->process_globally = false;
       if (dump_file)
 	{
-	  fprintf (dump_file, "group %d(%d+%d): ", i,
-		   (int)bitmap_count_bits (group->store2_n),
-		   (int)bitmap_count_bits (group->store2_p));
+	  fprintf (dump_file, "group %d(%u+%u): ", i,
+		   bitmap_count_bits (group->store2_n),
+		   bitmap_count_bits (group->store2_p));
 	  bitmap_print (dump_file, group->store2_n, "n ", " ");
 	  bitmap_print (dump_file, group->store2_p, "p ", "\n");
 	}
Index: df-problems.c
===================================================================
--- df-problems.c	(revision 190475)
+++ df-problems.c	(working copy)
@@ -630,11 +630,11 @@ df_rd_top_dump (basic_block bb, FILE *fi
   if (!bb_info)
     return;
 
-  fprintf (file, ";; rd  in  \t(%d)\n", (int) bitmap_count_bits (&bb_info->in));
+  fprintf (file, ";; rd  in  \t(%u)\n", bitmap_count_bits (&bb_info->in));
   dump_bitmap (file, &bb_info->in);
-  fprintf (file, ";; rd  gen \t(%d)\n", (int) bitmap_count_bits (&bb_info->gen));
+  fprintf (file, ";; rd  gen \t(%u)\n", bitmap_count_bits (&bb_info->gen));
   dump_bitmap (file, &bb_info->gen);
-  fprintf (file, ";; rd  kill\t(%d)\n", (int) bitmap_count_bits (&bb_info->kill));
+  fprintf (file, ";; rd  kill\t(%u)\n", bitmap_count_bits (&bb_info->kill));
   dump_bitmap (file, &bb_info->kill);
 }
 
@@ -648,7 +648,7 @@ df_rd_bottom_dump (basic_block bb, FILE 
   if (!bb_info)
     return;
 
-  fprintf (file, ";; rd  out \t(%d)\n", (int) bitmap_count_bits (&bb_info->out));
+  fprintf (file, ";; rd  out \t(%u)\n", bitmap_count_bits (&bb_info->out));
   dump_bitmap (file, &bb_info->out);
 }
 

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2012-08-17 13:06 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-08-17 11:47 [patch][RFC] 160-bits bitmap_element Steven Bosscher
2012-08-17 11:54 ` Richard Guenther
2012-08-17 12:05   ` Steven Bosscher
2012-08-17 12:13     ` Richard Guenther
2012-08-17 12:16       ` Richard Guenther
2012-08-17 12:24         ` Jakub Jelinek
2012-08-17 13:06           ` Steven Bosscher
2012-08-17 12:14     ` Jakub Jelinek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).