public inbox for glibc-cvs@sourceware.org
help / color / mirror / Atom feed
* [glibc/nsz/mtag] malloc: Use branches instead of mtag_granule_mask
@ 2021-03-04 16:25 Szabolcs Nagy
  0 siblings, 0 replies; 4+ messages in thread
From: Szabolcs Nagy @ 2021-03-04 16:25 UTC (permalink / raw)
  To: glibc-cvs

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=eb88a7df0ed7f4ff1708773f3131d0e086de488b

commit eb88a7df0ed7f4ff1708773f3131d0e086de488b
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date:   Mon Feb 8 12:39:01 2021 +0000

    malloc: Use branches instead of mtag_granule_mask
    
    The branches may be better optimized since mtag_enabled is widely used.
    
    Granule size larger than a chunk header is not supported since then we
    cannot have both the chunk header and user area granule aligned.  To
    fix that for targets with large granule, the chunk layout has to change.
    
    So code that attempted to handle the granule mask generally was changed.
    This simplified CHUNK_AVAILABLE_SIZE and the logic in malloc_usable_size.

Diff:
---
 malloc/arena.c  |  1 -
 malloc/malloc.c | 34 ++++++++++++++--------------------
 2 files changed, 14 insertions(+), 21 deletions(-)

diff --git a/malloc/arena.c b/malloc/arena.c
index 1e83bb66bd..9fbbb38a15 100644
--- a/malloc/arena.c
+++ b/malloc/arena.c
@@ -306,7 +306,6 @@ ptmalloc_init (void)
 
       mtag_enabled = true;
       mtag_mmap_flags = __MTAG_MMAP_FLAGS;
-      mtag_granule_mask = ~(size_t)(__MTAG_GRANULE_SIZE - 1);
     }
 #endif
 
diff --git a/malloc/malloc.c b/malloc/malloc.c
index b1ee0f450b..8854afec88 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -442,7 +442,6 @@ void *(*__morecore)(ptrdiff_t) = __default_morecore;
 #ifdef USE_MTAG
 static bool mtag_enabled = false;
 static int mtag_mmap_flags = 0;
-static size_t mtag_granule_mask = ~(size_t)0;
 #else
 # define mtag_enabled false
 # define mtag_mmap_flags 0
@@ -1333,15 +1332,16 @@ nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
 
 /* Available size of chunk.  This is the size of the real usable data
-   in the chunk, plus the chunk header.  */
-#ifdef USE_MTAG
-#define CHUNK_AVAILABLE_SIZE(p) \
-  ((chunksize (p) + (chunk_is_mmapped (p) ? 0 : SIZE_SZ))	\
-   & mtag_granule_mask)
-#else
-#define CHUNK_AVAILABLE_SIZE(p) \
-  (chunksize (p) + (chunk_is_mmapped (p) ? 0 : SIZE_SZ))
-#endif
+   in the chunk, plus the chunk header.  Note: If memory tagging is
+   enabled the layout changes to accommodate the granule size, this is
+   wasteful for small allocations so not done by default.  The logic
+   does not work if chunk headers are not granule aligned.  */
+_Static_assert (__MTAG_GRANULE_SIZE <= CHUNK_HDR_SZ,
+		"memory tagging is not supported with large granule.");
+#define CHUNK_AVAILABLE_SIZE(p)                                       \
+  (__MTAG_GRANULE_SIZE > SIZE_SZ && __glibc_unlikely (mtag_enabled) ? \
+    chunksize (p) :                                                   \
+    chunksize (p) + (chunk_is_mmapped (p) ? 0 : SIZE_SZ))
 
 /* Check if REQ overflows when padded and aligned and if the resulting value
    is less than PTRDIFF_T.  Returns TRUE and the requested size or MINSIZE in
@@ -1353,7 +1353,6 @@ checked_request2size (size_t req, size_t *sz) __nonnull (1)
   if (__glibc_unlikely (req > PTRDIFF_MAX))
     return false;
 
-#ifdef USE_MTAG
   /* When using tagged memory, we cannot share the end of the user
      block with the header for the next chunk, so ensure that we
      allocate blocks that are rounded up to the granule size.  Take
@@ -1361,8 +1360,9 @@ checked_request2size (size_t req, size_t *sz) __nonnull (1)
      number.  Ideally, this would be part of request2size(), but that
      must be a macro that produces a compile time constant if passed
      a constant literal.  */
-  req = (req + ~mtag_granule_mask) & mtag_granule_mask;
-#endif
+  if (__glibc_unlikely (mtag_enabled))
+    req = (req + (__MTAG_GRANULE_SIZE - 1)) &
+	  ~(size_t)(__MTAG_GRANULE_SIZE - 1);
 
   *sz = request2size (req);
   return true;
@@ -5112,14 +5112,8 @@ musable (void *mem)
 	    result = chunksize (p) - CHUNK_HDR_SZ;
 	}
       else if (inuse (p))
-	result = chunksize (p) - SIZE_SZ;
+	result = CHUNK_AVAILABLE_SIZE (p) - CHUNK_HDR_SZ;
 
-#ifdef USE_MTAG
-      /* The usable space may be reduced if memory tagging is needed,
-	 since we cannot share the user-space data with malloc's internal
-	 data structure.  */
-      result &= mtag_granule_mask;
-#endif
       return result;
     }
   return 0;


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [glibc/nsz/mtag] malloc: Use branches instead of mtag_granule_mask
@ 2021-03-19 11:57 Szabolcs Nagy
  0 siblings, 0 replies; 4+ messages in thread
From: Szabolcs Nagy @ 2021-03-19 11:57 UTC (permalink / raw)
  To: glibc-cvs

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=db0edd3cb8ba74b3b904122f643b4f5b021ee5a3

commit db0edd3cb8ba74b3b904122f643b4f5b021ee5a3
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date:   Mon Feb 8 12:39:01 2021 +0000

    malloc: Use branches instead of mtag_granule_mask
    
    The branches may be better optimized since mtag_enabled is widely used.
    
    Granule size larger than a chunk header is not supported since then we
    cannot have both the chunk header and user area granule aligned.  To
    fix that for targets with large granule, the chunk layout has to change.
    
    So code that attempted to handle the granule mask generally was changed.
    This simplified CHUNK_AVAILABLE_SIZE and the logic in malloc_usable_size.
    
    Reviewed-by: DJ Delorie <dj@redhat.com>

Diff:
---
 malloc/arena.c  |  1 -
 malloc/malloc.c | 34 ++++++++++++++--------------------
 2 files changed, 14 insertions(+), 21 deletions(-)

diff --git a/malloc/arena.c b/malloc/arena.c
index 1e83bb66bd..9fbbb38a15 100644
--- a/malloc/arena.c
+++ b/malloc/arena.c
@@ -306,7 +306,6 @@ ptmalloc_init (void)
 
       mtag_enabled = true;
       mtag_mmap_flags = __MTAG_MMAP_FLAGS;
-      mtag_granule_mask = ~(size_t)(__MTAG_GRANULE_SIZE - 1);
     }
 #endif
 
diff --git a/malloc/malloc.c b/malloc/malloc.c
index 0b2aff3768..849bd8e2c9 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -442,7 +442,6 @@ void *(*__morecore)(ptrdiff_t) = __default_morecore;
 #ifdef USE_MTAG
 static bool mtag_enabled = false;
 static int mtag_mmap_flags = 0;
-static size_t mtag_granule_mask = ~(size_t)0;
 #else
 # define mtag_enabled false
 # define mtag_mmap_flags 0
@@ -1333,15 +1332,16 @@ nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
 
 /* Available size of chunk.  This is the size of the real usable data
-   in the chunk, plus the chunk header.  */
-#ifdef USE_MTAG
-#define CHUNK_AVAILABLE_SIZE(p) \
-  ((chunksize (p) + (chunk_is_mmapped (p) ? 0 : SIZE_SZ))	\
-   & mtag_granule_mask)
-#else
-#define CHUNK_AVAILABLE_SIZE(p) \
-  (chunksize (p) + (chunk_is_mmapped (p) ? 0 : SIZE_SZ))
-#endif
+   in the chunk, plus the chunk header.  Note: If memory tagging is
+   enabled the layout changes to accommodate the granule size, this is
+   wasteful for small allocations so not done by default.  The logic
+   does not work if chunk headers are not granule aligned.  */
+_Static_assert (__MTAG_GRANULE_SIZE <= CHUNK_HDR_SZ,
+		"memory tagging is not supported with large granule.");
+#define CHUNK_AVAILABLE_SIZE(p)                                       \
+  (__MTAG_GRANULE_SIZE > SIZE_SZ && __glibc_unlikely (mtag_enabled) ? \
+    chunksize (p) :                                                   \
+    chunksize (p) + (chunk_is_mmapped (p) ? 0 : SIZE_SZ))
 
 /* Check if REQ overflows when padded and aligned and if the resulting value
    is less than PTRDIFF_T.  Returns TRUE and the requested size or MINSIZE in
@@ -1353,7 +1353,6 @@ checked_request2size (size_t req, size_t *sz) __nonnull (1)
   if (__glibc_unlikely (req > PTRDIFF_MAX))
     return false;
 
-#ifdef USE_MTAG
   /* When using tagged memory, we cannot share the end of the user
      block with the header for the next chunk, so ensure that we
      allocate blocks that are rounded up to the granule size.  Take
@@ -1361,8 +1360,9 @@ checked_request2size (size_t req, size_t *sz) __nonnull (1)
      number.  Ideally, this would be part of request2size(), but that
      must be a macro that produces a compile time constant if passed
      a constant literal.  */
-  req = (req + ~mtag_granule_mask) & mtag_granule_mask;
-#endif
+  if (__glibc_unlikely (mtag_enabled))
+    req = (req + (__MTAG_GRANULE_SIZE - 1)) &
+	  ~(size_t)(__MTAG_GRANULE_SIZE - 1);
 
   *sz = request2size (req);
   return true;
@@ -5112,14 +5112,8 @@ musable (void *mem)
 	    result = chunksize (p) - CHUNK_HDR_SZ;
 	}
       else if (inuse (p))
-	result = chunksize (p) - SIZE_SZ;
+	result = CHUNK_AVAILABLE_SIZE (p) - CHUNK_HDR_SZ;
 
-#ifdef USE_MTAG
-      /* The usable space may be reduced if memory tagging is needed,
-	 since we cannot share the user-space data with malloc's internal
-	 data structure.  */
-      result &= mtag_granule_mask;
-#endif
       return result;
     }
   return 0;


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [glibc/nsz/mtag] malloc: Use branches instead of mtag_granule_mask
@ 2021-03-11 17:40 Szabolcs Nagy
  0 siblings, 0 replies; 4+ messages in thread
From: Szabolcs Nagy @ 2021-03-11 17:40 UTC (permalink / raw)
  To: glibc-cvs

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=774c11a175cf4d41b7798b312e8419b3934c60ac

commit 774c11a175cf4d41b7798b312e8419b3934c60ac
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date:   Mon Feb 8 12:39:01 2021 +0000

    malloc: Use branches instead of mtag_granule_mask
    
    The branches may be better optimized since mtag_enabled is widely used.
    
    Granule size larger than a chunk header is not supported since then we
    cannot have both the chunk header and user area granule aligned.  To
    fix that for targets with large granule, the chunk layout has to change.
    
    So code that attempted to handle the granule mask generally was changed.
    This simplified CHUNK_AVAILABLE_SIZE and the logic in malloc_usable_size.
    
    Reviewed-by: DJ Delorie <dj@redhat.com>

Diff:
---
 malloc/arena.c  |  1 -
 malloc/malloc.c | 34 ++++++++++++++--------------------
 2 files changed, 14 insertions(+), 21 deletions(-)

diff --git a/malloc/arena.c b/malloc/arena.c
index 1e83bb66bd..9fbbb38a15 100644
--- a/malloc/arena.c
+++ b/malloc/arena.c
@@ -306,7 +306,6 @@ ptmalloc_init (void)
 
       mtag_enabled = true;
       mtag_mmap_flags = __MTAG_MMAP_FLAGS;
-      mtag_granule_mask = ~(size_t)(__MTAG_GRANULE_SIZE - 1);
     }
 #endif
 
diff --git a/malloc/malloc.c b/malloc/malloc.c
index 0b2aff3768..849bd8e2c9 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -442,7 +442,6 @@ void *(*__morecore)(ptrdiff_t) = __default_morecore;
 #ifdef USE_MTAG
 static bool mtag_enabled = false;
 static int mtag_mmap_flags = 0;
-static size_t mtag_granule_mask = ~(size_t)0;
 #else
 # define mtag_enabled false
 # define mtag_mmap_flags 0
@@ -1333,15 +1332,16 @@ nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
 
 /* Available size of chunk.  This is the size of the real usable data
-   in the chunk, plus the chunk header.  */
-#ifdef USE_MTAG
-#define CHUNK_AVAILABLE_SIZE(p) \
-  ((chunksize (p) + (chunk_is_mmapped (p) ? 0 : SIZE_SZ))	\
-   & mtag_granule_mask)
-#else
-#define CHUNK_AVAILABLE_SIZE(p) \
-  (chunksize (p) + (chunk_is_mmapped (p) ? 0 : SIZE_SZ))
-#endif
+   in the chunk, plus the chunk header.  Note: If memory tagging is
+   enabled the layout changes to accommodate the granule size, this is
+   wasteful for small allocations so not done by default.  The logic
+   does not work if chunk headers are not granule aligned.  */
+_Static_assert (__MTAG_GRANULE_SIZE <= CHUNK_HDR_SZ,
+		"memory tagging is not supported with large granule.");
+#define CHUNK_AVAILABLE_SIZE(p)                                       \
+  (__MTAG_GRANULE_SIZE > SIZE_SZ && __glibc_unlikely (mtag_enabled) ? \
+    chunksize (p) :                                                   \
+    chunksize (p) + (chunk_is_mmapped (p) ? 0 : SIZE_SZ))
 
 /* Check if REQ overflows when padded and aligned and if the resulting value
    is less than PTRDIFF_T.  Returns TRUE and the requested size or MINSIZE in
@@ -1353,7 +1353,6 @@ checked_request2size (size_t req, size_t *sz) __nonnull (1)
   if (__glibc_unlikely (req > PTRDIFF_MAX))
     return false;
 
-#ifdef USE_MTAG
   /* When using tagged memory, we cannot share the end of the user
      block with the header for the next chunk, so ensure that we
      allocate blocks that are rounded up to the granule size.  Take
@@ -1361,8 +1360,9 @@ checked_request2size (size_t req, size_t *sz) __nonnull (1)
      number.  Ideally, this would be part of request2size(), but that
      must be a macro that produces a compile time constant if passed
      a constant literal.  */
-  req = (req + ~mtag_granule_mask) & mtag_granule_mask;
-#endif
+  if (__glibc_unlikely (mtag_enabled))
+    req = (req + (__MTAG_GRANULE_SIZE - 1)) &
+	  ~(size_t)(__MTAG_GRANULE_SIZE - 1);
 
   *sz = request2size (req);
   return true;
@@ -5112,14 +5112,8 @@ musable (void *mem)
 	    result = chunksize (p) - CHUNK_HDR_SZ;
 	}
       else if (inuse (p))
-	result = chunksize (p) - SIZE_SZ;
+	result = CHUNK_AVAILABLE_SIZE (p) - CHUNK_HDR_SZ;
 
-#ifdef USE_MTAG
-      /* The usable space may be reduced if memory tagging is needed,
-	 since we cannot share the user-space data with malloc's internal
-	 data structure.  */
-      result &= mtag_granule_mask;
-#endif
       return result;
     }
   return 0;


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [glibc/nsz/mtag] malloc: Use branches instead of mtag_granule_mask
@ 2021-03-11 17:39 Szabolcs Nagy
  0 siblings, 0 replies; 4+ messages in thread
From: Szabolcs Nagy @ 2021-03-11 17:39 UTC (permalink / raw)
  To: glibc-cvs

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=774c11a175cf4d41b7798b312e8419b3934c60ac

commit 774c11a175cf4d41b7798b312e8419b3934c60ac
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date:   Mon Feb 8 12:39:01 2021 +0000

    malloc: Use branches instead of mtag_granule_mask
    
    The branches may be better optimized since mtag_enabled is widely used.
    
    Granule size larger than a chunk header is not supported since then we
    cannot have both the chunk header and user area granule aligned.  To
    fix that for targets with large granule, the chunk layout has to change.
    
    So code that attempted to handle the granule mask generally was changed.
    This simplified CHUNK_AVAILABLE_SIZE and the logic in malloc_usable_size.
    
    Reviewed-by: DJ Delorie <dj@redhat.com>

Diff:
---
 malloc/arena.c  |  1 -
 malloc/malloc.c | 34 ++++++++++++++--------------------
 2 files changed, 14 insertions(+), 21 deletions(-)

diff --git a/malloc/arena.c b/malloc/arena.c
index 1e83bb66bd..9fbbb38a15 100644
--- a/malloc/arena.c
+++ b/malloc/arena.c
@@ -306,7 +306,6 @@ ptmalloc_init (void)
 
       mtag_enabled = true;
       mtag_mmap_flags = __MTAG_MMAP_FLAGS;
-      mtag_granule_mask = ~(size_t)(__MTAG_GRANULE_SIZE - 1);
     }
 #endif
 
diff --git a/malloc/malloc.c b/malloc/malloc.c
index 0b2aff3768..849bd8e2c9 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -442,7 +442,6 @@ void *(*__morecore)(ptrdiff_t) = __default_morecore;
 #ifdef USE_MTAG
 static bool mtag_enabled = false;
 static int mtag_mmap_flags = 0;
-static size_t mtag_granule_mask = ~(size_t)0;
 #else
 # define mtag_enabled false
 # define mtag_mmap_flags 0
@@ -1333,15 +1332,16 @@ nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
 
 /* Available size of chunk.  This is the size of the real usable data
-   in the chunk, plus the chunk header.  */
-#ifdef USE_MTAG
-#define CHUNK_AVAILABLE_SIZE(p) \
-  ((chunksize (p) + (chunk_is_mmapped (p) ? 0 : SIZE_SZ))	\
-   & mtag_granule_mask)
-#else
-#define CHUNK_AVAILABLE_SIZE(p) \
-  (chunksize (p) + (chunk_is_mmapped (p) ? 0 : SIZE_SZ))
-#endif
+   in the chunk, plus the chunk header.  Note: If memory tagging is
+   enabled the layout changes to accommodate the granule size, this is
+   wasteful for small allocations so not done by default.  The logic
+   does not work if chunk headers are not granule aligned.  */
+_Static_assert (__MTAG_GRANULE_SIZE <= CHUNK_HDR_SZ,
+		"memory tagging is not supported with large granule.");
+#define CHUNK_AVAILABLE_SIZE(p)                                       \
+  (__MTAG_GRANULE_SIZE > SIZE_SZ && __glibc_unlikely (mtag_enabled) ? \
+    chunksize (p) :                                                   \
+    chunksize (p) + (chunk_is_mmapped (p) ? 0 : SIZE_SZ))
 
 /* Check if REQ overflows when padded and aligned and if the resulting value
    is less than PTRDIFF_T.  Returns TRUE and the requested size or MINSIZE in
@@ -1353,7 +1353,6 @@ checked_request2size (size_t req, size_t *sz) __nonnull (1)
   if (__glibc_unlikely (req > PTRDIFF_MAX))
     return false;
 
-#ifdef USE_MTAG
   /* When using tagged memory, we cannot share the end of the user
      block with the header for the next chunk, so ensure that we
      allocate blocks that are rounded up to the granule size.  Take
@@ -1361,8 +1360,9 @@ checked_request2size (size_t req, size_t *sz) __nonnull (1)
      number.  Ideally, this would be part of request2size(), but that
      must be a macro that produces a compile time constant if passed
      a constant literal.  */
-  req = (req + ~mtag_granule_mask) & mtag_granule_mask;
-#endif
+  if (__glibc_unlikely (mtag_enabled))
+    req = (req + (__MTAG_GRANULE_SIZE - 1)) &
+	  ~(size_t)(__MTAG_GRANULE_SIZE - 1);
 
   *sz = request2size (req);
   return true;
@@ -5112,14 +5112,8 @@ musable (void *mem)
 	    result = chunksize (p) - CHUNK_HDR_SZ;
 	}
       else if (inuse (p))
-	result = chunksize (p) - SIZE_SZ;
+	result = CHUNK_AVAILABLE_SIZE (p) - CHUNK_HDR_SZ;
 
-#ifdef USE_MTAG
-      /* The usable space may be reduced if memory tagging is needed,
-	 since we cannot share the user-space data with malloc's internal
-	 data structure.  */
-      result &= mtag_granule_mask;
-#endif
       return result;
     }
   return 0;


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-03-19 11:57 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-04 16:25 [glibc/nsz/mtag] malloc: Use branches instead of mtag_granule_mask Szabolcs Nagy
2021-03-11 17:39 Szabolcs Nagy
2021-03-11 17:40 Szabolcs Nagy
2021-03-19 11:57 Szabolcs Nagy

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).