From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
To: libc-alpha@sourceware.org
Subject: [PATCH v4 4/6] stdlib: Implement introsort with qsort
Date: Tue, 11 Jul 2023 16:07:20 -0300 [thread overview]
Message-ID: <20230711190722.4028821-5-adhemerval.zanella@linaro.org> (raw)
In-Reply-To: <20230711190722.4028821-1-adhemerval.zanella@linaro.org>
This patch adds a introsort implementation on qsort to avoid worse-case
performance of quicksort to O(nlog n). The heapsort fallback used is a
heapsort based on Linux implementation (commit 22a241ccb2c19962a). As a
side note the introsort implementation is similar the one used on
libstdc++ for std::sort.
Checked on x86_64-linux-gnu.
---
stdlib/qsort.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 86 insertions(+), 7 deletions(-)
diff --git a/stdlib/qsort.c b/stdlib/qsort.c
index 134e784bd1..f08640b5d9 100644
--- a/stdlib/qsort.c
+++ b/stdlib/qsort.c
@@ -116,6 +116,7 @@ typedef struct
{
char *lo;
char *hi;
+ size_t depth;
} stack_node;
/* The stack needs log (total_elements) entries (we could even subtract
@@ -125,22 +126,90 @@ typedef struct
enum { STACK_SIZE = CHAR_BIT * sizeof (size_t) };
static inline stack_node *
-push (stack_node *top, char *lo, char *hi)
+push (stack_node *top, char *lo, char *hi, size_t depth)
{
top->lo = lo;
top->hi = hi;
+ top->depth = depth;
return ++top;
}
static inline stack_node *
-pop (stack_node *top, char **lo, char **hi)
+pop (stack_node *top, char **lo, char **hi, size_t *depth)
{
--top;
*lo = top->lo;
*hi = top->hi;
+ *depth = top->depth;
return top;
}
+/* A fast, small, non-recursive O(nlog n) heapsort, adapted from Linux
+ lib/sort.c. Used on introsort implementation as a fallback routine with
+ worst-case performance of O(nlog n) and worst-case space complexity of
+ O(1). */
+
+static inline size_t
+parent (size_t i, unsigned int lsbit, size_t size)
+{
+ i -= size;
+ i -= size & -(i & lsbit);
+ return i / 2;
+}
+
+static void
+heapsort_r (void *base, void *end, size_t size, swap_func_t swap_func,
+ __compar_d_fn_t cmp, void *arg)
+{
+ size_t num = ((uintptr_t) end - (uintptr_t) base) / size;
+ size_t n = num * size, a = (num/2) * size;
+ /* Used to find parent */
+ const unsigned int lsbit = size & -size;
+
+ /* num < 2 || size == 0. */
+ if (a == 0)
+ return;
+
+ for (;;)
+ {
+ size_t b, c, d;
+
+ if (a != 0)
+ /* Building heap: sift down --a */
+ a -= size;
+ else if (n -= size)
+ /* Sorting: Extract root to --n */
+ do_swap (base, base + n, size, swap_func);
+ else
+ break;
+
+ /* Sift element at "a" down into heap. This is the "bottom-up" variant,
+ which significantly reduces calls to cmp_func(): we find the sift-down
+ path all the way to the leaves (one compare per level), then backtrack
+ to find where to insert the target element.
+
+ Because elements tend to sift down close to the leaves, this uses fewer
+ compares than doing two per level on the way down. (A bit more than
+ half as many on average, 3/4 worst-case.). */
+ for (b = a; c = 2 * b + size, (d = c + size) < n;)
+ b = cmp (base + c, base + d, arg) >= 0 ? c : d;
+ if (d == n)
+ /* Special case last leaf with no sibling. */
+ b = c;
+
+ /* Now backtrack from "b" to the correct location for "a". */
+ while (b != a && cmp (base + a, base + b, arg) >= 0)
+ b = parent (b, lsbit, size);
+ /* Where "a" belongs. */
+ c = b;
+ while (b != a)
+ {
+ /* Shift it into place. */
+ b = parent (b, lsbit, size);
+ do_swap (base + b, base + c, size, swap_func);
+ }
+ }
+}
/* Order size using quicksort. This implementation incorporates
four optimizations discussed in Sedgewick:
@@ -226,7 +295,7 @@ _quicksort (void *const pbase, size_t total_elems, size_t size,
const size_t max_thresh = MAX_THRESH * size;
- if (total_elems == 0)
+ if (total_elems <= 0)
/* Avoid lossage with unsigned arithmetic below. */
return;
@@ -238,6 +307,9 @@ _quicksort (void *const pbase, size_t total_elems, size_t size,
else
swap_func = SWAP_BYTES;
+ /* Maximum depth before quicksort switches to heapsort. */
+ size_t depth = 2 * (CHAR_BIT - 1 - __builtin_clzl (total_elems));
+
if (total_elems > MAX_THRESH)
{
char *lo = base_ptr;
@@ -245,10 +317,17 @@ _quicksort (void *const pbase, size_t total_elems, size_t size,
stack_node stack[STACK_SIZE];
stack_node *top = stack;
- top = push (top, NULL, NULL);
+ top = push (top, NULL, NULL, depth);
while (stack < top)
{
+ if (depth == 0)
+ {
+ heapsort_r (lo, hi, size, swap_func, cmp, arg);
+ top = pop (top, &lo, &hi, &depth);
+ continue;
+ }
+
char *left_ptr;
char *right_ptr;
@@ -312,7 +391,7 @@ _quicksort (void *const pbase, size_t total_elems, size_t size,
{
if ((size_t) (hi - left_ptr) <= max_thresh)
/* Ignore both small partitions. */
- top = pop (top, &lo, &hi);
+ top = pop (top, &lo, &hi, &depth);
else
/* Ignore small left partition. */
lo = left_ptr;
@@ -323,13 +402,13 @@ _quicksort (void *const pbase, size_t total_elems, size_t size,
else if ((right_ptr - lo) > (hi - left_ptr))
{
/* Push larger left partition indices. */
- top = push (top, lo, right_ptr);
+ top = push (top, lo, right_ptr, depth - 1);
lo = left_ptr;
}
else
{
/* Push larger right partition indices. */
- top = push (top, left_ptr, hi);
+ top = push (top, left_ptr, hi, depth - 1);
hi = right_ptr;
}
}
--
2.34.1
next prev parent reply other threads:[~2023-07-11 19:07 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-07-11 19:07 [PATCH v4 0/6] Use introsort for qsort Adhemerval Zanella
2023-07-11 19:07 ` [PATCH v4 1/6] stdlib: Optimization qsort{_r} swap implementation (BZ 19305) Adhemerval Zanella
2023-07-11 23:40 ` Noah Goldstein
2023-07-12 19:40 ` Adhemerval Zanella Netto
2023-07-12 21:04 ` Paul Eggert
2023-07-13 11:07 ` Adhemerval Zanella Netto
2023-07-13 13:13 ` Alexander Monakov
2023-07-13 13:29 ` Adhemerval Zanella Netto
2023-07-11 19:07 ` [PATCH v4 2/6] stdlib: Move insertion sort out qsort Adhemerval Zanella
2023-07-11 23:46 ` Noah Goldstein
2023-07-12 7:28 ` Andreas Schwab
2023-07-12 20:35 ` Adhemerval Zanella Netto
2023-07-11 19:07 ` [PATCH v4 3/6] stdlib: qsort: Move some macros to inline function Adhemerval Zanella
2023-07-11 23:44 ` Noah Goldstein
2023-07-12 20:45 ` Adhemerval Zanella Netto
2023-07-11 19:07 ` Adhemerval Zanella [this message]
2023-07-12 5:39 ` [PATCH v4 4/6] stdlib: Implement introsort with qsort Alexander Monakov
2023-07-12 20:55 ` Adhemerval Zanella Netto
2023-07-12 21:55 ` Paul Eggert
2023-07-13 11:53 ` Adhemerval Zanella Netto
2023-07-11 19:07 ` [PATCH v4 5/6] stdlib: Remove use of mergesort on qsort (BZ 21719) Adhemerval Zanella
2023-07-12 22:04 ` Paul Eggert
2023-07-13 11:55 ` Adhemerval Zanella Netto
2023-07-11 19:07 ` [PATCH v4 6/6] stdlib: Add more qsort{_r} coverage Adhemerval Zanella
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230711190722.4028821-5-adhemerval.zanella@linaro.org \
--to=adhemerval.zanella@linaro.org \
--cc=libc-alpha@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).