From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1791) id 1E837385C413; Mon, 6 Sep 2021 18:45:08 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 1E837385C413 Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Adhemerval Zanella To: glibc-cvs@sourceware.org Subject: [glibc/azanella/qsort-fixes] stdlib: Implement introsort with qsort X-Act-Checkin: glibc X-Git-Author: Adhemerval Zanella X-Git-Refname: refs/heads/azanella/qsort-fixes X-Git-Oldrev: 44b3018f45aa08f8facf7728e2a275846c120f2c X-Git-Newrev: 57788bd80c352d972e678fdcc00e758f7b03ee92 Message-Id: <20210906184508.1E837385C413@sourceware.org> Date: Mon, 6 Sep 2021 18:45:08 +0000 (GMT) X-BeenThere: glibc-cvs@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Glibc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 06 Sep 2021 18:45:08 -0000 https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=57788bd80c352d972e678fdcc00e758f7b03ee92 commit 57788bd80c352d972e678fdcc00e758f7b03ee92 Author: Adhemerval Zanella Date: Thu Sep 2 16:25:23 2021 -0300 stdlib: Implement introsort with qsort This patch adds a introsort implementation on qsort to avoid worse-case performance of quicksort to O(nlog n). The heapsort fallback used is a heapsort based on Linux implementation (commit 22a241ccb2c19962a). As a side note the introsort implementation is similar the one used on libstdc++ for std::sort. Checked on x86_64-linux-gnu. Diff: --- stdlib/qsort.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 88 insertions(+), 7 deletions(-) diff --git a/stdlib/qsort.c b/stdlib/qsort.c index 60ca3b48b0..f00f6054dd 100644 --- a/stdlib/qsort.c +++ b/stdlib/qsort.c @@ -112,6 +112,7 @@ typedef struct { char *lo; char *hi; + size_t depth; } stack_node; /* The stack needs log (total_elements) entries (we could even subtract @@ -121,23 +122,92 @@ typedef struct enum { STACK_SIZE = CHAR_BIT * sizeof (size_t) }; static inline stack_node * -push (stack_node *top, char *lo, char *hi) +push (stack_node *top, char *lo, char *hi, size_t depth) { top->lo = lo; top->hi = hi; + top->depth = depth; return ++top; } static inline stack_node * -pop (stack_node *top, char **lo, char **hi) +pop (stack_node *top, char **lo, char **hi, size_t *depth) { --top; *lo = top->lo; *hi = top->hi; + *depth = top->depth; return top; } +/* A fast, small, non-recursive O(nlog n) heapsort, adapted from Linux + lib/sort.c. Used on introsort implementation as a fallback routine with + worst-case performance of O(nlog n) and worst-case space complexity of + O(1). */ + +static inline size_t +parent (size_t i, unsigned int lsbit, size_t size) +{ + i -= size; + i -= size & -(i & lsbit); + return i / 2; +} + +static void +heapsort_r (void *base, void *end, size_t size, swap_func_t swap_func, + __compar_d_fn_t cmp, void *arg) +{ + size_t num = ((uintptr_t) end - (uintptr_t) base) / size; + size_t n = num * size, a = (num/2) * size; + /* Used to find parent */ + const unsigned int lsbit = size & -size; + + /* num < 2 || size == 0. */ + if (a == 0) + return; + + for (;;) + { + size_t b, c, d; + + if (a != 0) + /* Building heap: sift down --a */ + a -= size; + else if (n -= size) + /* Sorting: Extract root to --n */ + do_swap (base, base + n, size, swap_func); + else + break; + + /* Sift element at "a" down into heap. This is the "bottom-up" variant, + which significantly reduces calls to cmp_func(): we find the sift-down + path all the way to the leaves (one compare per level), then backtrack + to find where to insert the target element. + + Because elements tend to sift down close to the leaves, this uses fewer + compares than doing two per level on the way down. (A bit more than + half as many on average, 3/4 worst-case.). */ + for (b = a; c = 2 * b + size, (d = c + size) < n;) + b = cmp (base + c, base + d, arg) >= 0 ? c : d; + if (d == n) + /* Special case last leaf with no sibling. */ + b = c; + + /* Now backtrack from "b" to the correct location for "a". */ + while (b != a && cmp (base + a, base + b, arg) >= 0) + b = parent (b, lsbit, size); + /* Where "a" belongs. */ + c = b; + while (b != a) + { + /* Shift it into place. */ + b = parent (b, lsbit, size); + do_swap (base + b, base + c, size, swap_func); + } + } +} + /* Order size using quicksort. This implementation incorporates four optimizations discussed in Sedgewick: @@ -222,7 +292,7 @@ _quicksort (void *const pbase, size_t total_elems, size_t size, const size_t max_thresh = MAX_THRESH * size; - if (total_elems == 0) + if (total_elems <= 1) /* Avoid lossage with unsigned arithmetic below. */ return; @@ -234,6 +304,10 @@ _quicksort (void *const pbase, size_t total_elems, size_t size, else swap_func = SWAP_BYTES; + /* Maximum depth before quicksort switches to heapsort. */ + size_t depth = 2 * (sizeof (size_t) * CHAR_BIT - 1 + - __builtin_clzl (total_elems)); + if (total_elems > MAX_THRESH) { char *lo = base_ptr; @@ -241,10 +315,17 @@ _quicksort (void *const pbase, size_t total_elems, size_t size, stack_node stack[STACK_SIZE]; stack_node *top = stack; - top = push (top, NULL, NULL); + top = push (top, NULL, NULL, depth); while (stack < top) { + if (depth == 0) + { + heapsort_r (lo, hi, size, swap_func, cmp, arg); + top = pop (top, &lo, &hi, &depth); + continue; + } + char *left_ptr; char *right_ptr; @@ -308,7 +389,7 @@ _quicksort (void *const pbase, size_t total_elems, size_t size, { if ((size_t) (hi - left_ptr) <= max_thresh) /* Ignore both small partitions. */ - top = pop (top, &lo, &hi); + top = pop (top, &lo, &hi, &depth); else /* Ignore small left partition. */ lo = left_ptr; @@ -319,13 +400,13 @@ _quicksort (void *const pbase, size_t total_elems, size_t size, else if ((right_ptr - lo) > (hi - left_ptr)) { /* Push larger left partition indices. */ - top = push (top, lo, right_ptr); + top = push (top, lo, right_ptr, depth - 1); lo = left_ptr; } else { /* Push larger right partition indices. */ - top = push (top, left_ptr, hi); + top = push (top, left_ptr, hi, depth - 1); hi = right_ptr; } }