public inbox for glibc-cvs@sourceware.org help / color / mirror / Atom feed
From: Adhemerval Zanella <azanella@sourceware.org> To: glibc-cvs@sourceware.org Subject: [glibc/azanella/qsort-fixes] stdlib: Optimization qsort{_r} swap implementation (BZ #19305) Date: Mon, 6 Sep 2021 18:22:42 +0000 (GMT) [thread overview] Message-ID: <20210906182242.68390385AC31@sourceware.org> (raw) https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=77e51ac1cf12829a26c9475c2371f0bb2d48e89f commit 77e51ac1cf12829a26c9475c2371f0bb2d48e89f Author: Adhemerval Zanella <adhemerval.zanella@linaro.org> Date: Tue Jan 16 11:19:15 2018 -0200 stdlib: Optimization qsort{_r} swap implementation (BZ #19305) It optimizes take in consideration both the most common elements are either 32 or 64 bit in size [1] and inputs are aligned to the word boundary. This is similar to the optimization done on lib/sort.c from Linux. This patchs adds an optimized swap operation on qsort based in previous msort one. Instead of byte operation, three variants are provided: 1. Using uint32_t loads and stores. 2. Using uint64_t loads and stores. 3. Generic one with a temporary buffer and memcpy/mempcpy. The 1. and 2. options are selected only either if architecture defines _STRING_ARCH_unaligned or if base pointer is aligned to required type. It also fixes BZ#19305 by checking input size against number of elements 1 besides 0. Checked on x86_64-linux-gnu. [1] https://sourceware.org/pipermail/libc-alpha/2018-August/096984.html Diff: --- stdlib/qsort.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 91 insertions(+), 18 deletions(-) diff --git a/stdlib/qsort.c b/stdlib/qsort.c index 0f6d7ff787..d3e11ec3c1 100644 --- a/stdlib/qsort.c +++ b/stdlib/qsort.c @@ -23,20 +23,85 @@ #include <limits.h> #include <stdlib.h> #include <string.h> +#include <stdbool.h> -/* Byte-wise swap two items of size SIZE. */ -#define SWAP(a, b, size) \ - do \ - { \ - size_t __size = (size); \ - char *__a = (a), *__b = (b); \ - do \ - { \ - char __tmp = *__a; \ - *__a++ = *__b; \ - *__b++ = __tmp; \ - } while (--__size > 0); \ - } while (0) +/* Swap SIZE bytes between addresses A and B. These helpers are provided + along the generic one as an optimization. */ + +typedef void (*swap_func_t)(void * restrict, void * restrict, size_t); + +/* Return trues is elements can be copied used word load and sortes. + The size must be a multiple of the alignment, and the base address. */ +static inline bool +is_aligned_to_copy (const void *base, size_t size, size_t align) +{ + unsigned char lsbits = size; +#if !_STRING_ARCH_unaligned + lsbits |= (unsigned char)(uintptr_t) base; +#endif + return (lsbits & (align - 1)) == 0; +} + +#define SWAP_WORDS_64 (swap_func_t)0 +#define SWAP_WORDS_32 (swap_func_t)1 +#define SWAP_BYTES (swap_func_t)2 + +static void +swap_words_64 (void * restrict a, void * restrict b, size_t n) +{ + do + { + n -= 8; + uint64_t t = *(uint64_t *)(a + n); + *(uint64_t *)(a + n) = *(uint64_t *)(b + n); + *(uint64_t *)(b + n) = t; + } while (n); +} + +static void +swap_words_32 (void * restrict a, void * restrict b, size_t n) +{ + do + { + n -= 4; + uint32_t t = *(uint32_t *)(a + n); + *(uint32_t *)(a + n) = *(uint32_t *)(b + n); + *(uint32_t *)(b + n) = t; + } while (n); +} + +static void +swap_bytes (void * restrict a, void * restrict b, size_t n) +{ + /* Use multiple small memcpys with constant size to enable inlining + on most targets. */ + enum { SWAP_GENERIC_SIZE = 32 }; + unsigned char tmp[SWAP_GENERIC_SIZE]; + while (n > SWAP_GENERIC_SIZE) + { + memcpy (tmp, a, SWAP_GENERIC_SIZE); + a = memcpy (a, b, SWAP_GENERIC_SIZE) + SWAP_GENERIC_SIZE; + b = memcpy (b, tmp, SWAP_GENERIC_SIZE) + SWAP_GENERIC_SIZE; + n -= SWAP_GENERIC_SIZE; + } + memcpy (tmp, a, n); + memcpy (a, b, n); + memcpy (b, tmp, n); +} + +/* Replace the indirect call with a serie of if statements. It should help + the branch predictor. */ +static void +do_swap (void * restrict a, void * restrict b, size_t size, + swap_func_t swap_func) +{ + if (swap_func == SWAP_WORDS_64) + swap_words_64 (a, b, size); + else if (swap_func == SWAP_WORDS_32) + swap_words_32 (a, b, size); + else + swap_bytes (a, b, size); +} /* Discontinue quicksort algorithm when partition gets below this size. This particular magic number was chosen to work best on a Sun 4/260. */ @@ -96,6 +161,14 @@ _quicksort (void *const pbase, size_t total_elems, size_t size, /* Avoid lossage with unsigned arithmetic below. */ return; + swap_func_t swap_func; + if (is_aligned_to_copy (pbase, size, 8)) + swap_func = SWAP_WORDS_64; + else if (is_aligned_to_copy (pbase, size, 4)) + swap_func = SWAP_WORDS_32; + else + swap_func = SWAP_BYTES; + if (total_elems > MAX_THRESH) { char *lo = base_ptr; @@ -119,13 +192,13 @@ _quicksort (void *const pbase, size_t total_elems, size_t size, char *mid = lo + size * ((hi - lo) / size >> 1); if ((*cmp) ((void *) mid, (void *) lo, arg) < 0) - SWAP (mid, lo, size); + do_swap (mid, lo, size, swap_func); if ((*cmp) ((void *) hi, (void *) mid, arg) < 0) - SWAP (mid, hi, size); + do_swap (mid, hi, size, swap_func); else goto jump_over; if ((*cmp) ((void *) mid, (void *) lo, arg) < 0) - SWAP (mid, lo, size); + do_swap (mid, lo, size, swap_func); jump_over:; left_ptr = lo + size; @@ -144,7 +217,7 @@ _quicksort (void *const pbase, size_t total_elems, size_t size, if (left_ptr < right_ptr) { - SWAP (left_ptr, right_ptr, size); + do_swap (left_ptr, right_ptr, size, swap_func); if (mid == left_ptr) mid = right_ptr; else if (mid == right_ptr) @@ -216,7 +289,7 @@ _quicksort (void *const pbase, size_t total_elems, size_t size, tmp_ptr = run_ptr; if (tmp_ptr != base_ptr) - SWAP (tmp_ptr, base_ptr, size); + do_swap (tmp_ptr, base_ptr, size, swap_func); /* Insertion sort, running from left-hand-side up to right-hand-side. */
next reply other threads:[~2021-09-06 18:22 UTC|newest] Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top 2021-09-06 18:22 Adhemerval Zanella [this message] 2021-09-06 18:44 Adhemerval Zanella
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20210906182242.68390385AC31@sourceware.org \ --to=azanella@sourceware.org \ --cc=glibc-cvs@sourceware.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: linkBe sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).