2007-10-04 Jakub Jelinek * stdlib/msort.c: Include stdint.h. (struct msort_param): New type. (msort_with_tmp): Use struct msort_param pointer for unchanging parameters. Add optimized handling for several common sizes and indirect sorting mode. (qsort): Adjust msort_with_tmp callers. For big S use indirect sorting. Suggested by Belazougui Djamel . * stdlib/Makefile (tests): Add tst-qsort2. * stdlib/tst-qsort2.c: New test. --- libc/stdlib/msort.c.jj 2004-02-07 16:57:34.000000000 +0100 +++ libc/stdlib/msort.c 2007-10-04 18:28:43.000000000 +0200 @@ -1,6 +1,6 @@ /* An alternative to qsort, with an identical interface. This file is part of the GNU C Library. - Copyright (C) 1992,95-97,99,2000,01,02,04 Free Software Foundation, Inc. + Copyright (C) 1992,95-97,99,2000,01,02,04,07 Free Software Foundation, Inc. Written by Mike Haertel, September 1988. The GNU C Library is free software; you can redistribute it and/or @@ -19,20 +19,25 @@ 02111-1307 USA. */ #include +#include #include #include #include #include #include -static void msort_with_tmp (void *b, size_t n, size_t s, - __compar_fn_t cmp, char *t); +struct msort_param +{ + size_t s; + size_t var; + __compar_fn_t cmp; + char *t; +}; +static void msort_with_tmp (const struct msort_param *p, void *b, size_t n); static void -msort_with_tmp (void *b, size_t n, size_t s, __compar_fn_t cmp, - char *t) +msort_with_tmp (const struct msort_param *p, void *b, size_t n) { - char *tmp; char *b1, *b2; size_t n1, n2; @@ -42,65 +47,131 @@ msort_with_tmp (void *b, size_t n, size_ n1 = n / 2; n2 = n - n1; b1 = b; - b2 = (char *) b + (n1 * s); + b2 = (char *) b + (n1 * p->s); - msort_with_tmp (b1, n1, s, cmp, t); - msort_with_tmp (b2, n2, s, cmp, t); + msort_with_tmp (p, b1, n1); + msort_with_tmp (p, b2, n2); - tmp = t; + char *tmp = p->t; + const size_t s = p->s; + __compar_fn_t cmp = p->cmp; + switch (p->var) + { + case 0: + while (n1 > 0 && n2 > 0) + { + if ((*cmp) (b1, b2) <= 0) + { + *(uint32_t *) tmp = *(uint32_t *) b1; + b1 += sizeof (uint32_t); + --n1; + } + else + { + *(uint32_t *) tmp = *(uint32_t *) b2; + b2 += sizeof (uint32_t); + --n2; + } + tmp += sizeof (uint32_t); + } + break; + case 1: + while (n1 > 0 && n2 > 0) + { + if ((*cmp) (b1, b2) <= 0) + { + *(uint64_t *) tmp = *(uint64_t *) b1; + b1 += sizeof (uint64_t); + --n1; + } + else + { + *(uint64_t *) tmp = *(uint64_t *) b2; + b2 += sizeof (uint64_t); + --n2; + } + tmp += sizeof (uint64_t); + } + break; + case 2: + while (n1 > 0 && n2 > 0) + { + unsigned long *tmpl = (unsigned long *) tmp; + unsigned long *bl; + + tmp += s; + if ((*cmp) (b1, b2) <= 0) + { + bl = (unsigned long *) b1; + b1 += s; + --n1; + } + else + { + bl = (unsigned long *) b2; + b2 += s; + --n2; + } + while (tmpl < (unsigned long *) tmp) + *tmpl++ = *bl++; + } + break; + case 3: + while (n1 > 0 && n2 > 0) + { + if ((*cmp) (*(const void **) b1, *(const void **) b2) <= 0) + { + *(void **) tmp = *(void **) b1; + b1 += sizeof (void *); + --n1; + } + else + { + *(void **) tmp = *(void **) b2; + b2 += sizeof (void *); + --n2; + } + tmp += sizeof (void *); + } + break; + default: + while (n1 > 0 && n2 > 0) + { + if ((*cmp) (b1, b2) <= 0) + { + tmp = (char *) __mempcpy (tmp, b1, s); + b1 += s; + --n1; + } + else + { + tmp = (char *) __mempcpy (tmp, b2, s); + b2 += s; + --n2; + } + } + break; + } - if (s == OPSIZ && (b1 - (char *) 0) % OPSIZ == 0) - /* We are operating on aligned words. Use direct word stores. */ - while (n1 > 0 && n2 > 0) - { - if ((*cmp) (b1, b2) <= 0) - { - --n1; - *((op_t *) tmp) = *((op_t *) b1); - tmp += sizeof (op_t); - b1 += sizeof (op_t); - } - else - { - --n2; - *((op_t *) tmp) = *((op_t *) b2); - tmp += sizeof (op_t); - b2 += sizeof (op_t); - } - } - else - while (n1 > 0 && n2 > 0) - { - if ((*cmp) (b1, b2) <= 0) - { - tmp = (char *) __mempcpy (tmp, b1, s); - b1 += s; - --n1; - } - else - { - tmp = (char *) __mempcpy (tmp, b2, s); - b2 += s; - --n2; - } - } if (n1 > 0) memcpy (tmp, b1, n1 * s); - memcpy (b, t, (n - n2) * s); + memcpy (b, p->t, (n - n2) * s); } void qsort (void *b, size_t n, size_t s, __compar_fn_t cmp) { - const size_t size = n * s; + size_t size = n * s; + char *tmp = NULL; + struct msort_param p; + + /* For large object sizes use indirect sorting. */ + if (s > 32) + size = 2 * n * sizeof (void *) + s; if (size < 1024) - { - void *buf = __alloca (size); - - /* The temporary array is small, so put it on the stack. */ - msort_with_tmp (b, n, s, cmp, buf); - } + /* The temporary array is small, so put it on the stack. */ + p.t = __alloca (size); else { /* We should avoid allocating too much memory since this might @@ -135,26 +206,89 @@ qsort (void *b, size_t n, size_t s, __co /* If the memory requirements are too high don't allocate memory. */ if (size / pagesize > (size_t) phys_pages) - _quicksort (b, n, s, cmp); - else { - /* It's somewhat large, so malloc it. */ - int save = errno; - char *tmp = malloc (size); - if (tmp == NULL) - { - /* Couldn't get space, so use the slower algorithm - that doesn't need a temporary array. */ - __set_errno (save); - _quicksort (b, n, s, cmp); - } - else - { - __set_errno (save); - msort_with_tmp (b, n, s, cmp, tmp); - free (tmp); - } + _quicksort (b, n, s, cmp); + return; + } + + /* It's somewhat large, so malloc it. */ + int save = errno; + tmp = malloc (size); + __set_errno (save); + if (tmp == NULL) + { + /* Couldn't get space, so use the slower algorithm + that doesn't need a temporary array. */ + _quicksort (b, n, s, cmp); + return; + } + p.t = tmp; + } + + p.s = s; + p.cmp = cmp; + p.var = 4; + + if (s > 32) + { + /* Indirect sorting. */ + char *ip = (char *) b; + void **tp = (void **) (p.t + n * sizeof (void *)); + void **t = tp; + void *tmp_storage = (void *) (tp + n); + + while ((void *) t < tmp_storage) + { + *t++ = ip; + ip += s; + } + p.s = sizeof (void *); + p.var = 3; + msort_with_tmp (&p, p.t + n * sizeof (void *), n); + + /* tp[0] .. tp[n - 1] is now sorted, copy around entries of + the original array. Knuth vol. 3 (2nd ed.) exercise 5.2-10. */ + char *kp; + size_t i; + for (i = 0, ip = (char *) b; i < n; i++, ip += s) + if ((kp = tp[i]) != ip) + { + size_t j = i; + char *jp = ip; + memcpy (tmp_storage, ip, s); + + do + { + size_t k = (kp - (char *) b) / s; + tp[j] = jp; + memcpy (jp, kp, s); + j = k; + jp = kp; + kp = tp[k]; + } + while (kp != ip); + + tp[j] = jp; + memcpy (jp, tmp_storage, s); + } + } + else + { + if ((s & (sizeof (uint32_t) - 1)) == 0 + && ((char *) b - (char *) 0) % __alignof__ (uint32_t) == 0) + { + if (s == sizeof (uint32_t)) + p.var = 0; + else if (s == sizeof (uint64_t) + && ((char *) b - (char *) 0) % __alignof__ (uint64_t) == 0) + p.var = 1; + else if ((s & (sizeof (unsigned long) - 1)) == 0 + && ((char *) b - (char *) 0) + % __alignof__ (unsigned long) == 0) + p.var = 2; } + msort_with_tmp (&p, b, n); } + free (tmp); } libc_hidden_def (qsort) --- libc/stdlib/Makefile.jj 2007-08-04 21:42:30.000000000 +0200 +++ libc/stdlib/Makefile 2007-10-04 17:47:27.000000000 +0200 @@ -68,7 +68,7 @@ tests := tst-strtol tst-strtod testmb t tst-limits tst-rand48 bug-strtod tst-setcontext \ test-a64l tst-qsort tst-system testmb2 bug-strtod2 \ tst-atof1 tst-atof2 tst-strtod2 tst-strtod3 tst-rand48-2 \ - tst-makecontext tst-strtod4 tst-strtod5 + tst-makecontext tst-strtod4 tst-strtod5 tst-qsort2 include ../Makeconfig --- libc/stdlib/tst-qsort2.c.jj 2007-10-04 17:01:39.000000000 +0200 +++ libc/stdlib/tst-qsort2.c 2007-10-04 17:34:43.000000000 +0200 @@ -0,0 +1,89 @@ +#include +#include + +char *array; +char *array_end; +size_t member_size; + +int +compare (const void *a1, const void *b1) +{ + const char *a = a1; + const char *b = b1; + + if (! (array <= a && a < array_end + && array <= b && b < array_end)) + { + puts ("compare arguments not inside of the array"); + exit (EXIT_FAILURE); + } + int ret = b[0] - a[0]; + if (ret) + return ret; + if (member_size > 1) + return b[1] - a[1]; + return 0; +} + +int +test (size_t nmemb, size_t size) +{ + array = malloc (nmemb * size); + if (array == NULL) + { + printf ("%zd x %zd: no memory", nmemb, size); + return 1; + } + + array_end = array + nmemb * size; + member_size = size; + + char *p; + size_t i; + size_t bias = random (); + for (i = 0, p = array; i < nmemb; i++, p += size) + { + p[0] = (char) (i + bias); + if (size > 1) + p[1] = (char) ((i + bias) >> 8); + } + + qsort (array, nmemb, size, compare); + + for (i = 0, p = array; i < nmemb - 1; i++, p += size) + { + if (p[0] < p[size] + || (size > 1 && p[0] == p[size] && p[1] < p[size + 1])) + { + printf ("%zd x %zd: failure at offset %zd\n", nmemb, + size, i); + free (array); + return 1; + } + } + + free (array); + return 0; +} + +int +main (int argc, char **argv) +{ + int ret = 0; + if (argc >= 2) + ret |= test (atoi (argv[1]), atoi (argv[2])); + else + { + ret |= test (10000, 1); + ret |= test (200000, 2); + ret |= test (2000000, 3); + ret |= test (2132310, 4); + ret |= test (1202730, 7); + ret |= test (1184710, 8); + ret |= test (272710, 12); + ret |= test (14170, 32); + ret |= test (4170, 320); + } + + return ret; +}