[PATCH] Implement introsort for qsort

public inbox for newlib@sourceware.org
 help / color / mirror / Atom feed

* [PATCH] Implement introsort for qsort
@ 2023-12-30 19:59 Kuan-Wei Chiu
  2023-12-31  6:20 ` brian.inglis
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Kuan-Wei Chiu @ 2023-12-30 19:59 UTC (permalink / raw)
  To: newlib; +Cc: Kuan-Wei Chiu

Enhances the qsort implementation by introducing introsort to address
the worst-case time complexity of O(n^2) associated with quicksort.
Introsort is utilized to switch to heapsort when quicksort recursion
depth becomes excessive, ensuring a worst-case time complexity of
O(n log n).

The heapsort implementation adopts a bottom-up approach, significantly
reducing the number of required comparisons and enhancing overall
efficiency.

Refs:
  Introspective Sorting and Selection Algorithms
  David R. Musser
  Software—Practice & Experience, 27(8); Pages 983–993, Aug 1997
  https://dl.acm.org/doi/10.5555/261387.261395

  A killer adversary for quicksort
  M. D. McIlroy
  Software—Practice & Experience, 29(4); Pages 341–344, 10 April 1999
  https://dl.acm.org/doi/10.5555/311868.311871

  BOTTOM-UP-HEAPSORT, a new variant of HEAPSORT beating, on an average,
  QUICKSORT (if n is not very small)
  Ingo Wegener
  Theoretical Computer Science, 118(1); Pages 81-98, 13 September 1993
  https://dl.acm.org/doi/10.5555/162625.162643

Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
---
To assess the performance of the new introsort and the old quicksort in
worst-case scenarios, we examined the number of comparisons required
for sorting based on the paper "A killer adversary for quicksort."

Before the patch:
n = 1000,  cmp_count = 100853
n = 2000,  cmp_count = 395991
n = 3000,  cmp_count = 885617
n = 4000,  cmp_count = 1569692
n = 5000,  cmp_count = 2448183
n = 6000,  cmp_count = 3521140
n = 7000,  cmp_count = 4788493
n = 8000,  cmp_count = 6250342
n = 9000,  cmp_count = 7906610
n = 10000,  cmp_count = 9757353

After the patch:
n = 1000,  cmp_count = 27094
n = 2000,  cmp_count = 61500
n = 3000,  cmp_count = 100529
n = 4000,  cmp_count = 136538
n = 5000,  cmp_count = 182698
n = 6000,  cmp_count = 221120
n = 7000,  cmp_count = 259933
n = 8000,  cmp_count = 299058
n = 9000,  cmp_count = 356405
n = 10000,  cmp_count = 397723

 newlib/libc/search/qsort.c | 153 +++++++++++++++++++++++++++++++++++--
 1 file changed, 147 insertions(+), 6 deletions(-)

diff --git a/newlib/libc/search/qsort.c b/newlib/libc/search/qsort.c
index b53400aa8..61db73746 100644
--- a/newlib/libc/search/qsort.c
+++ b/newlib/libc/search/qsort.c
@@ -65,6 +65,7 @@ PORTABILITY
 #include <_ansi.h>
 #include <sys/cdefs.h>
 #include <stdlib.h>
+#include <limits.h>
 
 #ifndef __GNUC__
 #define inline
@@ -145,6 +146,130 @@ __unused
               :(CMP(thunk, b, c) > 0 ? b : (CMP(thunk, a, c) < 0 ? a : c ));
 }
 
+/*
+ * Sifts the element at index 'i' down into the heap.
+ *
+ * This variant of siftdown follows a bottom-up approach, reducing the overall
+ * number of comparisons. The algorithm identifies the path for the element to
+ * sift down, reaching the leaves with only one comparison per level.
+ * Subsequently, it backtracks to determine the appropriate position to insert
+ * the target element.
+ *
+ * As elements tend to sift down closer to the leaves, this approach minimizes
+ * the number of comparisons compared to the traditional two per level descent.
+ * On average, it uses slightly more than half as many comparisons, with a
+ * worst-case scenario of 3/4th the comparisons.
+ */
+#if defined(I_AM_QSORT_R)
+static inline void
+__bsd_siftdown_r (void *a,
+	size_t i,
+	size_t n,
+	size_t es,
+	int swaptype,
+	void *thunk,
+	cmp_t *cmp)
+#elif defined(I_AM_GNU_QSORT_R)
+static inline void
+siftdown_r (void *a,
+	size_t i,
+	size_t n,
+	size_t es,
+	int swaptype,
+	cmp_t *cmp,
+	void *thunk)
+#else
+#define thunk NULL
+static inline void
+siftdown (void *a,
+	size_t i,
+	size_t n,
+	size_t es,
+	int swaptype,
+	cmp_t *cmp)
+#endif
+{
+    size_t j, k;
+
+	/* Find the sift-down path all the way to the leaves. */
+	for (j = i; k = 2 * j + 1, k + 1 < n;)
+		j = CMP(thunk, (char *) a + k * es, (char *) a + (k + 1) * es) > 0 ? k : (k + 1);
+
+	/* Special case for the last leaf with no sibling. */
+	if (k + 1 == n)
+		j = k;
+
+	/* Backtrack to the correct location. */
+	while (i != j && CMP(thunk, (char *) a + i * es, (char *) a + j * es) > 0)
+		j = (j - 1) / 2;
+
+	/* Shift the element into its correct place. */
+	k = j;
+	while (i != j) {
+		j = (j - 1) / 2;
+		swap((char *) a + j * es, (char *) a + k * es);
+	}
+}
+
+/*
+ * Heapsort is employed to achieve O(n log n) worst-case time complexity and
+ * O(1) additional space complexity for introsort implementation. When
+ * quicksort recursion becomes too deep, switching to heapsort helps maintain
+ * efficiency.
+ */
+#if defined(I_AM_QSORT_R)
+static void
+__bsd_heapsort_r (void *a,
+	size_t n,
+	size_t es,
+	int swaptype,
+	void *thunk,
+	cmp_t *cmp)
+#elif defined(I_AM_GNU_QSORT_R)
+static void
+heapsort_r (void *a,
+	size_t n,
+	size_t es,
+	int swaptype,
+	cmp_t *cmp,
+	void *thunk)
+#else
+#define thunk NULL
+static void
+heapsort (void *a,
+	size_t n,
+	size_t es,
+	int swaptype,
+	cmp_t *cmp)
+#endif
+{
+	size_t i = n / 2;
+
+	/* Build a max heap. */
+    while(i) {
+		--i;
+#if defined(I_AM_QSORT_R)
+        __bsd_siftdown_r(a, i, n, es, swaptype, thunk, cmp);
+#elif defined(I_AM_GNU_QSORT_R)
+        siftdown_r(a, i, n, es, swaptype, cmp, thunk);
+#else
+        siftdown(a, i, n, es, swaptype, cmp);
+#endif
+    }
+
+	/* Extract elements from the heap one by one. */
+    for(i = n - 1; i; i--) {
+        swap((char *) a, (char *) a + i * es);
+#if defined(I_AM_QSORT_R)
+        __bsd_siftdown_r(a, 0, i, es, swaptype, thunk, cmp);
+#elif defined(I_AM_GNU_QSORT_R)
+        siftdown_r(a, 0, i, es, swaptype, cmp, thunk);
+#else
+        siftdown(a, 0, i, es, swaptype, cmp);
+#endif
+    }
+}
+
 /*
  * Classical function call recursion wastes a lot of stack space. Each
  * recursion level requires a full stack frame comprising all local variables
@@ -189,7 +314,9 @@ qsort (void *a,
 	int cmp_result;
 	int swaptype, swap_cnt;
 	size_t recursion_level = 0;
-	struct { void *a; size_t n; } parameter_stack[PARAMETER_STACK_LEVELS];
+	size_t current_level = 0;
+    size_t max_level = 2 * (sizeof(size_t) * CHAR_BIT - 1 - __builtin_clzl(n));
+	struct { void *a; size_t n; size_t level; } parameter_stack[PARAMETER_STACK_LEVELS];
 
 	SWAPINIT(a, es);
 loop:	swap_cnt = 0;
@@ -202,6 +329,18 @@ loop:	swap_cnt = 0;
 		goto pop;
 	}
 
+	/* Switch to heapsort when the current level exceeds the maximum level. */
+	if(++current_level > max_level) {
+#if defined(I_AM_QSORT_R)
+        __bsd_heapsort_r(a, n, es, swaptype, thunk, cmp);
+#elif defined(I_AM_GNU_QSORT_R)
+        heapsort_r(a, n, es, swaptype, thunk, cmp);
+#else
+        heapsort(a, n, es, swaptype, cmp);
+#endif
+        goto pop;
+    }
+
 	/* Select a pivot element, move it to the left. */
 	pm = (char *) a + (n / 2) * es;
 	if (n > 7) {
@@ -310,6 +449,7 @@ loop:	swap_cnt = 0;
 			 */
 			parameter_stack[recursion_level].a = a;
 			parameter_stack[recursion_level].n = n / es;
+			parameter_stack[recursion_level].level = current_level;
 			recursion_level++;
 			a = pa;
 			n = r / es;
@@ -318,15 +458,15 @@ loop:	swap_cnt = 0;
 		else {
 			/*
 			 * The parameter_stack array is full. The smaller part
-			 * is sorted using function call recursion. The larger
-			 * part will be sorted after the function call returns.
+			 * is sorted using heapsort. The larger part will be
+			 * sorted after the function call returns.
 			 */
 #if defined(I_AM_QSORT_R)
-			__bsd_qsort_r(pa, r / es, es, thunk, cmp);
+			__bsd_heapsort_r(pa, r / es, es, swaptype, thunk, cmp);
 #elif defined(I_AM_GNU_QSORT_R)
-			qsort_r(pa, r / es, es, cmp, thunk);
+			heapsort_r(pa, r / es, es, swaptype, cmp, thunk);
 #else
-			qsort(pa, r / es, es, cmp);
+			heapsort(pa, r / es, es, swaptype, cmp);
 #endif
 		}
 	}
@@ -340,6 +480,7 @@ pop:
 		recursion_level--;
 		a = parameter_stack[recursion_level].a;
 		n = parameter_stack[recursion_level].n;
+		current_level = parameter_stack[recursion_level].level;
 		goto loop;
 	}
 }
-- 
2.25.1


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Implement introsort for qsort
  2023-12-30 19:59 [PATCH] Implement introsort for qsort Kuan-Wei Chiu
@ 2023-12-31  6:20 ` brian.inglis
  2024-01-13 17:25 ` Kuan-Wei Chiu
  2024-01-15 23:38 ` Kuan-Wei Chiu
  2 siblings, 0 replies; 5+ messages in thread
From: brian.inglis @ 2023-12-31  6:20 UTC (permalink / raw)
  To: newlib

On 2023-12-30 12:59, Kuan-Wei Chiu wrote:
> Enhances the qsort implementation by introducing introsort to address
> the worst-case time complexity of O(n^2) associated with quicksort.
> Introsort is utilized to switch to heapsort when quicksort recursion
> depth becomes excessive, ensuring a worst-case time complexity of
> O(n log n).
> 
> The heapsort implementation adopts a bottom-up approach, significantly
> reducing the number of required comparisons and enhancing overall
> efficiency.
> 
> Refs:
>    Introspective Sorting and Selection Algorithms
>    David R. Musser
>    Software—Practice & Experience, 27(8); Pages 983–993, Aug 1997
>    https://dl.acm.org/doi/10.5555/261387.261395
> 
>    A killer adversary for quicksort
>    M. D. McIlroy
>    Software—Practice & Experience, 29(4); Pages 341–344, 10 April 1999
>    https://dl.acm.org/doi/10.5555/311868.311871
> 
>    BOTTOM-UP-HEAPSORT, a new variant of HEAPSORT beating, on an average,
>    QUICKSORT (if n is not very small)
>    Ingo Wegener
>    Theoretical Computer Science, 118(1); Pages 81-98, 13 September 1993
>    https://dl.acm.org/doi/10.5555/162625.162643
> 
> Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
> ---
> To assess the performance of the new introsort and the old quicksort in
> worst-case scenarios, we examined the number of comparisons required
> for sorting based on the paper "A killer adversary for quicksort."
> 
> Before the patch:
> n = 1000,  cmp_count = 100853
> n = 2000,  cmp_count = 395991
> n = 3000,  cmp_count = 885617
> n = 4000,  cmp_count = 1569692
> n = 5000,  cmp_count = 2448183
> n = 6000,  cmp_count = 3521140
> n = 7000,  cmp_count = 4788493
> n = 8000,  cmp_count = 6250342
> n = 9000,  cmp_count = 7906610
> n = 10000,  cmp_count = 9757353
> 
> After the patch:
> n = 1000,  cmp_count = 27094
> n = 2000,  cmp_count = 61500
> n = 3000,  cmp_count = 100529
> n = 4000,  cmp_count = 136538
> n = 5000,  cmp_count = 182698
> n = 6000,  cmp_count = 221120
> n = 7000,  cmp_count = 259933
> n = 8000,  cmp_count = 299058
> n = 9000,  cmp_count = 356405
> n = 10000,  cmp_count = 397723

See an earlier post on the same issue:

https://inbox.sourceware.org/newlib/1361223558.67926.YahooMailNeo@web141001.mail.bf1.yahoo.com/

which addresses points your patch addresses, with more analysis, but no data or 
tests, and was not reworked or resubmitted as a valid patch.

You may also wish to look at the BSD implementations for comparison to see how 
they may have addressed these issues, and replace current newlib with an update 
from another distro, that has already received live testing.

Evaluating sort implementations requires you to also look at number of moves as 
well as comparisons - reducing comparisons is only useful if that results in 
fewer data moves - in general doing (not too many) more comparisons to make 
fewer moves gives better performance, so you have to check that, and also 
measure stack and memory used, as newlib may be built for some low memory 
targets, which may need some light sorting done, but not at the expense of 
(much) more memory or time used.

So you also have to compare behaviour of different test data, including all rows 
identical, already in forward and in reverse order, two values out of order 
repeated to fill the array, in alternating rows and alternate halves, and other 
documented antagonistic sequences, with pre- and post-change counts and times, 
to check there are no data-dependent regressions in time taken or space used.

You may want to make your improved sort implementation available in a way that 
allows easily switching it in and out, using definitions or config options, so 
that you, or other newlib users, can easily do their own comparisons.

-- 
Take care. Thanks, Brian Inglis              Calgary, Alberta, Canada

La perfection est atteinte                   Perfection is achieved
non pas lorsqu'il n'y a plus rien à ajouter  not when there is no more to add
mais lorsqu'il n'y a plus rien à retirer     but when there is no more to cut
                                 -- Antoine de Saint-Exupéry

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Implement introsort for qsort
  2023-12-30 19:59 [PATCH] Implement introsort for qsort Kuan-Wei Chiu
  2023-12-31  6:20 ` brian.inglis
@ 2024-01-13 17:25 ` Kuan-Wei Chiu
  2024-01-14 18:04   ` brian.inglis
  2024-01-15 23:38 ` Kuan-Wei Chiu
  2 siblings, 1 reply; 5+ messages in thread
From: Kuan-Wei Chiu @ 2024-01-13 17:25 UTC (permalink / raw)
  To: newlib, brian.inglis

On Sun, Dec 31, 2023 at 03:59:40AM +0800, Kuan-Wei Chiu wrote:
> Enhances the qsort implementation by introducing introsort to address
> the worst-case time complexity of O(n^2) associated with quicksort.
> Introsort is utilized to switch to heapsort when quicksort recursion
> depth becomes excessive, ensuring a worst-case time complexity of
> O(n log n).
> 
> The heapsort implementation adopts a bottom-up approach, significantly
> reducing the number of required comparisons and enhancing overall
> efficiency.
> 
> Refs:
>   Introspective Sorting and Selection Algorithms
>   David R. Musser
>   Software—Practice & Experience, 27(8); Pages 983–993, Aug 1997
>   https://dl.acm.org/doi/10.5555/261387.261395
> 
>   A killer adversary for quicksort
>   M. D. McIlroy
>   Software—Practice & Experience, 29(4); Pages 341–344, 10 April 1999
>   https://dl.acm.org/doi/10.5555/311868.311871
> 
>   BOTTOM-UP-HEAPSORT, a new variant of HEAPSORT beating, on an average,
>   QUICKSORT (if n is not very small)
>   Ingo Wegener
>   Theoretical Computer Science, 118(1); Pages 81-98, 13 September 1993
>   https://dl.acm.org/doi/10.5555/162625.162643
> 
> Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
> ---
> To assess the performance of the new introsort and the old quicksort in
> worst-case scenarios, we examined the number of comparisons required
> for sorting based on the paper "A killer adversary for quicksort."
> 
> Before the patch:
> n = 1000,  cmp_count = 100853
> n = 2000,  cmp_count = 395991
> n = 3000,  cmp_count = 885617
> n = 4000,  cmp_count = 1569692
> n = 5000,  cmp_count = 2448183
> n = 6000,  cmp_count = 3521140
> n = 7000,  cmp_count = 4788493
> n = 8000,  cmp_count = 6250342
> n = 9000,  cmp_count = 7906610
> n = 10000,  cmp_count = 9757353
> 
> After the patch:
> n = 1000,  cmp_count = 27094
> n = 2000,  cmp_count = 61500
> n = 3000,  cmp_count = 100529
> n = 4000,  cmp_count = 136538
> n = 5000,  cmp_count = 182698
> n = 6000,  cmp_count = 221120
> n = 7000,  cmp_count = 259933
> n = 8000,  cmp_count = 299058
> n = 9000,  cmp_count = 356405
> n = 10000,  cmp_count = 397723
> 
>  newlib/libc/search/qsort.c | 153 +++++++++++++++++++++++++++++++++++--
>  1 file changed, 147 insertions(+), 6 deletions(-)

Hi Brian,

Thank you for your feedback, and I apologize for the late response.

I found your reply in the archives of the newlib mailing list on the
web interface. However, for some reason, I did not find the same email
in my email client.

Upon reviewing the previously mentioned issue, it seems that although
both instances lead to a worst-case degradation to O(n^2), the causes
appear to be different. One may be the adoption of insertion sort at an
inappropriate time, causing degradation, while the other could be
adversarial input leading to the selection of an unsuitable pivot. I
can delve deeper into this issue later, but I believe it should be
different patches.

I looked at the code at BSD implementation [1], but I am skeptical that
it would address the problem of adversarial input degrading to O(n^2).
I will conduct experiments in subsequent emails to verify this.

Thank you for reminding me to consider not only the number of
comparisons but also the number of swaps. Indeed, switching to heapsort
may likely increase the required number of swaps, but while the number
of swaps would be at an O(n log n) level, the number of comparisons
would be at an O(n^2) level. Therefore, a trade-off needs to be made,
and I will conduct further experiments. If increasing the number of
swaps does lead to degradation in efficiency, it would be better to
drop this patch.

Regarding the experimental data, as I am not a native English speaker,
I would like to inquire about the meaning of "two values out of order
repeated to fill the array, in alternating rows and alternate halves."
This clarification will help me avoid any misunderstanding of the text.
I will post new experimental data within the next few days.

Once again, thank you for your feedback and suggestions!

Best regards,
Kuan-Wei Chiu

Link: https://cgit.freebsd.org/src/tree/lib/libc/stdlib/qsort.c [1]

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Implement introsort for qsort
  2024-01-13 17:25 ` Kuan-Wei Chiu
@ 2024-01-14 18:04   ` brian.inglis
  0 siblings, 0 replies; 5+ messages in thread
From: brian.inglis @ 2024-01-14 18:04 UTC (permalink / raw)
  To: newlib

On 2024-01-13 10:25, Kuan-Wei Chiu wrote:
> On Sun, Dec 31, 2023 at 03:59:40AM +0800, Kuan-Wei Chiu wrote:
>> Enhances the qsort implementation by introducing introsort to address
>> the worst-case time complexity of O(n^2) associated with quicksort.
>> Introsort is utilized to switch to heapsort when quicksort recursion
>> depth becomes excessive, ensuring a worst-case time complexity of
>> O(n log n).
>>
>> The heapsort implementation adopts a bottom-up approach, significantly
>> reducing the number of required comparisons and enhancing overall
>> efficiency.
>>
>> Refs:
>>    Introspective Sorting and Selection Algorithms
>>    David R. Musser
>>    Software—Practice & Experience, 27(8); Pages 983–993, Aug 1997
>>    https://dl.acm.org/doi/10.5555/261387.261395
>>
>>    A killer adversary for quicksort
>>    M. D. McIlroy
>>    Software—Practice & Experience, 29(4); Pages 341–344, 10 April 1999
>>    https://dl.acm.org/doi/10.5555/311868.311871
>>
>>    BOTTOM-UP-HEAPSORT, a new variant of HEAPSORT beating, on an average,
>>    QUICKSORT (if n is not very small)
>>    Ingo Wegener
>>    Theoretical Computer Science, 118(1); Pages 81-98, 13 September 1993
>>    https://dl.acm.org/doi/10.5555/162625.162643
>>
>> Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
>> ---
>> To assess the performance of the new introsort and the old quicksort in
>> worst-case scenarios, we examined the number of comparisons required
>> for sorting based on the paper "A killer adversary for quicksort."
>>
>> Before the patch:
>> n = 1000,  cmp_count = 100853
>> n = 2000,  cmp_count = 395991
>> n = 3000,  cmp_count = 885617
>> n = 4000,  cmp_count = 1569692
>> n = 5000,  cmp_count = 2448183
>> n = 6000,  cmp_count = 3521140
>> n = 7000,  cmp_count = 4788493
>> n = 8000,  cmp_count = 6250342
>> n = 9000,  cmp_count = 7906610
>> n = 10000,  cmp_count = 9757353
>>
>> After the patch:
>> n = 1000,  cmp_count = 27094
>> n = 2000,  cmp_count = 61500
>> n = 3000,  cmp_count = 100529
>> n = 4000,  cmp_count = 136538
>> n = 5000,  cmp_count = 182698
>> n = 6000,  cmp_count = 221120
>> n = 7000,  cmp_count = 259933
>> n = 8000,  cmp_count = 299058
>> n = 9000,  cmp_count = 356405
>> n = 10000,  cmp_count = 397723
>>
>>   newlib/libc/search/qsort.c | 153 +++++++++++++++++++++++++++++++++++--
>>   1 file changed, 147 insertions(+), 6 deletions(-)
> 
> Hi Brian,
> 
> Thank you for your feedback, and I apologize for the late response.
> 
> I found your reply in the archives of the newlib mailing list on the
> web interface. However, for some reason, I did not find the same email
> in my email client.
> 
> Upon reviewing the previously mentioned issue, it seems that although
> both instances lead to a worst-case degradation to O(n^2), the causes
> appear to be different. One may be the adoption of insertion sort at an
> inappropriate time, causing degradation, while the other could be
> adversarial input leading to the selection of an unsuitable pivot. I
> can delve deeper into this issue later, but I believe it should be
> different patches.
> 
> I looked at the code at BSD implementation [1], but I am skeptical that
> it would address the problem of adversarial input degrading to O(n^2).
> I will conduct experiments in subsequent emails to verify this.
> 
> Thank you for reminding me to consider not only the number of
> comparisons but also the number of swaps. Indeed, switching to heapsort
> may likely increase the required number of swaps, but while the number
> of swaps would be at an O(n log n) level, the number of comparisons
> would be at an O(n^2) level. Therefore, a trade-off needs to be made,
> and I will conduct further experiments. If increasing the number of
> swaps does lead to degradation in efficiency, it would be better to
> drop this patch.

Depending on your target ISA, comparisons can take almost no time with 
conditional instructions, and there are some tricks for unrolling and hard 
wiring code to deal with sections of arrays.

> Regarding the experimental data, as I am not a native English speaker,
> I would like to inquire about the meaning of "two values out of order
> repeated to fill the array, in alternating rows and alternate halves."

For all data types in your tests (or at least strings), try test sequences like:

	A	B	A	B
	B	A	A	B
	A	B	...	...
	B	A	B	A
	...	...	B	A
			...	...

to fill your test arrays with data that robust sort algorithms should handle 
without degradation.

A well engineered quicksort should be able to handle these cases in memory 
without degradation, while heapsort is more useful for managing runs of sorted 
sequences from I/O buffers when data spills from memory.

The literature in (CACM) papers and books by Knuth (tAoCP 3 Sorting...) and 
Sedgewick (Knuth's pupil and successor whose thesis was quicksort! and wsote 
Algorithms) covers the data and algorithmic considerations, while 
implementations and tweaks in papers by McIlroy (some with Bentley and Knuth) 
and articles and books by Bentley cover the pragmatics.

The series of articles below discusses the many problems in the BSD derived 
(faulty) qsort used by newlib/nano/pico and others, with analyses and numbers:

	https://www.raygard.net/2022/01/17/Re-engineering-a-qsort-part-1/

showing newlib worse than ideal by 35% to 1000%!
Please also note the comments about iterating tail calls with gotos.
The articles also mention sort test data sources and test benches, and seems to 
consider 10k reps of 5 data values as representative tests.

The related code qs22j fixes all the known problems, is licensed 0BSD, and could 
replace the newlib/nano/pico libc qsort:

	https://github.com/raygard/qsort_dev

> This clarification will help me avoid any misunderstanding of the text.
> I will post new experimental data within the next few days.
> Once again, thank you for your feedback and suggestions!
> Link: https://cgit.freebsd.org/src/tree/lib/libc/stdlib/qsort.c [1]

-- 
Take care. Thanks, Brian Inglis              Calgary, Alberta, Canada

La perfection est atteinte                   Perfection is achieved
non pas lorsqu'il n'y a plus rien à ajouter  not when there is no more to add
mais lorsqu'il n'y a plus rien à retirer     but when there is no more to cut
                                 -- Antoine de Saint-Exupéry

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Implement introsort for qsort
  2023-12-30 19:59 [PATCH] Implement introsort for qsort Kuan-Wei Chiu
  2023-12-31  6:20 ` brian.inglis
  2024-01-13 17:25 ` Kuan-Wei Chiu
@ 2024-01-15 23:38 ` Kuan-Wei Chiu
  2 siblings, 0 replies; 5+ messages in thread
From: Kuan-Wei Chiu @ 2024-01-15 23:38 UTC (permalink / raw)
  To: newlib, brian.inglis

On Sun, Dec 31, 2023 at 03:59:40AM +0800, Kuan-Wei Chiu wrote:
> Enhances the qsort implementation by introducing introsort to address
> the worst-case time complexity of O(n^2) associated with quicksort.
> Introsort is utilized to switch to heapsort when quicksort recursion
> depth becomes excessive, ensuring a worst-case time complexity of
> O(n log n).
> 
> The heapsort implementation adopts a bottom-up approach, significantly
> reducing the number of required comparisons and enhancing overall
> efficiency.
> 
> Refs:
>   Introspective Sorting and Selection Algorithms
>   David R. Musser
>   Software—Practice & Experience, 27(8); Pages 983–993, Aug 1997
>   https://dl.acm.org/doi/10.5555/261387.261395
> 
>   A killer adversary for quicksort
>   M. D. McIlroy
>   Software—Practice & Experience, 29(4); Pages 341–344, 10 April 1999
>   https://dl.acm.org/doi/10.5555/311868.311871
> 
>   BOTTOM-UP-HEAPSORT, a new variant of HEAPSORT beating, on an average,
>   QUICKSORT (if n is not very small)
>   Ingo Wegener
>   Theoretical Computer Science, 118(1); Pages 81-98, 13 September 1993
>   https://dl.acm.org/doi/10.5555/162625.162643
> 
> Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
> ---
> To assess the performance of the new introsort and the old quicksort in
> worst-case scenarios, we examined the number of comparisons required
> for sorting based on the paper "A killer adversary for quicksort."
> 
> Before the patch:
> n = 1000,  cmp_count = 100853
> n = 2000,  cmp_count = 395991
> n = 3000,  cmp_count = 885617
> n = 4000,  cmp_count = 1569692
> n = 5000,  cmp_count = 2448183
> n = 6000,  cmp_count = 3521140
> n = 7000,  cmp_count = 4788493
> n = 8000,  cmp_count = 6250342
> n = 9000,  cmp_count = 7906610
> n = 10000,  cmp_count = 9757353
> 
> After the patch:
> n = 1000,  cmp_count = 27094
> n = 2000,  cmp_count = 61500
> n = 3000,  cmp_count = 100529
> n = 4000,  cmp_count = 136538
> n = 5000,  cmp_count = 182698
> n = 6000,  cmp_count = 221120
> n = 7000,  cmp_count = 259933
> n = 8000,  cmp_count = 299058
> n = 9000,  cmp_count = 356405
> n = 10000,  cmp_count = 397723
> 
Hi Brian,

Thank you for sharing the information; I will take the time to review
the details you provided.

I noticed that your responses have been directed solely to the newlib
mailing list. For our future correspondence, could you please include
me or CC me? This would greatly help me in staying informed.

I appreciate your assistance.

Best regards,
Kuan-Wei Chiu

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2024-01-15 23:38 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-12-30 19:59 [PATCH] Implement introsort for qsort Kuan-Wei Chiu
2023-12-31  6:20 ` brian.inglis
2024-01-13 17:25 ` Kuan-Wei Chiu
2024-01-14 18:04   ` brian.inglis
2024-01-15 23:38 ` Kuan-Wei Chiu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).