public inbox for newlib@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] Implement introsort for qsort
@ 2023-12-30 19:59 Kuan-Wei Chiu
  2023-12-31  6:20 ` brian.inglis
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Kuan-Wei Chiu @ 2023-12-30 19:59 UTC (permalink / raw)
  To: newlib; +Cc: Kuan-Wei Chiu

Enhances the qsort implementation by introducing introsort to address
the worst-case time complexity of O(n^2) associated with quicksort.
Introsort is utilized to switch to heapsort when quicksort recursion
depth becomes excessive, ensuring a worst-case time complexity of
O(n log n).

The heapsort implementation adopts a bottom-up approach, significantly
reducing the number of required comparisons and enhancing overall
efficiency.

Refs:
  Introspective Sorting and Selection Algorithms
  David R. Musser
  Software—Practice & Experience, 27(8); Pages 983–993, Aug 1997
  https://dl.acm.org/doi/10.5555/261387.261395

  A killer adversary for quicksort
  M. D. McIlroy
  Software—Practice & Experience, 29(4); Pages 341–344, 10 April 1999
  https://dl.acm.org/doi/10.5555/311868.311871

  BOTTOM-UP-HEAPSORT, a new variant of HEAPSORT beating, on an average,
  QUICKSORT (if n is not very small)
  Ingo Wegener
  Theoretical Computer Science, 118(1); Pages 81-98, 13 September 1993
  https://dl.acm.org/doi/10.5555/162625.162643

Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
---
To assess the performance of the new introsort and the old quicksort in
worst-case scenarios, we examined the number of comparisons required
for sorting based on the paper "A killer adversary for quicksort."

Before the patch:
n = 1000,  cmp_count = 100853
n = 2000,  cmp_count = 395991
n = 3000,  cmp_count = 885617
n = 4000,  cmp_count = 1569692
n = 5000,  cmp_count = 2448183
n = 6000,  cmp_count = 3521140
n = 7000,  cmp_count = 4788493
n = 8000,  cmp_count = 6250342
n = 9000,  cmp_count = 7906610
n = 10000,  cmp_count = 9757353

After the patch:
n = 1000,  cmp_count = 27094
n = 2000,  cmp_count = 61500
n = 3000,  cmp_count = 100529
n = 4000,  cmp_count = 136538
n = 5000,  cmp_count = 182698
n = 6000,  cmp_count = 221120
n = 7000,  cmp_count = 259933
n = 8000,  cmp_count = 299058
n = 9000,  cmp_count = 356405
n = 10000,  cmp_count = 397723

 newlib/libc/search/qsort.c | 153 +++++++++++++++++++++++++++++++++++--
 1 file changed, 147 insertions(+), 6 deletions(-)

diff --git a/newlib/libc/search/qsort.c b/newlib/libc/search/qsort.c
index b53400aa8..61db73746 100644
--- a/newlib/libc/search/qsort.c
+++ b/newlib/libc/search/qsort.c
@@ -65,6 +65,7 @@ PORTABILITY
 #include <_ansi.h>
 #include <sys/cdefs.h>
 #include <stdlib.h>
+#include <limits.h>
 
 #ifndef __GNUC__
 #define inline
@@ -145,6 +146,130 @@ __unused
               :(CMP(thunk, b, c) > 0 ? b : (CMP(thunk, a, c) < 0 ? a : c ));
 }
 
+/*
+ * Sifts the element at index 'i' down into the heap.
+ *
+ * This variant of siftdown follows a bottom-up approach, reducing the overall
+ * number of comparisons. The algorithm identifies the path for the element to
+ * sift down, reaching the leaves with only one comparison per level.
+ * Subsequently, it backtracks to determine the appropriate position to insert
+ * the target element.
+ *
+ * As elements tend to sift down closer to the leaves, this approach minimizes
+ * the number of comparisons compared to the traditional two per level descent.
+ * On average, it uses slightly more than half as many comparisons, with a
+ * worst-case scenario of 3/4th the comparisons.
+ */
+#if defined(I_AM_QSORT_R)
+static inline void
+__bsd_siftdown_r (void *a,
+	size_t i,
+	size_t n,
+	size_t es,
+	int swaptype,
+	void *thunk,
+	cmp_t *cmp)
+#elif defined(I_AM_GNU_QSORT_R)
+static inline void
+siftdown_r (void *a,
+	size_t i,
+	size_t n,
+	size_t es,
+	int swaptype,
+	cmp_t *cmp,
+	void *thunk)
+#else
+#define thunk NULL
+static inline void
+siftdown (void *a,
+	size_t i,
+	size_t n,
+	size_t es,
+	int swaptype,
+	cmp_t *cmp)
+#endif
+{
+    size_t j, k;
+
+	/* Find the sift-down path all the way to the leaves. */
+	for (j = i; k = 2 * j + 1, k + 1 < n;)
+		j = CMP(thunk, (char *) a + k * es, (char *) a + (k + 1) * es) > 0 ? k : (k + 1);
+
+	/* Special case for the last leaf with no sibling. */
+	if (k + 1 == n)
+		j = k;
+
+	/* Backtrack to the correct location. */
+	while (i != j && CMP(thunk, (char *) a + i * es, (char *) a + j * es) > 0)
+		j = (j - 1) / 2;
+
+	/* Shift the element into its correct place. */
+	k = j;
+	while (i != j) {
+		j = (j - 1) / 2;
+		swap((char *) a + j * es, (char *) a + k * es);
+	}
+}
+
+/*
+ * Heapsort is employed to achieve O(n log n) worst-case time complexity and
+ * O(1) additional space complexity for introsort implementation. When
+ * quicksort recursion becomes too deep, switching to heapsort helps maintain
+ * efficiency.
+ */
+#if defined(I_AM_QSORT_R)
+static void
+__bsd_heapsort_r (void *a,
+	size_t n,
+	size_t es,
+	int swaptype,
+	void *thunk,
+	cmp_t *cmp)
+#elif defined(I_AM_GNU_QSORT_R)
+static void
+heapsort_r (void *a,
+	size_t n,
+	size_t es,
+	int swaptype,
+	cmp_t *cmp,
+	void *thunk)
+#else
+#define thunk NULL
+static void
+heapsort (void *a,
+	size_t n,
+	size_t es,
+	int swaptype,
+	cmp_t *cmp)
+#endif
+{
+	size_t i = n / 2;
+
+	/* Build a max heap. */
+    while(i) {
+		--i;
+#if defined(I_AM_QSORT_R)
+        __bsd_siftdown_r(a, i, n, es, swaptype, thunk, cmp);
+#elif defined(I_AM_GNU_QSORT_R)
+        siftdown_r(a, i, n, es, swaptype, cmp, thunk);
+#else
+        siftdown(a, i, n, es, swaptype, cmp);
+#endif
+    }
+
+	/* Extract elements from the heap one by one. */
+    for(i = n - 1; i; i--) {
+        swap((char *) a, (char *) a + i * es);
+#if defined(I_AM_QSORT_R)
+        __bsd_siftdown_r(a, 0, i, es, swaptype, thunk, cmp);
+#elif defined(I_AM_GNU_QSORT_R)
+        siftdown_r(a, 0, i, es, swaptype, cmp, thunk);
+#else
+        siftdown(a, 0, i, es, swaptype, cmp);
+#endif
+    }
+}
+
 /*
  * Classical function call recursion wastes a lot of stack space. Each
  * recursion level requires a full stack frame comprising all local variables
@@ -189,7 +314,9 @@ qsort (void *a,
 	int cmp_result;
 	int swaptype, swap_cnt;
 	size_t recursion_level = 0;
-	struct { void *a; size_t n; } parameter_stack[PARAMETER_STACK_LEVELS];
+	size_t current_level = 0;
+    size_t max_level = 2 * (sizeof(size_t) * CHAR_BIT - 1 - __builtin_clzl(n));
+	struct { void *a; size_t n; size_t level; } parameter_stack[PARAMETER_STACK_LEVELS];
 
 	SWAPINIT(a, es);
 loop:	swap_cnt = 0;
@@ -202,6 +329,18 @@ loop:	swap_cnt = 0;
 		goto pop;
 	}
 
+	/* Switch to heapsort when the current level exceeds the maximum level. */
+	if(++current_level > max_level) {
+#if defined(I_AM_QSORT_R)
+        __bsd_heapsort_r(a, n, es, swaptype, thunk, cmp);
+#elif defined(I_AM_GNU_QSORT_R)
+        heapsort_r(a, n, es, swaptype, thunk, cmp);
+#else
+        heapsort(a, n, es, swaptype, cmp);
+#endif
+        goto pop;
+    }
+
 	/* Select a pivot element, move it to the left. */
 	pm = (char *) a + (n / 2) * es;
 	if (n > 7) {
@@ -310,6 +449,7 @@ loop:	swap_cnt = 0;
 			 */
 			parameter_stack[recursion_level].a = a;
 			parameter_stack[recursion_level].n = n / es;
+			parameter_stack[recursion_level].level = current_level;
 			recursion_level++;
 			a = pa;
 			n = r / es;
@@ -318,15 +458,15 @@ loop:	swap_cnt = 0;
 		else {
 			/*
 			 * The parameter_stack array is full. The smaller part
-			 * is sorted using function call recursion. The larger
-			 * part will be sorted after the function call returns.
+			 * is sorted using heapsort. The larger part will be
+			 * sorted after the function call returns.
 			 */
 #if defined(I_AM_QSORT_R)
-			__bsd_qsort_r(pa, r / es, es, thunk, cmp);
+			__bsd_heapsort_r(pa, r / es, es, swaptype, thunk, cmp);
 #elif defined(I_AM_GNU_QSORT_R)
-			qsort_r(pa, r / es, es, cmp, thunk);
+			heapsort_r(pa, r / es, es, swaptype, cmp, thunk);
 #else
-			qsort(pa, r / es, es, cmp);
+			heapsort(pa, r / es, es, swaptype, cmp);
 #endif
 		}
 	}
@@ -340,6 +480,7 @@ pop:
 		recursion_level--;
 		a = parameter_stack[recursion_level].a;
 		n = parameter_stack[recursion_level].n;
+		current_level = parameter_stack[recursion_level].level;
 		goto loop;
 	}
 }
-- 
2.25.1


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2024-01-15 23:38 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-12-30 19:59 [PATCH] Implement introsort for qsort Kuan-Wei Chiu
2023-12-31  6:20 ` brian.inglis
2024-01-13 17:25 ` Kuan-Wei Chiu
2024-01-14 18:04   ` brian.inglis
2024-01-15 23:38 ` Kuan-Wei Chiu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).