public inbox for glibc-cvs@sourceware.org
help / color / mirror / Atom feed
* [glibc] stdlib: The qsort implementation needs to use heapsort in more cases
@ 2023-11-21 16:10 Florian Weimer
  0 siblings, 0 replies; only message in thread
From: Florian Weimer @ 2023-11-21 16:10 UTC (permalink / raw)
  To: glibc-cvs

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=64e4acf24da15c11cb83f933947df3b2e8a700cd

commit 64e4acf24da15c11cb83f933947df3b2e8a700cd
Author: Florian Weimer <fweimer@redhat.com>
Date:   Tue Nov 21 16:45:35 2023 +0100

    stdlib: The qsort implementation needs to use heapsort in more cases
    
    The existing logic avoided internal stack overflow.  To avoid
    a denial-of-service condition with adversarial input, it is necessary
    to fall over to heapsort if tail-recursing deeply, too, which does
    not result in a deep stack of pending partitions.
    
    The new test stdlib/tst-qsort5 is based on Douglas McIlroy's paper
    on this subject.
    
    Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>

Diff:
---
 stdlib/Makefile     |   3 +
 stdlib/qsort.c      |  17 ++++--
 stdlib/tst-qsort5.c | 171 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 187 insertions(+), 4 deletions(-)

diff --git a/stdlib/Makefile b/stdlib/Makefile
index 48688f6a27..6194d1cb22 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -215,6 +215,7 @@ tests := \
   tst-qsort \
   tst-qsort2 \
   tst-qsort3 \
+  tst-qsort5 \
   tst-quick_exit \
   tst-rand48 \
   tst-rand48-2 \
@@ -512,3 +513,5 @@ $(objpfx)tst-setcontext3.out: tst-setcontext3.sh $(objpfx)tst-setcontext3
 		 '$(run-program-env)' '$(test-program-prefix-after-env)' \
 		 $(common-objpfx)stdlib/; \
 	$(evaluate-test)
+
+$(objpfx)tst-qsort5: $(libm)
diff --git a/stdlib/qsort.c b/stdlib/qsort.c
index a2f9e916ef..be01fb5598 100644
--- a/stdlib/qsort.c
+++ b/stdlib/qsort.c
@@ -389,14 +389,23 @@ __qsort_r (void *const pbase, size_t total_elems, size_t size,
             {
               if ((size_t) (hi - left_ptr) <= max_thresh)
 		/* Ignore both small partitions. */
-                top = pop (top, &lo, &hi, &depth);
+		{
+		  top = pop (top, &lo, &hi, &depth);
+		  --depth;
+		}
               else
-		/* Ignore small left partition. */
-                lo = left_ptr;
+		{
+		  /* Ignore small left partition. */
+		  lo = left_ptr;
+		  --depth;
+		}
             }
           else if ((size_t) (hi - left_ptr) <= max_thresh)
 	    /* Ignore small right partition. */
-            hi = right_ptr;
+		{
+		  hi = right_ptr;
+		  --depth;
+		}
           else if ((right_ptr - lo) > (hi - left_ptr))
             {
 	      /* Push larger left partition indices. */
diff --git a/stdlib/tst-qsort5.c b/stdlib/tst-qsort5.c
new file mode 100644
index 0000000000..d3a88c30f8
--- /dev/null
+++ b/stdlib/tst-qsort5.c
@@ -0,0 +1,171 @@
+/* Adversarial test for qsort_r.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* The approach follows Douglas McIlroy, A Killer Adversary for
+   Quicksort.  Software—Practice and Experience 29 (1999) 341-344.
+   Downloaded <http://www.cs.dartmouth.edu/~doug/mdmspe.pdf>
+   (2023-11-17).  */
+
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <support/check.h>
+#include <support/support.h>
+
+struct context
+{
+  /* Called the gas value in the paper.  This value is larger than all
+     other values (length minus one will do), so comparison with any
+     decided value has a known result.  */
+  int undecided_value;
+
+  /* If comparing undecided values, one of them as to be assigned a
+     value to ensure consistency with future comparisons.  This is the
+     value that will be used.  Starts out at zero.  */
+  int next_decided;
+
+  /* Used to trick pivot selection.  Deciding the value for the last
+     seen undcided value in a decided/undecided comparison happens
+     to trick the many qsort implementations.  */
+  int last_undecided_index;
+
+  /* This array contains the actually asigned values.  The call to
+     qsort_r sorts a different array that contains indices into this
+     array.  */
+  int *decided_values;
+};
+
+static int
+compare_opponent (const void *l1, const void *r1, void *ctx1)
+{
+  const int *l = l1;
+  const int *r = r1;
+  struct context *ctx = ctx1;
+  int rvalue = ctx->decided_values[*r];
+  int lvalue = ctx->decided_values[*l];
+
+  if (lvalue == ctx->undecided_value)
+    {
+      if (rvalue == ctx->undecided_value)
+        {
+          /* Both values are undecided.  In this case, make a decision
+             for the last-used undecided value.  This is tweak is very
+             specific to quicksort.  */
+          if (*l == ctx->last_undecided_index)
+            {
+              ctx->decided_values[*l] = ctx->next_decided;
+              ++ctx->next_decided;
+              /* The undecided value or *r is greater.  */
+              return -1;
+            }
+          else
+            {
+              ctx->decided_values[*r] = ctx->next_decided;
+              ++ctx->next_decided;
+              /* The undecided value for *l is greater.  */
+              return 1;
+            }
+        }
+      else
+        {
+          ctx->last_undecided_index = *l;
+          return 1;
+        }
+    }
+  else
+    {
+      /* *l is a decided value.  */
+      if (rvalue == ctx->undecided_value)
+        {
+          ctx->last_undecided_index = *r;
+          /* The undecided value for *r is greater.  */
+          return -1;
+        }
+      else
+        return lvalue - rvalue;
+    }
+}
+
+/* Return a pointer to the adversarial permutation of length N.  */
+static int *
+create_permutation (size_t n)
+{
+  struct context ctx =
+    {
+      .undecided_value = n - 1, /* Larger than all other values.  */
+      .decided_values = xcalloc (n, sizeof (int)),
+    };
+  for (size_t i = 0; i < n; ++i)
+    ctx.decided_values[i] = ctx.undecided_value;
+  int *scratch = xcalloc (n, sizeof (int));
+  for (size_t i = 0; i < n; ++i)
+    scratch[i] = i;
+  qsort_r (scratch, n, sizeof (*scratch), compare_opponent, &ctx);
+  free (scratch);
+  return ctx.decided_values;
+}
+
+/* Callback function for qsort which counts the number of invocations
+   in *CLOSURE.  */
+static int
+compare_counter (const void *l1, const void *r1, void *closure)
+{
+  const int *l = l1;
+  const int *r = r1;
+  unsigned long long int *counter = closure;
+  ++*counter;
+  return *l - *r;
+}
+
+/* Count the comparisons required for an adversarial permutation of
+   length N.  */
+static unsigned long long int
+count_comparisons (size_t n)
+{
+  int *array = create_permutation (n);
+  unsigned long long int counter = 0;
+  qsort_r (array, n, sizeof (*array), compare_counter, &counter);
+  free (array);
+  return counter;
+}
+
+/* Check the scaling factor for one adversarial permutation of length
+   N, and report some statistics.  */
+static void
+check_one_n (size_t n)
+{
+  unsigned long long int count = count_comparisons (n);
+  double factor = count / (n * log (count));
+  printf ("info: length %zu: %llu comparisons ~ %f * n * log (n)\n",
+          n, count, factor);
+  /* This is an arbitrary factor which is true for the current
+     implementation across a wide range of sizes.  */
+  TEST_VERIFY (factor <= 4.5);
+}
+
+static int
+do_test (void)
+{
+  check_one_n (100);
+  check_one_n (1000);
+  for (int i = 1; i <= 15; ++i)
+    check_one_n (i * 10 * 1000);
+  return 0;
+}
+
+#include <support/test-driver.c>

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-11-21 16:10 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-21 16:10 [glibc] stdlib: The qsort implementation needs to use heapsort in more cases Florian Weimer

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).