public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH 1/5] string: Make tests birdirectional test-memcpy.c
@ 2021-08-24  8:27 Noah Goldstein
  2021-08-24  8:27 ` [PATCH 2/5] benchtests: Add new random cases to bench-memcpy-random.c Noah Goldstein
                   ` (5 more replies)
  0 siblings, 6 replies; 18+ messages in thread
From: Noah Goldstein @ 2021-08-24  8:27 UTC (permalink / raw)
  To: libc-alpha

This commit updates the memcpy tests to test both dst > src and dst <
src. This is because there is logic in the code based on the
condition.
---
 string/test-memcpy.c  | 125 +++++++++++++++++++++++++++++++++---------
 string/test-memmove.c |  73 +++++++++++++++++++++++-
 2 files changed, 170 insertions(+), 28 deletions(-)

diff --git a/string/test-memcpy.c b/string/test-memcpy.c
index c9dfc88fed..705d79ba13 100644
--- a/string/test-memcpy.c
+++ b/string/test-memcpy.c
@@ -79,7 +79,7 @@ do_one_test (impl_t *impl, char *dst, const char *src,
 static void
 do_test (size_t align1, size_t align2, size_t len)
 {
-  size_t i, j;
+  size_t i, j, repeats;
   char *s1, *s2;
 
   align1 &= 4095;
@@ -92,12 +92,14 @@ do_test (size_t align1, size_t align2, size_t len)
 
   s1 = (char *) (buf1 + align1);
   s2 = (char *) (buf2 + align2);
+  for (repeats = 0; repeats < 2; ++repeats)
+    {
+      for (i = 0, j = 1; i < len; i++, j += 23)
+        s1[i] = j;
 
-  for (i = 0, j = 1; i < len; i++, j += 23)
-    s1[i] = j;
-
-  FOR_EACH_IMPL (impl, 0)
-    do_one_test (impl, s2, s1, len);
+      FOR_EACH_IMPL (impl, 0)
+        do_one_test (impl, s2, s1, len);
+    }
 }
 
 static void
@@ -213,56 +215,88 @@ do_random_tests (void)
 }
 
 static void
-do_test1 (size_t size)
+do_test1 (size_t align1, size_t align2, size_t size)
 {
   void *large_buf;
-  large_buf = mmap (NULL, size * 2 + page_size, PROT_READ | PROT_WRITE,
-		    MAP_PRIVATE | MAP_ANON, -1, 0);
+  size_t mmap_size, region_size;
+
+  align1 &= (page_size - 1);
+  if (align1 == 0)
+    align1 = page_size;
+
+  align2 &= (page_size - 1);
+  if (align2 == 0)
+    align2 = page_size;
+
+  region_size = (size + page_size - 1) & (~(page_size - 1));
+
+  mmap_size = region_size * 2 + 3 * page_size;
+  large_buf = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
+                   MAP_PRIVATE | MAP_ANON, -1, 0);
   if (large_buf == MAP_FAILED)
     {
-      puts ("Failed to allocat large_buf, skipping do_test1");
+      puts ("Failed to allocate large_buf, skipping do_test1");
       return;
     }
-
-  if (mprotect (large_buf + size, page_size, PROT_NONE))
+  if (mprotect (large_buf + region_size + page_size, page_size, PROT_NONE))
     error (EXIT_FAILURE, errno, "mprotect failed");
 
-  size_t arrary_size = size / sizeof (uint32_t);
-  uint32_t *dest = large_buf;
-  uint32_t *src = large_buf + size + page_size;
+  size_t array_size = size / sizeof (uint32_t);
+  uint32_t *dest = large_buf + align1;
+  uint32_t *src = large_buf + region_size + 2 * page_size + align2;
   size_t i;
   size_t repeats;
   for(repeats = 0; repeats < 2; repeats++)
     {
-      for (i = 0; i < arrary_size; i++)
+      for (i = 0; i < array_size; i++)
         src[i] = (uint32_t) i;
-
       FOR_EACH_IMPL (impl, 0)
         {
-            printf ("\t\tRunning: %s\n", impl->name);
+            //            printf ("\t\tRunning: %s\n", impl->name);
           memset (dest, -1, size);
           CALL (impl, (char *) dest, (char *) src, size);
-          for (i = 0; i < arrary_size; i++)
+          for (i = 0; i < array_size; i++)
         if (dest[i] != src[i])
           {
             error (0, 0,
                "Wrong result in function %s dst \"%p\" src \"%p\" offset \"%zd\"",
                impl->name, dest, src, i);
             ret = 1;
-            munmap ((void *) large_buf, size * 2 + page_size);
+            munmap ((void *) large_buf, mmap_size);
             return;
           }
         }
-      dest = src;
-      src = large_buf;
+      dest = large_buf + region_size + 2 * page_size + align1;
+      src = large_buf + align2;
+    }
+  munmap ((void *) large_buf, mmap_size);
+}
+
+static void
+do_random_large_tests (void)
+{
+  size_t i, align1, align2, size;
+  for (i = 0; i < 32; ++i)
+    {
+      align1 = random ();
+      align2 = random ();
+      size = (random() % 0x1000000) + 0x200000;
+      do_test1 (align1, align2, size);
+    }
+
+  for (i = 0; i < 128; ++i)
+    {
+      align1 = random ();
+      align2 = random ();
+      size = (random() % 32768) + 4096;
+      do_test1 (align1, align2, size);
     }
-  munmap ((void *) large_buf, size * 2 + page_size);
 }
 
 int
 test_main (void)
 {
-  size_t i;
+  size_t i, j;
 
   test_init ();
 
@@ -299,6 +333,7 @@ test_main (void)
   for (i = 19; i <= 25; ++i)
     {
       do_test (255, 0, 1 << i);
+      do_test (0, 4000, 1 << i);
       do_test (0, 255, i);
       do_test (0, 4000, i);
     }
@@ -307,8 +342,46 @@ test_main (void)
 
   do_random_tests ();
 
-  do_test1 (0x100000);
-  do_test1 (0x2000000);
+  do_test1 (0, 0, 0x100000);
+  do_test1 (0, 0, 0x2000000);
+
+  for (i = 4096; i < 32768; i += 4096)
+    {
+      for (j = 1; j <= 1024; j <<= 1)
+        {
+          do_test1 (0, j, i);
+          do_test1 (4095, j, i);
+          do_test1 (4096 - j, 0, i);
+
+          do_test1 (0, j - 1, i);
+          do_test1 (4095, j - 1, i);
+          do_test1 (4096 - j - 1, 0, i);
+
+          do_test1 (0, j + 1, i);
+          do_test1 (4095, j + 1, i);
+          do_test1 (4096 - j, 1, i);
+        }
+    }
+
+  for (i = 0x300000; i < 0x2000000; i += 0x235689)
+    {
+      for (j = 64; j <= 1024; j <<= 1)
+        {
+          do_test1 (0, j, i);
+          do_test1 (4095, j, i);
+          do_test1 (4096 - j, 0, i);
+
+          do_test1 (0, j - 1, i);
+          do_test1 (4095, j - 1, i);
+          do_test1 (4096 - j - 1, 0, i);
+
+          do_test1 (0, j + 1, i);
+          do_test1 (4095, j + 1, i);
+          do_test1 (4096 - j, 1, i);
+        }
+    }
+
+  do_random_large_tests ();
   return ret;
 }
 
diff --git a/string/test-memmove.c b/string/test-memmove.c
index 670094c9dc..5ba79acf61 100644
--- a/string/test-memmove.c
+++ b/string/test-memmove.c
@@ -101,11 +101,11 @@ do_test (size_t align1, size_t align2, size_t len)
   size_t i, j;
   char *s1, *s2;
 
-  align1 &= 63;
+  align1 &= (getpagesize() - 1);
   if (align1 + len >= page_size)
     return;
 
-  align2 &= 63;
+  align2 &= (getpagesize() - 1);
   if (align2 + len >= page_size)
     return;
 
@@ -356,6 +356,51 @@ do_test3 (size_t bytes_move, size_t offset)
   munmap ((void *) buf, size);
 }
 
+static void
+do_test4 (size_t bytes_move, size_t offset1, size_t offset2)
+{
+  size_t size, repeats, i;
+  uint8_t *buf, *dst, *src;
+
+  size = bytes_move + MAX(offset1, offset2);
+  buf  = mmap(NULL, size, PROT_READ | PROT_WRITE,
+             MAP_PRIVATE | MAP_ANON, -1, 0);
+
+  if (buf == MAP_FAILED)
+    error (EXIT_UNSUPPORTED, errno, "mmap failed");
+
+  dst = &buf[offset1];
+  src = &buf[offset2];
+  for (repeats = 0; repeats < 2; ++repeats)
+    {
+      FOR_EACH_IMPL (impl, 0)
+        {
+          for (i = 0; i < bytes_move; i++)
+              src[i] = (uint8_t) i;
+#ifdef TEST_BCOPY
+          CALL (impl, (char *) src, (char *) dst, bytes_move);
+#else
+          CALL (impl, (char *) dst, (char *) src, bytes_move);
+#endif
+          for (i = 0; i < bytes_move; i++)
+            {
+              if (dst[i] != (uint8_t) i)
+                {
+                  error (0, 0,
+                         "Wrong result in function %s dst \"%p\" src \"%p\" offset \"%zd\"",
+                         impl->name, dst, buf, i);
+                  ret = 1;
+                  break;
+                }
+            }
+        }
+      dst = &buf[offset2];
+      src = &buf[offset1];
+    }
+  munmap ((void *) buf, size);
+}
+
+
 int
 test_main (void)
 {
@@ -396,13 +441,37 @@ test_main (void)
 
   do_random_tests ();
 
+  do_test2 (0);
   do_test2 (33);
   do_test2 (0x200000);
+  do_test2 (0x200000 - 1);
+  do_test2 (0x200000 + 1);
+  do_test2 (0x1000000 + 1);
   do_test2 (0x4000000 - 1);
   do_test2 (0x4000000);
 
+
   /* Copy 16KB data.  */
   do_test3 (16384, 3);
+  for (i = 4096; i <= 16384; i <<= 1)
+    {
+      do_test4 (i, 0, i);
+      do_test4 (i, 0, i - 1);
+      do_test4 (i, 0, i + 1);      
+      do_test4 (i, 63, i + 63);
+      do_test4 (i, 63, i + 64);
+      do_test4 (i, 63, i);
+
+      do_test4 (i, 0, 1);
+      do_test4 (i, 0, 15);
+      do_test4 (i, 0, 31);
+      do_test4 (i, 0, 63);
+      do_test4 (i, 0, 64);
+      do_test4 (i, 0, 65);
+      do_test4 (i, 0, 127);
+      do_test4 (i, 0, 129);
+    }
+
 
   return ret;
 }
-- 
2.25.1


^ permalink raw reply	[flat|nested] 18+ messages in thread
* [PATCH 2/5] benchtests: Add new random cases to bench-memcpy-random.c
@ 2021-08-24 17:09 Wilco Dijkstra
  2021-08-24 19:32 ` Noah Goldstein
  0 siblings, 1 reply; 18+ messages in thread
From: Wilco Dijkstra @ 2021-08-24 17:09 UTC (permalink / raw)
  To: Noah Goldstein; +Cc: 'GNU C Library'

Hi Noah,

> This commit adds three new benchmarks for the SPEC2017
> distribution. One randomized if dst > src and the other two set it
> either 1/0.

What is the goal of this? Have you ever seen memcpy do something
radically differently depending on whether src < dst (assuming no overlaps)?
Generally it's the same basic loop that gets used, so I don't see the usefulness
of adding this.

Some comments on the implementation details are below, there are
several issues that need to be resolved.

> As well add some tests for fixed sizes with randomize alignment and
> value of dst > src. This can be useful for testing different alignment
> configurations.

This tests fixed size copies from 1KB to 128KB. Large copies are typically
aligned, so I'm not sure it makes sense to use the random alignment 
distribution for small copies here.
 
-#define MIN_PAGE_SIZE (512*1024+getpagesize())
+#define MAX_TEST_SIZE (512*1024)

This makes sense. Please also use it in the loops. Also it may be
worthwhile to increase this value a bit if larger sizes are what you
are after (and the minimum size of 4K could be increased a bit as
well as L1/L2 caches have increased a lot in recent years).

+#define MIN_PAGE_SIZE (3*MAX_TEST_SIZE+3*getpagesize())

I don't get this at all. The goal is to copy within the MAX_TEST_SIZE
region, not to copy outside it. And why 3 times the size?

 typedef struct
 {
-  uint64_t src : 24;
-  uint64_t dst : 24;
-  uint64_t len : 16;
+/* 26 bits for src and dst so we have extra bit for alternating dst >
+   src without a branch.  */
+  uint64_t src : 26;
+  uint64_t dst : 26;
+  /* For size < 4096 12 bits is enough.  */
+  uint64_t len : 12;
 } copy_t;

This doesn't make any sense, where do we need offsets larger than 2^24?
 
+static size_t
+init_copy(size_t max_size, int dst_gt_src)
+{
+  size_t i, dst_offset, src_offset;
+  if (dst_gt_src <= 0)
+    {
+      dst_offset = 0;
+      src_offset = MAX_TEST_SIZE + getpagesize();
+    }
+  else
+    {
+      dst_offset = MAX_TEST_SIZE + getpagesize();
+      src_offset = 0;
+    }

This doesn't make sense since this is now copying outside of the region,
effectively enlarging the region but also always reading from one part and
writing to another. The data outside the region is uninitialized which means
it is using the zero page which gives unrepresentative results.

+    /* Create a random set of copies with the given size and alignment
      distributions.  */
   for (i = 0; i < MAX_COPIES; i++)
     {
+      dst_offset  = dst_gt_src == -1
+                        ? (rand() & 1) ? MAX_TEST_SIZE + getpagesize() : 0
+                        : dst_offset;

Same here. If you want to force src > dst then just skip cases where
src < dst. Similarly for avoiding accidental overlaps (which are likely
to happen for small values of max_size).

Given the much larger sizes used for some tests, it is also worth
ensuring the copy stays within the region.

       copy[i].dst = (rand () & (max_size - 1));
       copy[i].dst &= ~dst_align_arr[rand () & ALIGN_MASK];
+      copy[i].dst += dst_offset;
       copy[i].src = (rand () & (max_size - 1));
       copy[i].src &= ~src_align_arr[rand () & ALIGN_MASK];
+      copy[i].src += src_offset;
       copy[i].len = size_arr[rand () & SIZE_MASK];
     }
+  return i;
+}
 
+static void
+do_test (json_ctx_t *json_ctx, size_t max_size, int dst_gt_src)
+{
+  size_t n;
+  memset (buf1, 1, max_size);

Note this doesn't initialize the extra data in the buffer which creates
odd performance behaviour due to the zero page.

+  n = init_copy(max_size, dst_gt_src);
   json_element_object_begin (json_ctx);
-  json_attr_uint (json_ctx, "length", (double) max_size);
+  json_attr_uint (json_ctx, "max-alignment", (double) max_size);

max-alignment, where does that come from???
 
   json_array_begin (&json_ctx, "results");
   for (int i = 4; i <= 512; i = i * 2)
-    do_test (&json_ctx, i * 1024);
+    {
+      if (i * 1024 > MAX_TEST_SIZE)
+          continue;

It's much easier to change the for loop to i = 4096 to MAX_TEST_SIZE
and remove the various i * 1024. Then changing MAX_TEST_SIZE just works.

Cheers,
Wilco

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2021-08-26 17:06 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-24  8:27 [PATCH 1/5] string: Make tests birdirectional test-memcpy.c Noah Goldstein
2021-08-24  8:27 ` [PATCH 2/5] benchtests: Add new random cases to bench-memcpy-random.c Noah Goldstein
2021-08-24 15:18   ` H.J. Lu
2021-08-24  8:27 ` [PATCH 3/5] benchtests: Add partial overlap case in bench-memmove-walk.c Noah Goldstein
2021-08-24 15:18   ` H.J. Lu
2021-08-24  8:27 ` [PATCH 4/5] benchtests: Add additional cases to bench-memcpy.c and bench-memmove.c Noah Goldstein
2021-08-24 15:19   ` H.J. Lu
2021-08-24  8:27 ` [PATCH 5/5] X86-64: Optimize memmove-vec-unaligned-erms.S Noah Goldstein
2021-08-24  9:12   ` Noah Goldstein
2021-08-24 15:17 ` [PATCH 1/5] string: Make tests birdirectional test-memcpy.c H.J. Lu
2021-08-24 19:32 ` [PATCH v1 " Noah Goldstein
2021-08-24 19:32   ` [PATCH v1 2/5] benchtests: Add new random cases to bench-memcpy-random.c Noah Goldstein
2021-08-24 19:32   ` [PATCH v1 3/5] benchtests: Add partial overlap case in bench-memmove-walk.c Noah Goldstein
2021-08-24 19:32   ` [PATCH v1 4/5] benchtests: Add additional cases to bench-memcpy.c and bench-memmove.c Noah Goldstein
2021-08-24 19:32   ` [PATCH v1 5/5] X86-64: Optimize memmove-vec-unaligned-erms.S Noah Goldstein
2021-08-24 17:09 [PATCH 2/5] benchtests: Add new random cases to bench-memcpy-random.c Wilco Dijkstra
2021-08-24 19:32 ` Noah Goldstein
2021-08-26 17:06   ` Wilco Dijkstra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).