* Re: [PATCH v1] x86: Fix wcsnlen-avx2 page cross length comparison [BZ #29591] [not found] ` <CAMe9rOobcrtq0dSy5pPqDNahBjJVd7LVs-iT6FcQw5VO7mRi=g@mail.gmail.com> @ 2022-11-23 22:20 ` Sunil Pandey 2022-11-24 0:22 ` H.J. Lu 0 siblings, 1 reply; 4+ messages in thread From: Sunil Pandey @ 2022-11-23 22:20 UTC (permalink / raw) To: H.J. Lu, Libc-stable Mailing List; +Cc: Noah Goldstein, libc-alpha On Wed, Sep 21, 2022 at 3:02 PM H.J. Lu via Libc-alpha <libc-alpha@sourceware.org> wrote: > > On Tue, Sep 20, 2022 at 5:58 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > > > Previous implementation was adjusting length (rsi) to match > > bytes (eax), but since there is no bound to length this can cause > > overflow. > > > > Fix is to just convert the byte-count (eax) to length by dividing by > > sizeof (wchar_t) before the comparison. > > > > Full check passes on x86-64 and build succeeds w/ and w/o multiarch. > > --- > > string/test-strnlen.c | 70 +++++++++++++++----------- > > sysdeps/x86_64/multiarch/strlen-avx2.S | 7 +-- > > 2 files changed, 43 insertions(+), 34 deletions(-) > > > > diff --git a/string/test-strnlen.c b/string/test-strnlen.c > > index 4a9375112a..5cbaf4b734 100644 > > --- a/string/test-strnlen.c > > +++ b/string/test-strnlen.c > > @@ -73,7 +73,7 @@ do_test (size_t align, size_t len, size_t maxlen, int max_char) > > { > > size_t i; > > > > - align &= 63; > > + align &= (getpagesize () / sizeof (CHAR) - 1); > > if ((align + len) * sizeof (CHAR) >= page_size) > > return; > > > > @@ -90,38 +90,50 @@ do_test (size_t align, size_t len, size_t maxlen, int max_char) > > static void > > do_overflow_tests (void) > > { > > - size_t i, j, len; > > + size_t i, j, al_idx, repeats, len; > > const size_t one = 1; > > uintptr_t buf_addr = (uintptr_t) buf1; > > + const size_t alignments[] = { 0, 1, 7, 9, 31, 33, 63, 65, 95, 97, 127, 129 }; > > > > - for (i = 0; i < 750; ++i) > > + for (al_idx = 0; al_idx < sizeof (alignments) / sizeof (alignments[0]); > > + al_idx++) > > { > > - do_test (1, i, SIZE_MAX, BIG_CHAR); > > - > > - do_test (0, i, SIZE_MAX - i, BIG_CHAR); > > - do_test (0, i, i - buf_addr, BIG_CHAR); > > - do_test (0, i, -buf_addr - i, BIG_CHAR); > > - do_test (0, i, SIZE_MAX - buf_addr - i, BIG_CHAR); > > - do_test (0, i, SIZE_MAX - buf_addr + i, BIG_CHAR); > > - > > - len = 0; > > - for (j = 8 * sizeof(size_t) - 1; j ; --j) > > - { > > - len |= one << j; > > - do_test (0, i, len - i, BIG_CHAR); > > - do_test (0, i, len + i, BIG_CHAR); > > - do_test (0, i, len - buf_addr - i, BIG_CHAR); > > - do_test (0, i, len - buf_addr + i, BIG_CHAR); > > - > > - do_test (0, i, ~len - i, BIG_CHAR); > > - do_test (0, i, ~len + i, BIG_CHAR); > > - do_test (0, i, ~len - buf_addr - i, BIG_CHAR); > > - do_test (0, i, ~len - buf_addr + i, BIG_CHAR); > > - > > - do_test (0, i, -buf_addr, BIG_CHAR); > > - do_test (0, i, j - buf_addr, BIG_CHAR); > > - do_test (0, i, -buf_addr - j, BIG_CHAR); > > - } > > + for (repeats = 0; repeats < 2; ++repeats) > > + { > > + size_t align = repeats ? (getpagesize () - alignments[al_idx]) > > + : alignments[al_idx]; > > + align /= sizeof (CHAR); > > + for (i = 0; i < 750; ++i) > > + { > > + do_test (align, i, SIZE_MAX, BIG_CHAR); > > + > > + do_test (align, i, SIZE_MAX - i, BIG_CHAR); > > + do_test (align, i, i - buf_addr, BIG_CHAR); > > + do_test (align, i, -buf_addr - i, BIG_CHAR); > > + do_test (align, i, SIZE_MAX - buf_addr - i, BIG_CHAR); > > + do_test (align, i, SIZE_MAX - buf_addr + i, BIG_CHAR); > > + > > + len = 0; > > + for (j = 8 * sizeof (size_t) - 1; j; --j) > > + { > > + len |= one << j; > > + do_test (align, i, len, BIG_CHAR); > > + do_test (align, i, len - i, BIG_CHAR); > > + do_test (align, i, len + i, BIG_CHAR); > > + do_test (align, i, len - buf_addr - i, BIG_CHAR); > > + do_test (align, i, len - buf_addr + i, BIG_CHAR); > > + > > + do_test (align, i, ~len - i, BIG_CHAR); > > + do_test (align, i, ~len + i, BIG_CHAR); > > + do_test (align, i, ~len - buf_addr - i, BIG_CHAR); > > + do_test (align, i, ~len - buf_addr + i, BIG_CHAR); > > + > > + do_test (align, i, -buf_addr, BIG_CHAR); > > + do_test (align, i, j - buf_addr, BIG_CHAR); > > + do_test (align, i, -buf_addr - j, BIG_CHAR); > > + } > > + } > > + } > > } > > } > > > > diff --git a/sysdeps/x86_64/multiarch/strlen-avx2.S b/sysdeps/x86_64/multiarch/strlen-avx2.S > > index 0593fb303b..b9b58ef599 100644 > > --- a/sysdeps/x86_64/multiarch/strlen-avx2.S > > +++ b/sysdeps/x86_64/multiarch/strlen-avx2.S > > @@ -544,14 +544,11 @@ L(return_vzeroupper): > > L(cross_page_less_vec): > > tzcntl %eax, %eax > > # ifdef USE_AS_WCSLEN > > - /* NB: Multiply length by 4 to get byte count. */ > > - sall $2, %esi > > + /* NB: Divide by 4 to convert from byte-count to length. */ > > + shrl $2, %eax > > # endif > > cmpq %rax, %rsi > > cmovb %esi, %eax > > -# ifdef USE_AS_WCSLEN > > - shrl $2, %eax > > -# endif > > VZEROUPPER_RETURN > > # endif > > > > -- > > 2.34.1 > > > > LGTM. > > Thanks. > > -- > H.J. I would like to backport this patch to affected release branches from 2.36 to 2.33. Any comments/suggestions or objections on this. --Sunil ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v1] x86: Fix wcsnlen-avx2 page cross length comparison [BZ #29591] 2022-11-23 22:20 ` [PATCH v1] x86: Fix wcsnlen-avx2 page cross length comparison [BZ #29591] Sunil Pandey @ 2022-11-24 0:22 ` H.J. Lu 2022-11-24 3:04 ` Sunil Pandey 0 siblings, 1 reply; 4+ messages in thread From: H.J. Lu @ 2022-11-24 0:22 UTC (permalink / raw) To: Sunil Pandey; +Cc: Libc-stable Mailing List, Noah Goldstein, libc-alpha On Wed, Nov 23, 2022 at 2:21 PM Sunil Pandey <skpgkp2@gmail.com> wrote: > > On Wed, Sep 21, 2022 at 3:02 PM H.J. Lu via Libc-alpha > <libc-alpha@sourceware.org> wrote: > > > > On Tue, Sep 20, 2022 at 5:58 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > > > > > Previous implementation was adjusting length (rsi) to match > > > bytes (eax), but since there is no bound to length this can cause > > > overflow. > > > > > > Fix is to just convert the byte-count (eax) to length by dividing by > > > sizeof (wchar_t) before the comparison. > > > > > > Full check passes on x86-64 and build succeeds w/ and w/o multiarch. > > > --- > > > string/test-strnlen.c | 70 +++++++++++++++----------- > > > sysdeps/x86_64/multiarch/strlen-avx2.S | 7 +-- > > > 2 files changed, 43 insertions(+), 34 deletions(-) > > > > > > diff --git a/string/test-strnlen.c b/string/test-strnlen.c > > > index 4a9375112a..5cbaf4b734 100644 > > > --- a/string/test-strnlen.c > > > +++ b/string/test-strnlen.c > > > @@ -73,7 +73,7 @@ do_test (size_t align, size_t len, size_t maxlen, int max_char) > > > { > > > size_t i; > > > > > > - align &= 63; > > > + align &= (getpagesize () / sizeof (CHAR) - 1); > > > if ((align + len) * sizeof (CHAR) >= page_size) > > > return; > > > > > > @@ -90,38 +90,50 @@ do_test (size_t align, size_t len, size_t maxlen, int max_char) > > > static void > > > do_overflow_tests (void) > > > { > > > - size_t i, j, len; > > > + size_t i, j, al_idx, repeats, len; > > > const size_t one = 1; > > > uintptr_t buf_addr = (uintptr_t) buf1; > > > + const size_t alignments[] = { 0, 1, 7, 9, 31, 33, 63, 65, 95, 97, 127, 129 }; > > > > > > - for (i = 0; i < 750; ++i) > > > + for (al_idx = 0; al_idx < sizeof (alignments) / sizeof (alignments[0]); > > > + al_idx++) > > > { > > > - do_test (1, i, SIZE_MAX, BIG_CHAR); > > > - > > > - do_test (0, i, SIZE_MAX - i, BIG_CHAR); > > > - do_test (0, i, i - buf_addr, BIG_CHAR); > > > - do_test (0, i, -buf_addr - i, BIG_CHAR); > > > - do_test (0, i, SIZE_MAX - buf_addr - i, BIG_CHAR); > > > - do_test (0, i, SIZE_MAX - buf_addr + i, BIG_CHAR); > > > - > > > - len = 0; > > > - for (j = 8 * sizeof(size_t) - 1; j ; --j) > > > - { > > > - len |= one << j; > > > - do_test (0, i, len - i, BIG_CHAR); > > > - do_test (0, i, len + i, BIG_CHAR); > > > - do_test (0, i, len - buf_addr - i, BIG_CHAR); > > > - do_test (0, i, len - buf_addr + i, BIG_CHAR); > > > - > > > - do_test (0, i, ~len - i, BIG_CHAR); > > > - do_test (0, i, ~len + i, BIG_CHAR); > > > - do_test (0, i, ~len - buf_addr - i, BIG_CHAR); > > > - do_test (0, i, ~len - buf_addr + i, BIG_CHAR); > > > - > > > - do_test (0, i, -buf_addr, BIG_CHAR); > > > - do_test (0, i, j - buf_addr, BIG_CHAR); > > > - do_test (0, i, -buf_addr - j, BIG_CHAR); > > > - } > > > + for (repeats = 0; repeats < 2; ++repeats) > > > + { > > > + size_t align = repeats ? (getpagesize () - alignments[al_idx]) > > > + : alignments[al_idx]; > > > + align /= sizeof (CHAR); > > > + for (i = 0; i < 750; ++i) > > > + { > > > + do_test (align, i, SIZE_MAX, BIG_CHAR); > > > + > > > + do_test (align, i, SIZE_MAX - i, BIG_CHAR); > > > + do_test (align, i, i - buf_addr, BIG_CHAR); > > > + do_test (align, i, -buf_addr - i, BIG_CHAR); > > > + do_test (align, i, SIZE_MAX - buf_addr - i, BIG_CHAR); > > > + do_test (align, i, SIZE_MAX - buf_addr + i, BIG_CHAR); > > > + > > > + len = 0; > > > + for (j = 8 * sizeof (size_t) - 1; j; --j) > > > + { > > > + len |= one << j; > > > + do_test (align, i, len, BIG_CHAR); > > > + do_test (align, i, len - i, BIG_CHAR); > > > + do_test (align, i, len + i, BIG_CHAR); > > > + do_test (align, i, len - buf_addr - i, BIG_CHAR); > > > + do_test (align, i, len - buf_addr + i, BIG_CHAR); > > > + > > > + do_test (align, i, ~len - i, BIG_CHAR); > > > + do_test (align, i, ~len + i, BIG_CHAR); > > > + do_test (align, i, ~len - buf_addr - i, BIG_CHAR); > > > + do_test (align, i, ~len - buf_addr + i, BIG_CHAR); > > > + > > > + do_test (align, i, -buf_addr, BIG_CHAR); > > > + do_test (align, i, j - buf_addr, BIG_CHAR); > > > + do_test (align, i, -buf_addr - j, BIG_CHAR); > > > + } > > > + } > > > + } > > > } > > > } > > > > > > diff --git a/sysdeps/x86_64/multiarch/strlen-avx2.S b/sysdeps/x86_64/multiarch/strlen-avx2.S > > > index 0593fb303b..b9b58ef599 100644 > > > --- a/sysdeps/x86_64/multiarch/strlen-avx2.S > > > +++ b/sysdeps/x86_64/multiarch/strlen-avx2.S > > > @@ -544,14 +544,11 @@ L(return_vzeroupper): > > > L(cross_page_less_vec): > > > tzcntl %eax, %eax > > > # ifdef USE_AS_WCSLEN > > > - /* NB: Multiply length by 4 to get byte count. */ > > > - sall $2, %esi > > > + /* NB: Divide by 4 to convert from byte-count to length. */ > > > + shrl $2, %eax > > > # endif > > > cmpq %rax, %rsi > > > cmovb %esi, %eax > > > -# ifdef USE_AS_WCSLEN > > > - shrl $2, %eax > > > -# endif > > > VZEROUPPER_RETURN > > > # endif > > > > > > -- > > > 2.34.1 > > > > > > > LGTM. > > > > Thanks. > > > > -- > > H.J. > > I would like to backport this patch to affected release branches from > 2.36 to 2.33. > > Any comments/suggestions or objections on this. > OK. Thanks. -- H.J. ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v1] x86: Fix wcsnlen-avx2 page cross length comparison [BZ #29591] 2022-11-24 0:22 ` H.J. Lu @ 2022-11-24 3:04 ` Sunil Pandey 2022-11-24 3:12 ` H.J. Lu 0 siblings, 1 reply; 4+ messages in thread From: Sunil Pandey @ 2022-11-24 3:04 UTC (permalink / raw) To: H.J. Lu; +Cc: Libc-stable Mailing List, Noah Goldstein, libc-alpha On Wed, Nov 23, 2022 at 4:23 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > On Wed, Nov 23, 2022 at 2:21 PM Sunil Pandey <skpgkp2@gmail.com> wrote: > > > > On Wed, Sep 21, 2022 at 3:02 PM H.J. Lu via Libc-alpha > > <libc-alpha@sourceware.org> wrote: > > > > > > On Tue, Sep 20, 2022 at 5:58 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > > > > > > > Previous implementation was adjusting length (rsi) to match > > > > bytes (eax), but since there is no bound to length this can cause > > > > overflow. > > > > > > > > Fix is to just convert the byte-count (eax) to length by dividing by > > > > sizeof (wchar_t) before the comparison. > > > > > > > > Full check passes on x86-64 and build succeeds w/ and w/o multiarch. > > > > --- > > > > string/test-strnlen.c | 70 +++++++++++++++----------- > > > > sysdeps/x86_64/multiarch/strlen-avx2.S | 7 +-- > > > > 2 files changed, 43 insertions(+), 34 deletions(-) > > > > > > > > diff --git a/string/test-strnlen.c b/string/test-strnlen.c > > > > index 4a9375112a..5cbaf4b734 100644 > > > > --- a/string/test-strnlen.c > > > > +++ b/string/test-strnlen.c > > > > @@ -73,7 +73,7 @@ do_test (size_t align, size_t len, size_t maxlen, int max_char) > > > > { > > > > size_t i; > > > > > > > > - align &= 63; > > > > + align &= (getpagesize () / sizeof (CHAR) - 1); > > > > if ((align + len) * sizeof (CHAR) >= page_size) > > > > return; > > > > > > > > @@ -90,38 +90,50 @@ do_test (size_t align, size_t len, size_t maxlen, int max_char) > > > > static void > > > > do_overflow_tests (void) > > > > { > > > > - size_t i, j, len; > > > > + size_t i, j, al_idx, repeats, len; > > > > const size_t one = 1; > > > > uintptr_t buf_addr = (uintptr_t) buf1; > > > > + const size_t alignments[] = { 0, 1, 7, 9, 31, 33, 63, 65, 95, 97, 127, 129 }; > > > > > > > > - for (i = 0; i < 750; ++i) > > > > + for (al_idx = 0; al_idx < sizeof (alignments) / sizeof (alignments[0]); > > > > + al_idx++) > > > > { > > > > - do_test (1, i, SIZE_MAX, BIG_CHAR); > > > > - > > > > - do_test (0, i, SIZE_MAX - i, BIG_CHAR); > > > > - do_test (0, i, i - buf_addr, BIG_CHAR); > > > > - do_test (0, i, -buf_addr - i, BIG_CHAR); > > > > - do_test (0, i, SIZE_MAX - buf_addr - i, BIG_CHAR); > > > > - do_test (0, i, SIZE_MAX - buf_addr + i, BIG_CHAR); > > > > - > > > > - len = 0; > > > > - for (j = 8 * sizeof(size_t) - 1; j ; --j) > > > > - { > > > > - len |= one << j; > > > > - do_test (0, i, len - i, BIG_CHAR); > > > > - do_test (0, i, len + i, BIG_CHAR); > > > > - do_test (0, i, len - buf_addr - i, BIG_CHAR); > > > > - do_test (0, i, len - buf_addr + i, BIG_CHAR); > > > > - > > > > - do_test (0, i, ~len - i, BIG_CHAR); > > > > - do_test (0, i, ~len + i, BIG_CHAR); > > > > - do_test (0, i, ~len - buf_addr - i, BIG_CHAR); > > > > - do_test (0, i, ~len - buf_addr + i, BIG_CHAR); > > > > - > > > > - do_test (0, i, -buf_addr, BIG_CHAR); > > > > - do_test (0, i, j - buf_addr, BIG_CHAR); > > > > - do_test (0, i, -buf_addr - j, BIG_CHAR); > > > > - } > > > > + for (repeats = 0; repeats < 2; ++repeats) > > > > + { > > > > + size_t align = repeats ? (getpagesize () - alignments[al_idx]) > > > > + : alignments[al_idx]; > > > > + align /= sizeof (CHAR); > > > > + for (i = 0; i < 750; ++i) > > > > + { > > > > + do_test (align, i, SIZE_MAX, BIG_CHAR); > > > > + > > > > + do_test (align, i, SIZE_MAX - i, BIG_CHAR); > > > > + do_test (align, i, i - buf_addr, BIG_CHAR); > > > > + do_test (align, i, -buf_addr - i, BIG_CHAR); > > > > + do_test (align, i, SIZE_MAX - buf_addr - i, BIG_CHAR); > > > > + do_test (align, i, SIZE_MAX - buf_addr + i, BIG_CHAR); > > > > + > > > > + len = 0; > > > > + for (j = 8 * sizeof (size_t) - 1; j; --j) > > > > + { > > > > + len |= one << j; > > > > + do_test (align, i, len, BIG_CHAR); > > > > + do_test (align, i, len - i, BIG_CHAR); > > > > + do_test (align, i, len + i, BIG_CHAR); > > > > + do_test (align, i, len - buf_addr - i, BIG_CHAR); > > > > + do_test (align, i, len - buf_addr + i, BIG_CHAR); > > > > + > > > > + do_test (align, i, ~len - i, BIG_CHAR); > > > > + do_test (align, i, ~len + i, BIG_CHAR); > > > > + do_test (align, i, ~len - buf_addr - i, BIG_CHAR); > > > > + do_test (align, i, ~len - buf_addr + i, BIG_CHAR); > > > > + > > > > + do_test (align, i, -buf_addr, BIG_CHAR); > > > > + do_test (align, i, j - buf_addr, BIG_CHAR); > > > > + do_test (align, i, -buf_addr - j, BIG_CHAR); > > > > + } > > > > + } > > > > + } > > > > } > > > > } > > > > > > > > diff --git a/sysdeps/x86_64/multiarch/strlen-avx2.S b/sysdeps/x86_64/multiarch/strlen-avx2.S > > > > index 0593fb303b..b9b58ef599 100644 > > > > --- a/sysdeps/x86_64/multiarch/strlen-avx2.S > > > > +++ b/sysdeps/x86_64/multiarch/strlen-avx2.S > > > > @@ -544,14 +544,11 @@ L(return_vzeroupper): > > > > L(cross_page_less_vec): > > > > tzcntl %eax, %eax > > > > # ifdef USE_AS_WCSLEN > > > > - /* NB: Multiply length by 4 to get byte count. */ > > > > - sall $2, %esi > > > > + /* NB: Divide by 4 to convert from byte-count to length. */ > > > > + shrl $2, %eax > > > > # endif > > > > cmpq %rax, %rsi > > > > cmovb %esi, %eax > > > > -# ifdef USE_AS_WCSLEN > > > > - shrl $2, %eax > > > > -# endif > > > > VZEROUPPER_RETURN > > > > # endif > > > > > > > > -- > > > > 2.34.1 > > > > > > > > > > LGTM. > > > > > > Thanks. > > > > > > -- > > > H.J. > > > > I would like to backport this patch to affected release branches from > > 2.36 to 2.33. > > > > Any comments/suggestions or objections on this. > > > > OK. > > Thanks. > > > -- > H.J. Just ran testing from 2.32 to 2.26. All of them have this issue. Ok for 2.32 to 2.26 branches? --Sunil ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v1] x86: Fix wcsnlen-avx2 page cross length comparison [BZ #29591] 2022-11-24 3:04 ` Sunil Pandey @ 2022-11-24 3:12 ` H.J. Lu 0 siblings, 0 replies; 4+ messages in thread From: H.J. Lu @ 2022-11-24 3:12 UTC (permalink / raw) To: Sunil Pandey; +Cc: Libc-stable Mailing List, Noah Goldstein, libc-alpha On Wed, Nov 23, 2022 at 7:04 PM Sunil Pandey <skpgkp2@gmail.com> wrote: > > On Wed, Nov 23, 2022 at 4:23 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > On Wed, Nov 23, 2022 at 2:21 PM Sunil Pandey <skpgkp2@gmail.com> wrote: > > > > > > On Wed, Sep 21, 2022 at 3:02 PM H.J. Lu via Libc-alpha > > > <libc-alpha@sourceware.org> wrote: > > > > > > > > On Tue, Sep 20, 2022 at 5:58 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote: > > > > > > > > > > Previous implementation was adjusting length (rsi) to match > > > > > bytes (eax), but since there is no bound to length this can cause > > > > > overflow. > > > > > > > > > > Fix is to just convert the byte-count (eax) to length by dividing by > > > > > sizeof (wchar_t) before the comparison. > > > > > > > > > > Full check passes on x86-64 and build succeeds w/ and w/o multiarch. > > > > > --- > > > > > string/test-strnlen.c | 70 +++++++++++++++----------- > > > > > sysdeps/x86_64/multiarch/strlen-avx2.S | 7 +-- > > > > > 2 files changed, 43 insertions(+), 34 deletions(-) > > > > > > > > > > diff --git a/string/test-strnlen.c b/string/test-strnlen.c > > > > > index 4a9375112a..5cbaf4b734 100644 > > > > > --- a/string/test-strnlen.c > > > > > +++ b/string/test-strnlen.c > > > > > @@ -73,7 +73,7 @@ do_test (size_t align, size_t len, size_t maxlen, int max_char) > > > > > { > > > > > size_t i; > > > > > > > > > > - align &= 63; > > > > > + align &= (getpagesize () / sizeof (CHAR) - 1); > > > > > if ((align + len) * sizeof (CHAR) >= page_size) > > > > > return; > > > > > > > > > > @@ -90,38 +90,50 @@ do_test (size_t align, size_t len, size_t maxlen, int max_char) > > > > > static void > > > > > do_overflow_tests (void) > > > > > { > > > > > - size_t i, j, len; > > > > > + size_t i, j, al_idx, repeats, len; > > > > > const size_t one = 1; > > > > > uintptr_t buf_addr = (uintptr_t) buf1; > > > > > + const size_t alignments[] = { 0, 1, 7, 9, 31, 33, 63, 65, 95, 97, 127, 129 }; > > > > > > > > > > - for (i = 0; i < 750; ++i) > > > > > + for (al_idx = 0; al_idx < sizeof (alignments) / sizeof (alignments[0]); > > > > > + al_idx++) > > > > > { > > > > > - do_test (1, i, SIZE_MAX, BIG_CHAR); > > > > > - > > > > > - do_test (0, i, SIZE_MAX - i, BIG_CHAR); > > > > > - do_test (0, i, i - buf_addr, BIG_CHAR); > > > > > - do_test (0, i, -buf_addr - i, BIG_CHAR); > > > > > - do_test (0, i, SIZE_MAX - buf_addr - i, BIG_CHAR); > > > > > - do_test (0, i, SIZE_MAX - buf_addr + i, BIG_CHAR); > > > > > - > > > > > - len = 0; > > > > > - for (j = 8 * sizeof(size_t) - 1; j ; --j) > > > > > - { > > > > > - len |= one << j; > > > > > - do_test (0, i, len - i, BIG_CHAR); > > > > > - do_test (0, i, len + i, BIG_CHAR); > > > > > - do_test (0, i, len - buf_addr - i, BIG_CHAR); > > > > > - do_test (0, i, len - buf_addr + i, BIG_CHAR); > > > > > - > > > > > - do_test (0, i, ~len - i, BIG_CHAR); > > > > > - do_test (0, i, ~len + i, BIG_CHAR); > > > > > - do_test (0, i, ~len - buf_addr - i, BIG_CHAR); > > > > > - do_test (0, i, ~len - buf_addr + i, BIG_CHAR); > > > > > - > > > > > - do_test (0, i, -buf_addr, BIG_CHAR); > > > > > - do_test (0, i, j - buf_addr, BIG_CHAR); > > > > > - do_test (0, i, -buf_addr - j, BIG_CHAR); > > > > > - } > > > > > + for (repeats = 0; repeats < 2; ++repeats) > > > > > + { > > > > > + size_t align = repeats ? (getpagesize () - alignments[al_idx]) > > > > > + : alignments[al_idx]; > > > > > + align /= sizeof (CHAR); > > > > > + for (i = 0; i < 750; ++i) > > > > > + { > > > > > + do_test (align, i, SIZE_MAX, BIG_CHAR); > > > > > + > > > > > + do_test (align, i, SIZE_MAX - i, BIG_CHAR); > > > > > + do_test (align, i, i - buf_addr, BIG_CHAR); > > > > > + do_test (align, i, -buf_addr - i, BIG_CHAR); > > > > > + do_test (align, i, SIZE_MAX - buf_addr - i, BIG_CHAR); > > > > > + do_test (align, i, SIZE_MAX - buf_addr + i, BIG_CHAR); > > > > > + > > > > > + len = 0; > > > > > + for (j = 8 * sizeof (size_t) - 1; j; --j) > > > > > + { > > > > > + len |= one << j; > > > > > + do_test (align, i, len, BIG_CHAR); > > > > > + do_test (align, i, len - i, BIG_CHAR); > > > > > + do_test (align, i, len + i, BIG_CHAR); > > > > > + do_test (align, i, len - buf_addr - i, BIG_CHAR); > > > > > + do_test (align, i, len - buf_addr + i, BIG_CHAR); > > > > > + > > > > > + do_test (align, i, ~len - i, BIG_CHAR); > > > > > + do_test (align, i, ~len + i, BIG_CHAR); > > > > > + do_test (align, i, ~len - buf_addr - i, BIG_CHAR); > > > > > + do_test (align, i, ~len - buf_addr + i, BIG_CHAR); > > > > > + > > > > > + do_test (align, i, -buf_addr, BIG_CHAR); > > > > > + do_test (align, i, j - buf_addr, BIG_CHAR); > > > > > + do_test (align, i, -buf_addr - j, BIG_CHAR); > > > > > + } > > > > > + } > > > > > + } > > > > > } > > > > > } > > > > > > > > > > diff --git a/sysdeps/x86_64/multiarch/strlen-avx2.S b/sysdeps/x86_64/multiarch/strlen-avx2.S > > > > > index 0593fb303b..b9b58ef599 100644 > > > > > --- a/sysdeps/x86_64/multiarch/strlen-avx2.S > > > > > +++ b/sysdeps/x86_64/multiarch/strlen-avx2.S > > > > > @@ -544,14 +544,11 @@ L(return_vzeroupper): > > > > > L(cross_page_less_vec): > > > > > tzcntl %eax, %eax > > > > > # ifdef USE_AS_WCSLEN > > > > > - /* NB: Multiply length by 4 to get byte count. */ > > > > > - sall $2, %esi > > > > > + /* NB: Divide by 4 to convert from byte-count to length. */ > > > > > + shrl $2, %eax > > > > > # endif > > > > > cmpq %rax, %rsi > > > > > cmovb %esi, %eax > > > > > -# ifdef USE_AS_WCSLEN > > > > > - shrl $2, %eax > > > > > -# endif > > > > > VZEROUPPER_RETURN > > > > > # endif > > > > > > > > > > -- > > > > > 2.34.1 > > > > > > > > > > > > > LGTM. > > > > > > > > Thanks. > > > > > > > > -- > > > > H.J. > > > > > > I would like to backport this patch to affected release branches from > > > 2.36 to 2.33. > > > > > > Any comments/suggestions or objections on this. > > > > > > > OK. > > > > Thanks. > > > > > > -- > > H.J. > > Just ran testing from 2.32 to 2.26. All of them have this issue. > > Ok for 2.32 to 2.26 branches? > > --Sunil Let's stop at 2.28. -- H.J. ^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2022-11-24 3:12 UTC | newest] Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- [not found] <20220921005804.7131-1-goldstein.w.n@gmail.com> [not found] ` <CAMe9rOobcrtq0dSy5pPqDNahBjJVd7LVs-iT6FcQw5VO7mRi=g@mail.gmail.com> 2022-11-23 22:20 ` [PATCH v1] x86: Fix wcsnlen-avx2 page cross length comparison [BZ #29591] Sunil Pandey 2022-11-24 0:22 ` H.J. Lu 2022-11-24 3:04 ` Sunil Pandey 2022-11-24 3:12 ` H.J. Lu
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).