From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-oi1-x22f.google.com (mail-oi1-x22f.google.com [IPv6:2607:f8b0:4864:20::22f]) by sourceware.org (Postfix) with ESMTPS id 3DD3B385043C for ; Mon, 8 Feb 2021 19:33:39 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.3.2 sourceware.org 3DD3B385043C Received: by mail-oi1-x22f.google.com with SMTP id k204so15268217oih.3 for ; Mon, 08 Feb 2021 11:33:39 -0800 (PST) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:mime-version:references:in-reply-to:from:date :message-id:subject:to:cc; bh=8g9Q2zwZFe5Xba9w3Rxt42qiP95vLA8R90nF99rh/74=; b=j3gZKYvvPyTeL5vsVCx7/VEj0Uq3kvXyJ9aFQNRCI0ilBg7YTpjjgqWT7DC8cY3oRr dDPsb78LEExdG9ZkGVi69r0H2HbO5tJxgigrDdopEjWV2ufoDi5mCAXNs7DHwIeYO9Pn zmYkoIBZhQKzSpJXU7r9wjdwRobJa3Wr6itRnIFCXwrJkx9IYWKqB7vCvErX1U8UUbQg q8yBeBu/nDL/pOICjAZeVc1s5YQG4uUtYkNS33U0pKmx3LKZx8qR3UMUaJ2cXzPN2Urd Ddh4ZUfQpq6zcpu4R5Om6yyl3uqztSDxtSTrMJbNdiTMmkTzu24c8hU51W+NZC/cQrSY rjJg== X-Gm-Message-State: AOAM533C2cb7ube1YFcHydkoyy10chT3EYXvjIO36N8Ysjjq78qWVZxH xx3zxLAuOj1h1xm8g3JFMEwgKJsxe99Dl1+5o2c= X-Google-Smtp-Source: ABdhPJwCuDea5piDLKAS+3TWXIeJYK0gorXx8EW44EElLyTC1P06LHkxzGF12t6HRZhhoxFhjxLnw+8PhysAlIdSAAg= X-Received: by 2002:aca:df84:: with SMTP id w126mr227405oig.58.1612812818534; Mon, 08 Feb 2021 11:33:38 -0800 (PST) MIME-Version: 1.0 References: <20210203053900.4125403-1-goldstein.w.n@gmail.com> In-Reply-To: From: "H.J. Lu" Date: Mon, 8 Feb 2021 11:33:02 -0800 Message-ID: Subject: Re: [PATCH v4 1/2] x86: Refactor and improve performance of strchr-avx2.S To: noah Cc: GNU C Library , "Carlos O'Donell" Content-Type: multipart/mixed; boundary="00000000000023701605bad83f6e" X-Spam-Status: No, score=-3035.7 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, FREEMAIL_FROM, GIT_PATCH_0, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_PASS, TXREP autolearn=ham autolearn_force=no version=3.4.2 X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on server2.sourceware.org X-BeenThere: libc-alpha@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libc-alpha mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 08 Feb 2021 19:33:41 -0000 --00000000000023701605bad83f6e Content-Type: text/plain; charset="UTF-8" On Mon, Feb 8, 2021 at 6:08 AM H.J. Lu wrote: > > On Tue, Feb 2, 2021 at 9:39 PM wrote: > > > > From: noah > > > > No bug. Just seemed the performance could be improved a bit. Observed > > and expected behavior are unchanged. Optimized body of main > > loop. Updated page cross logic and optimized accordingly. Made a few > > minor instruction selection modifications. No regressions in test > > suite. Both test-strchrnul and test-strchr passed. > > > > Signed-off-by: noah > > --- > > sysdeps/x86_64/multiarch/strchr-avx2.S | 235 ++++++++++++------------- > > sysdeps/x86_64/multiarch/strchr.c | 1 + > > 2 files changed, 118 insertions(+), 118 deletions(-) > > > > diff --git a/sysdeps/x86_64/multiarch/strchr-avx2.S b/sysdeps/x86_64/multiarch/strchr-avx2.S > > index d416558d04..8b9d78b55a 100644 > > --- a/sysdeps/x86_64/multiarch/strchr-avx2.S > > +++ b/sysdeps/x86_64/multiarch/strchr-avx2.S > > @@ -27,10 +27,12 @@ > > # ifdef USE_AS_WCSCHR > > # define VPBROADCAST vpbroadcastd > > # define VPCMPEQ vpcmpeqd > > +# define VPMINU vpminud > > # define CHAR_REG esi > > # else > > # define VPBROADCAST vpbroadcastb > > # define VPCMPEQ vpcmpeqb > > +# define VPMINU vpminub > > # define CHAR_REG sil > > # endif > > > > @@ -39,20 +41,26 @@ > > # endif > > > > # define VEC_SIZE 32 > > +# define PAGE_SIZE 4096 > > > > .section .text.avx,"ax",@progbits > > ENTRY (STRCHR) > > movl %edi, %ecx > > - /* Broadcast CHAR to YMM0. */ > > +# ifndef USE_AS_STRCHRNUL > > + xorl %edx, %edx > > +# endif > > + > > + /* Broadcast CHAR to YMM0. */ > > vmovd %esi, %xmm0 > > vpxor %xmm9, %xmm9, %xmm9 > > VPBROADCAST %xmm0, %ymm0 > > - /* Check if we may cross page boundary with one vector load. */ > > - andl $(2 * VEC_SIZE - 1), %ecx > > - cmpl $VEC_SIZE, %ecx > > - ja L(cros_page_boundary) > > - > > - /* Check the first VEC_SIZE bytes. Search for both CHAR and the > > + > > + /* Check if we cross page boundary with one vector load. */ > > + andl $(PAGE_SIZE - 1), %ecx > > + cmpl $(PAGE_SIZE - VEC_SIZE), %ecx > > + ja L(cross_page_boundary) > > + > > + /* Check the first VEC_SIZE bytes. Search for both CHAR and the > > null byte. */ > > vmovdqu (%rdi), %ymm8 > > VPCMPEQ %ymm8, %ymm0, %ymm1 > > @@ -60,50 +68,27 @@ ENTRY (STRCHR) > > vpor %ymm1, %ymm2, %ymm1 > > vpmovmskb %ymm1, %eax > > testl %eax, %eax > > - jnz L(first_vec_x0) > > - > > - /* Align data for aligned loads in the loop. */ > > - addq $VEC_SIZE, %rdi > > - andl $(VEC_SIZE - 1), %ecx > > - andq $-VEC_SIZE, %rdi > > - > > - jmp L(more_4x_vec) > > - > > - .p2align 4 > > -L(cros_page_boundary): > > - andl $(VEC_SIZE - 1), %ecx > > - andq $-VEC_SIZE, %rdi > > - vmovdqu (%rdi), %ymm8 > > - VPCMPEQ %ymm8, %ymm0, %ymm1 > > - VPCMPEQ %ymm8, %ymm9, %ymm2 > > - vpor %ymm1, %ymm2, %ymm1 > > - vpmovmskb %ymm1, %eax > > - /* Remove the leading bytes. */ > > - sarl %cl, %eax > > - testl %eax, %eax > > - jz L(aligned_more) > > - /* Found CHAR or the null byte. */ > > + jz L(more_vecs) > > tzcntl %eax, %eax > > - addq %rcx, %rax > > -# ifdef USE_AS_STRCHRNUL > > + /* Found CHAR or the null byte. */ > > addq %rdi, %rax > > -# else > > - xorl %edx, %edx > > - leaq (%rdi, %rax), %rax > > - cmp (%rax), %CHAR_REG > > +# ifndef USE_AS_STRCHRNUL > > + cmp (%rax), %CHAR_REG > > cmovne %rdx, %rax > > # endif > > VZEROUPPER > > ret > > > > .p2align 4 > > +L(more_vecs): > > + /* Align data for aligned loads in the loop. */ > > + andq $-VEC_SIZE, %rdi > > L(aligned_more): > > - addq $VEC_SIZE, %rdi > > > > -L(more_4x_vec): > > - /* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time > > - since data is only aligned to VEC_SIZE. */ > > - vmovdqa (%rdi), %ymm8 > > + /* Check the next 4 * VEC_SIZE. Only one VEC_SIZE at a time > > + since data is only aligned to VEC_SIZE. */ > > + vmovdqa VEC_SIZE(%rdi), %ymm8 > > + addq $VEC_SIZE, %rdi > > VPCMPEQ %ymm8, %ymm0, %ymm1 > > VPCMPEQ %ymm8, %ymm9, %ymm2 > > vpor %ymm1, %ymm2, %ymm1 > > @@ -125,7 +110,7 @@ L(more_4x_vec): > > vpor %ymm1, %ymm2, %ymm1 > > vpmovmskb %ymm1, %eax > > testl %eax, %eax > > - jnz L(first_vec_x2) > > + jnz L(first_vec_x2) > > > > vmovdqa (VEC_SIZE * 3)(%rdi), %ymm8 > > VPCMPEQ %ymm8, %ymm0, %ymm1 > > @@ -133,122 +118,136 @@ L(more_4x_vec): > > vpor %ymm1, %ymm2, %ymm1 > > vpmovmskb %ymm1, %eax > > testl %eax, %eax > > - jnz L(first_vec_x3) > > - > > - addq $(VEC_SIZE * 4), %rdi > > - > > - /* Align data to 4 * VEC_SIZE. */ > > - movq %rdi, %rcx > > - andl $(4 * VEC_SIZE - 1), %ecx > > - andq $-(4 * VEC_SIZE), %rdi > > - > > - .p2align 4 > > -L(loop_4x_vec): > > - /* Compare 4 * VEC at a time forward. */ > > - vmovdqa (%rdi), %ymm5 > > - vmovdqa VEC_SIZE(%rdi), %ymm6 > > - vmovdqa (VEC_SIZE * 2)(%rdi), %ymm7 > > - vmovdqa (VEC_SIZE * 3)(%rdi), %ymm8 > > - > > - VPCMPEQ %ymm5, %ymm0, %ymm1 > > - VPCMPEQ %ymm6, %ymm0, %ymm2 > > - VPCMPEQ %ymm7, %ymm0, %ymm3 > > - VPCMPEQ %ymm8, %ymm0, %ymm4 > > - > > - VPCMPEQ %ymm5, %ymm9, %ymm5 > > - VPCMPEQ %ymm6, %ymm9, %ymm6 > > - VPCMPEQ %ymm7, %ymm9, %ymm7 > > - VPCMPEQ %ymm8, %ymm9, %ymm8 > > - > > - vpor %ymm1, %ymm5, %ymm1 > > - vpor %ymm2, %ymm6, %ymm2 > > - vpor %ymm3, %ymm7, %ymm3 > > - vpor %ymm4, %ymm8, %ymm4 > > - > > - vpor %ymm1, %ymm2, %ymm5 > > - vpor %ymm3, %ymm4, %ymm6 > > - > > - vpor %ymm5, %ymm6, %ymm5 > > - > > - vpmovmskb %ymm5, %eax > > - testl %eax, %eax > > - jnz L(4x_vec_end) > > - > > - addq $(VEC_SIZE * 4), %rdi > > + jz L(prep_loop_4x) > > > > - jmp L(loop_4x_vec) > > + tzcntl %eax, %eax > > + leaq (VEC_SIZE * 3)(%rdi, %rax), %rax > > +# ifndef USE_AS_STRCHRNUL > > + cmp (%rax), %CHAR_REG > > + cmovne %rdx, %rax > > +# endif > > + VZEROUPPER > > + ret > > > > .p2align 4 > > L(first_vec_x0): > > - /* Found CHAR or the null byte. */ > > tzcntl %eax, %eax > > -# ifdef USE_AS_STRCHRNUL > > + /* Found CHAR or the null byte. */ > > addq %rdi, %rax > > -# else > > - xorl %edx, %edx > > - leaq (%rdi, %rax), %rax > > - cmp (%rax), %CHAR_REG > > +# ifndef USE_AS_STRCHRNUL > > + cmp (%rax), %CHAR_REG > > cmovne %rdx, %rax > > # endif > > VZEROUPPER > > ret > > - > > + > > .p2align 4 > > L(first_vec_x1): > > tzcntl %eax, %eax > > -# ifdef USE_AS_STRCHRNUL > > - addq $VEC_SIZE, %rax > > - addq %rdi, %rax > > -# else > > - xorl %edx, %edx > > leaq VEC_SIZE(%rdi, %rax), %rax > > - cmp (%rax), %CHAR_REG > > +# ifndef USE_AS_STRCHRNUL > > + cmp (%rax), %CHAR_REG > > cmovne %rdx, %rax > > # endif > > VZEROUPPER > > - ret > > - > > + ret > > + > > .p2align 4 > > L(first_vec_x2): > > tzcntl %eax, %eax > > -# ifdef USE_AS_STRCHRNUL > > - addq $(VEC_SIZE * 2), %rax > > - addq %rdi, %rax > > -# else > > - xorl %edx, %edx > > + /* Found CHAR or the null byte. */ > > leaq (VEC_SIZE * 2)(%rdi, %rax), %rax > > - cmp (%rax), %CHAR_REG > > +# ifndef USE_AS_STRCHRNUL > > + cmp (%rax), %CHAR_REG > > cmovne %rdx, %rax > > # endif > > VZEROUPPER > > ret > > + > > +L(prep_loop_4x): > > + /* Align data to 4 * VEC_SIZE. */ > > + andq $-(VEC_SIZE * 4), %rdi > > > > .p2align 4 > > -L(4x_vec_end): > > +L(loop_4x_vec): > > + /* Compare 4 * VEC at a time forward. */ > > + vmovdqa (VEC_SIZE * 4)(%rdi), %ymm5 > > + vmovdqa (VEC_SIZE * 5)(%rdi), %ymm6 > > + vmovdqa (VEC_SIZE * 6)(%rdi), %ymm7 > > + vmovdqa (VEC_SIZE * 7)(%rdi), %ymm8 > > + > > + /* Leaves only CHARS matching esi as 0. */ > > + vpxor %ymm5, %ymm0, %ymm1 > > + vpxor %ymm6, %ymm0, %ymm2 > > + vpxor %ymm7, %ymm0, %ymm3 > > + vpxor %ymm8, %ymm0, %ymm4 > > + > > + VPMINU %ymm1, %ymm5, %ymm1 > > + VPMINU %ymm2, %ymm6, %ymm2 > > + VPMINU %ymm3, %ymm7, %ymm3 > > + VPMINU %ymm4, %ymm8, %ymm4 > > + > > + VPMINU %ymm1, %ymm2, %ymm5 > > + VPMINU %ymm3, %ymm4, %ymm6 > > + > > + VPMINU %ymm5, %ymm6, %ymm5 > > + > > + VPCMPEQ %ymm5, %ymm9, %ymm5 > > + vpmovmskb %ymm5, %eax > > + > > + addq $(VEC_SIZE * 4), %rdi > > + testl %eax, %eax > > + jz L(loop_4x_vec) > > + > > + VPCMPEQ %ymm1, %ymm9, %ymm1 > > vpmovmskb %ymm1, %eax > > testl %eax, %eax > > jnz L(first_vec_x0) > > + > > + VPCMPEQ %ymm2, %ymm9, %ymm2 > > vpmovmskb %ymm2, %eax > > testl %eax, %eax > > jnz L(first_vec_x1) > > - vpmovmskb %ymm3, %eax > > - testl %eax, %eax > > - jnz L(first_vec_x2) > > + > > + VPCMPEQ %ymm3, %ymm9, %ymm3 > > + VPCMPEQ %ymm4, %ymm9, %ymm4 > > + vpmovmskb %ymm3, %ecx > > vpmovmskb %ymm4, %eax > > + salq $32, %rax > > + orq %rcx, %rax > > + tzcntq %rax, %rax > > + leaq (VEC_SIZE * 2)(%rdi, %rax), %rax > > +# ifndef USE_AS_STRCHRNUL > > + cmp (%rax), %CHAR_REG > > + cmovne %rdx, %rax > > +# endif > > + VZEROUPPER > > + ret > > + > > + /* Cold case for crossing page with first load. */ > > + .p2align 4 > > +L(cross_page_boundary): > > + andq $-VEC_SIZE, %rdi > > + andl $(VEC_SIZE - 1), %ecx > > + > > + vmovdqa (%rdi), %ymm8 > > + VPCMPEQ %ymm8, %ymm0, %ymm1 > > + VPCMPEQ %ymm8, %ymm9, %ymm2 > > + vpor %ymm1, %ymm2, %ymm1 > > + vpmovmskb %ymm1, %eax > > + /* Remove the leading bits. */ > > + sarxl %ecx, %eax, %eax > > testl %eax, %eax > > -L(first_vec_x3): > > + jz L(aligned_more) > > tzcntl %eax, %eax > > -# ifdef USE_AS_STRCHRNUL > > - addq $(VEC_SIZE * 3), %rax > > + addq %rcx, %rdi > > addq %rdi, %rax > > -# else > > - xorl %edx, %edx > > - leaq (VEC_SIZE * 3)(%rdi, %rax), %rax > > - cmp (%rax), %CHAR_REG > > +# ifndef USE_AS_STRCHRNUL > > + cmp (%rax), %CHAR_REG > > cmovne %rdx, %rax > > # endif > > VZEROUPPER > > ret > > > > END (STRCHR) > > -#endif > > +# endif > > diff --git a/sysdeps/x86_64/multiarch/strchr.c b/sysdeps/x86_64/multiarch/strchr.c > > index 583a152794..4dfbe3b58b 100644 > > --- a/sysdeps/x86_64/multiarch/strchr.c > > +++ b/sysdeps/x86_64/multiarch/strchr.c > > @@ -37,6 +37,7 @@ IFUNC_SELECTOR (void) > > > > if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) > > && CPU_FEATURE_USABLE_P (cpu_features, AVX2) > > + && CPU_FEATURE_USABLE_P (cpu_features, BMI2) > > && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) > > return OPTIMIZE (avx2); > > > > -- > > 2.29.2 > > > > LGTM. > > Thanks. > This is the updated patch with extra white spaces fixed I am checking in. -- H.J. --00000000000023701605bad83f6e Content-Type: text/x-patch; charset="US-ASCII"; name="0001-x86-64-Refactor-and-improve-performance-of-strchr-av.patch" Content-Disposition: attachment; filename="0001-x86-64-Refactor-and-improve-performance-of-strchr-av.patch" Content-Transfer-Encoding: base64 Content-ID: X-Attachment-Id: f_kkwyz9t80 RnJvbSAxZjc0NWVjYzIxMDk4OTA4ODZiMTYxZDQ3OTFlMTQwNmZkZmMyOWI4IE1vbiBTZXAgMTcg MDA6MDA6MDAgMjAwMQpGcm9tOiBub2FoIDxnb2xkc3RlaW4udy5uQGdtYWlsLmNvbT4KRGF0ZTog V2VkLCAzIEZlYiAyMDIxIDAwOjM4OjU5IC0wNTAwClN1YmplY3Q6IFtQQVRDSCAxLzJdIHg4Ni02 NDogUmVmYWN0b3IgYW5kIGltcHJvdmUgcGVyZm9ybWFuY2Ugb2Ygc3RyY2hyLWF2eDIuUwoKTm8g YnVnLiBKdXN0IHNlZW1lZCB0aGUgcGVyZm9ybWFuY2UgY291bGQgYmUgaW1wcm92ZWQgYSBiaXQu IE9ic2VydmVkCmFuZCBleHBlY3RlZCBiZWhhdmlvciBhcmUgdW5jaGFuZ2VkLiBPcHRpbWl6ZWQg Ym9keSBvZiBtYWluCmxvb3AuIFVwZGF0ZWQgcGFnZSBjcm9zcyBsb2dpYyBhbmQgb3B0aW1pemVk IGFjY29yZGluZ2x5LiBNYWRlIGEgZmV3Cm1pbm9yIGluc3RydWN0aW9uIHNlbGVjdGlvbiBtb2Rp ZmljYXRpb25zLiBObyByZWdyZXNzaW9ucyBpbiB0ZXN0CnN1aXRlLiBCb3RoIHRlc3Qtc3RyY2hy bnVsIGFuZCB0ZXN0LXN0cmNociBwYXNzZWQuCi0tLQogc3lzZGVwcy94ODZfNjQvbXVsdGlhcmNo L3N0cmNoci1hdngyLlMgfCAyMjUgKysrKysrKysrKysrLS0tLS0tLS0tLS0tLQogc3lzZGVwcy94 ODZfNjQvbXVsdGlhcmNoL3N0cmNoci5jICAgICAgfCAgIDEgKwogMiBmaWxlcyBjaGFuZ2VkLCAx MTMgaW5zZXJ0aW9ucygrKSwgMTEzIGRlbGV0aW9ucygtKQoKZGlmZiAtLWdpdCBhL3N5c2RlcHMv eDg2XzY0L211bHRpYXJjaC9zdHJjaHItYXZ4Mi5TIGIvc3lzZGVwcy94ODZfNjQvbXVsdGlhcmNo L3N0cmNoci1hdngyLlMKaW5kZXggZDQxNjU1OGQwNC4uNDc2YzIwYzI4OSAxMDA2NDQKLS0tIGEv c3lzZGVwcy94ODZfNjQvbXVsdGlhcmNoL3N0cmNoci1hdngyLlMKKysrIGIvc3lzZGVwcy94ODZf NjQvbXVsdGlhcmNoL3N0cmNoci1hdngyLlMKQEAgLTI3LDEwICsyNywxMiBAQAogIyBpZmRlZiBV U0VfQVNfV0NTQ0hSCiAjICBkZWZpbmUgVlBCUk9BRENBU1QJdnBicm9hZGNhc3RkCiAjICBkZWZp bmUgVlBDTVBFUQl2cGNtcGVxZAorIyAgZGVmaW5lIFZQTUlOVQl2cG1pbnVkCiAjICBkZWZpbmUg Q0hBUl9SRUcJZXNpCiAjIGVsc2UKICMgIGRlZmluZSBWUEJST0FEQ0FTVAl2cGJyb2FkY2FzdGIK ICMgIGRlZmluZSBWUENNUEVRCXZwY21wZXFiCisjICBkZWZpbmUgVlBNSU5VCXZwbWludWIKICMg IGRlZmluZSBDSEFSX1JFRwlzaWwKICMgZW5kaWYKIApAQCAtMzksNzEgKzQxLDU0IEBACiAjIGVu ZGlmCiAKICMgZGVmaW5lIFZFQ19TSVpFIDMyCisjIGRlZmluZSBQQUdFX1NJWkUgNDA5NgogCiAJ LnNlY3Rpb24gLnRleHQuYXZ4LCJheCIsQHByb2diaXRzCiBFTlRSWSAoU1RSQ0hSKQogCW1vdmwJ JWVkaSwgJWVjeAotCS8qIEJyb2FkY2FzdCBDSEFSIHRvIFlNTTAuICAqLworIyBpZm5kZWYgVVNF X0FTX1NUUkNIUk5VTAorCXhvcmwJJWVkeCwgJWVkeAorIyBlbmRpZgorCisJLyogQnJvYWRjYXN0 IENIQVIgdG8gWU1NMC4JKi8KIAl2bW92ZAklZXNpLCAleG1tMAogCXZweG9yCSV4bW05LCAleG1t OSwgJXhtbTkKIAlWUEJST0FEQ0FTVCAleG1tMCwgJXltbTAKLQkvKiBDaGVjayBpZiB3ZSBtYXkg Y3Jvc3MgcGFnZSBib3VuZGFyeSB3aXRoIG9uZSB2ZWN0b3IgbG9hZC4gICovCi0JYW5kbAkkKDIg KiBWRUNfU0laRSAtIDEpLCAlZWN4Ci0JY21wbAkkVkVDX1NJWkUsICVlY3gKLQlqYQlMKGNyb3Nf cGFnZV9ib3VuZGFyeSkKLQotCS8qIENoZWNrIHRoZSBmaXJzdCBWRUNfU0laRSBieXRlcy4gIFNl YXJjaCBmb3IgYm90aCBDSEFSIGFuZCB0aGUKLQkgICBudWxsIGJ5dGUuICAqLwotCXZtb3ZkcXUJ KCVyZGkpLCAleW1tOAotCVZQQ01QRVEgJXltbTgsICV5bW0wLCAleW1tMQotCVZQQ01QRVEgJXlt bTgsICV5bW05LCAleW1tMgotCXZwb3IJJXltbTEsICV5bW0yLCAleW1tMQotCXZwbW92bXNrYiAl eW1tMSwgJWVheAotCXRlc3RsCSVlYXgsICVlYXgKLQlqbnoJTChmaXJzdF92ZWNfeDApCi0KLQkv KiBBbGlnbiBkYXRhIGZvciBhbGlnbmVkIGxvYWRzIGluIHRoZSBsb29wLiAgKi8KLQlhZGRxCSRW RUNfU0laRSwgJXJkaQotCWFuZGwJJChWRUNfU0laRSAtIDEpLCAlZWN4Ci0JYW5kcQkkLVZFQ19T SVpFLCAlcmRpCiAKLQlqbXAJTChtb3JlXzR4X3ZlYykKKwkvKiBDaGVjayBpZiB3ZSBjcm9zcyBw YWdlIGJvdW5kYXJ5IHdpdGggb25lIHZlY3RvciBsb2FkLiAgKi8KKwlhbmRsCSQoUEFHRV9TSVpF IC0gMSksICVlY3gKKwljbXBsCSQoUEFHRV9TSVpFIC0gVkVDX1NJWkUpLCAlZWN4CisJamEgIEwo Y3Jvc3NfcGFnZV9ib3VuZGFyeSkKIAotCS5wMmFsaWduIDQKLUwoY3Jvc19wYWdlX2JvdW5kYXJ5 KToKLQlhbmRsCSQoVkVDX1NJWkUgLSAxKSwgJWVjeAotCWFuZHEJJC1WRUNfU0laRSwgJXJkaQor CS8qIENoZWNrIHRoZSBmaXJzdCBWRUNfU0laRSBieXRlcy4JU2VhcmNoIGZvciBib3RoIENIQVIg YW5kIHRoZQorCSAgIG51bGwgYnl0ZS4gICovCiAJdm1vdmRxdQkoJXJkaSksICV5bW04CiAJVlBD TVBFUSAleW1tOCwgJXltbTAsICV5bW0xCiAJVlBDTVBFUSAleW1tOCwgJXltbTksICV5bW0yCiAJ dnBvcgkleW1tMSwgJXltbTIsICV5bW0xCiAJdnBtb3Ztc2tiICV5bW0xLCAlZWF4Ci0JLyogUmVt b3ZlIHRoZSBsZWFkaW5nIGJ5dGVzLiAgKi8KLQlzYXJsCSVjbCwgJWVheAogCXRlc3RsCSVlYXgs ICVlYXgKLQlqeglMKGFsaWduZWRfbW9yZSkKLQkvKiBGb3VuZCBDSEFSIG9yIHRoZSBudWxsIGJ5 dGUuICAqLworCWp6CUwobW9yZV92ZWNzKQogCXR6Y250bAklZWF4LCAlZWF4Ci0JYWRkcQklcmN4 LCAlcmF4Ci0jIGlmZGVmIFVTRV9BU19TVFJDSFJOVUwKKwkvKiBGb3VuZCBDSEFSIG9yIHRoZSBu dWxsIGJ5dGUuCSAqLwogCWFkZHEJJXJkaSwgJXJheAotIyBlbHNlCi0JeG9ybAklZWR4LCAlZWR4 Ci0JbGVhcQkoJXJkaSwgJXJheCksICVyYXgKLQljbXAJKCVyYXgpLCAlQ0hBUl9SRUcKKyMgaWZu ZGVmIFVTRV9BU19TVFJDSFJOVUwKKwljbXAgKCVyYXgpLCAlQ0hBUl9SRUcKIAljbW92bmUJJXJk eCwgJXJheAogIyBlbmRpZgogCVZaRVJPVVBQRVIKIAlyZXQKIAogCS5wMmFsaWduIDQKK0wobW9y ZV92ZWNzKToKKwkvKiBBbGlnbiBkYXRhIGZvciBhbGlnbmVkIGxvYWRzIGluIHRoZSBsb29wLiAg Ki8KKwlhbmRxCSQtVkVDX1NJWkUsICVyZGkKIEwoYWxpZ25lZF9tb3JlKToKLQlhZGRxCSRWRUNf U0laRSwgJXJkaQogCi1MKG1vcmVfNHhfdmVjKToKLQkvKiBDaGVjayB0aGUgZmlyc3QgNCAqIFZF Q19TSVpFLiAgT25seSBvbmUgVkVDX1NJWkUgYXQgYSB0aW1lCi0JICAgc2luY2UgZGF0YSBpcyBv bmx5IGFsaWduZWQgdG8gVkVDX1NJWkUuICAqLwotCXZtb3ZkcWEJKCVyZGkpLCAleW1tOAorCS8q IENoZWNrIHRoZSBuZXh0IDQgKiBWRUNfU0laRS4JIE9ubHkgb25lIFZFQ19TSVpFIGF0IGEgdGlt ZQorCSAgIHNpbmNlIGRhdGEgaXMgb25seSBhbGlnbmVkIHRvIFZFQ19TSVpFLgkqLworCXZtb3Zk cWEJVkVDX1NJWkUoJXJkaSksICV5bW04CisJYWRkcQkkVkVDX1NJWkUsICVyZGkKIAlWUENNUEVR ICV5bW04LCAleW1tMCwgJXltbTEKIAlWUENNUEVRICV5bW04LCAleW1tOSwgJXltbTIKIAl2cG9y CSV5bW0xLCAleW1tMiwgJXltbTEKQEAgLTEzMyw2MSArMTE4LDI0IEBAIEwobW9yZV80eF92ZWMp OgogCXZwb3IJJXltbTEsICV5bW0yLCAleW1tMQogCXZwbW92bXNrYiAleW1tMSwgJWVheAogCXRl c3RsCSVlYXgsICVlYXgKLQlqbnoJTChmaXJzdF92ZWNfeDMpCi0KLQlhZGRxCSQoVkVDX1NJWkUg KiA0KSwgJXJkaQotCi0JLyogQWxpZ24gZGF0YSB0byA0ICogVkVDX1NJWkUuICAqLwotCW1vdnEJ JXJkaSwgJXJjeAotCWFuZGwJJCg0ICogVkVDX1NJWkUgLSAxKSwgJWVjeAotCWFuZHEJJC0oNCAq IFZFQ19TSVpFKSwgJXJkaQotCi0JLnAyYWxpZ24gNAotTChsb29wXzR4X3ZlYyk6Ci0JLyogQ29t cGFyZSA0ICogVkVDIGF0IGEgdGltZSBmb3J3YXJkLiAgKi8KLQl2bW92ZHFhCSglcmRpKSwgJXlt bTUKLQl2bW92ZHFhCVZFQ19TSVpFKCVyZGkpLCAleW1tNgotCXZtb3ZkcWEJKFZFQ19TSVpFICog MikoJXJkaSksICV5bW03Ci0Jdm1vdmRxYQkoVkVDX1NJWkUgKiAzKSglcmRpKSwgJXltbTgKLQot CVZQQ01QRVEgJXltbTUsICV5bW0wLCAleW1tMQotCVZQQ01QRVEgJXltbTYsICV5bW0wLCAleW1t MgotCVZQQ01QRVEgJXltbTcsICV5bW0wLCAleW1tMwotCVZQQ01QRVEgJXltbTgsICV5bW0wLCAl eW1tNAorCWp6CUwocHJlcF9sb29wXzR4KQogCi0JVlBDTVBFUSAleW1tNSwgJXltbTksICV5bW01 Ci0JVlBDTVBFUSAleW1tNiwgJXltbTksICV5bW02Ci0JVlBDTVBFUSAleW1tNywgJXltbTksICV5 bW03Ci0JVlBDTVBFUSAleW1tOCwgJXltbTksICV5bW04Ci0KLQl2cG9yCSV5bW0xLCAleW1tNSwg JXltbTEKLQl2cG9yCSV5bW0yLCAleW1tNiwgJXltbTIKLQl2cG9yCSV5bW0zLCAleW1tNywgJXlt bTMKLQl2cG9yCSV5bW00LCAleW1tOCwgJXltbTQKLQotCXZwb3IJJXltbTEsICV5bW0yLCAleW1t NQotCXZwb3IJJXltbTMsICV5bW00LCAleW1tNgotCi0JdnBvcgkleW1tNSwgJXltbTYsICV5bW01 Ci0KLQl2cG1vdm1za2IgJXltbTUsICVlYXgKLQl0ZXN0bAklZWF4LCAlZWF4Ci0Jam56CUwoNHhf dmVjX2VuZCkKLQotCWFkZHEJJChWRUNfU0laRSAqIDQpLCAlcmRpCi0KLQlqbXAJTChsb29wXzR4 X3ZlYykKKwl0emNudGwJJWVheCwgJWVheAorCWxlYXEJKFZFQ19TSVpFICogMykoJXJkaSwgJXJh eCksICVyYXgKKyMgaWZuZGVmIFVTRV9BU19TVFJDSFJOVUwKKwljbXAgKCVyYXgpLCAlQ0hBUl9S RUcKKwljbW92bmUJJXJkeCwgJXJheAorIyBlbmRpZgorCVZaRVJPVVBQRVIKKwlyZXQKIAogCS5w MmFsaWduIDQKIEwoZmlyc3RfdmVjX3gwKToKLQkvKiBGb3VuZCBDSEFSIG9yIHRoZSBudWxsIGJ5 dGUuICAqLwogCXR6Y250bAklZWF4LCAlZWF4Ci0jIGlmZGVmIFVTRV9BU19TVFJDSFJOVUwKKwkv KiBGb3VuZCBDSEFSIG9yIHRoZSBudWxsIGJ5dGUuCSAqLwogCWFkZHEJJXJkaSwgJXJheAotIyBl bHNlCi0JeG9ybAklZWR4LCAlZWR4Ci0JbGVhcQkoJXJkaSwgJXJheCksICVyYXgKLQljbXAJKCVy YXgpLCAlQ0hBUl9SRUcKKyMgaWZuZGVmIFVTRV9BU19TVFJDSFJOVUwKKwljbXAgKCVyYXgpLCAl Q0hBUl9SRUcKIAljbW92bmUJJXJkeCwgJXJheAogIyBlbmRpZgogCVZaRVJPVVBQRVIKQEAgLTE5 NiwxMyArMTQ0LDkgQEAgTChmaXJzdF92ZWNfeDApOgogCS5wMmFsaWduIDQKIEwoZmlyc3RfdmVj X3gxKToKIAl0emNudGwJJWVheCwgJWVheAotIyBpZmRlZiBVU0VfQVNfU1RSQ0hSTlVMCi0JYWRk cQkkVkVDX1NJWkUsICVyYXgKLQlhZGRxCSVyZGksICVyYXgKLSMgZWxzZQotCXhvcmwJJWVkeCwg JWVkeAogCWxlYXEJVkVDX1NJWkUoJXJkaSwgJXJheCksICVyYXgKLQljbXAJKCVyYXgpLCAlQ0hB Ul9SRUcKKyMgaWZuZGVmIFVTRV9BU19TVFJDSFJOVUwKKwljbXAgKCVyYXgpLCAlQ0hBUl9SRUcK IAljbW92bmUJJXJkeCwgJXJheAogIyBlbmRpZgogCVZaRVJPVVBQRVIKQEAgLTIxMSw0NCArMTU1 LDk5IEBAIEwoZmlyc3RfdmVjX3gxKToKIAkucDJhbGlnbiA0CiBMKGZpcnN0X3ZlY194Mik6CiAJ dHpjbnRsCSVlYXgsICVlYXgKLSMgaWZkZWYgVVNFX0FTX1NUUkNIUk5VTAotCWFkZHEJJChWRUNf U0laRSAqIDIpLCAlcmF4Ci0JYWRkcQklcmRpLCAlcmF4Ci0jIGVsc2UKLQl4b3JsCSVlZHgsICVl ZHgKKwkvKiBGb3VuZCBDSEFSIG9yIHRoZSBudWxsIGJ5dGUuCSAqLwogCWxlYXEJKFZFQ19TSVpF ICogMikoJXJkaSwgJXJheCksICVyYXgKLQljbXAJKCVyYXgpLCAlQ0hBUl9SRUcKKyMgaWZuZGVm IFVTRV9BU19TVFJDSFJOVUwKKwljbXAgKCVyYXgpLCAlQ0hBUl9SRUcKIAljbW92bmUJJXJkeCwg JXJheAogIyBlbmRpZgogCVZaRVJPVVBQRVIKIAlyZXQKIAorTChwcmVwX2xvb3BfNHgpOgorCS8q IEFsaWduIGRhdGEgdG8gNCAqIFZFQ19TSVpFLgkqLworCWFuZHEJJC0oVkVDX1NJWkUgKiA0KSwg JXJkaQorCiAJLnAyYWxpZ24gNAotTCg0eF92ZWNfZW5kKToKK0wobG9vcF80eF92ZWMpOgorCS8q IENvbXBhcmUgNCAqIFZFQyBhdCBhIHRpbWUgZm9yd2FyZC4gICovCisJdm1vdmRxYQkoVkVDX1NJ WkUgKiA0KSglcmRpKSwgJXltbTUKKwl2bW92ZHFhCShWRUNfU0laRSAqIDUpKCVyZGkpLCAleW1t NgorCXZtb3ZkcWEJKFZFQ19TSVpFICogNikoJXJkaSksICV5bW03CisJdm1vdmRxYQkoVkVDX1NJ WkUgKiA3KSglcmRpKSwgJXltbTgKKworCS8qIExlYXZlcyBvbmx5IENIQVJTIG1hdGNoaW5nIGVz aSBhcyAwLgkgKi8KKwl2cHhvcgkleW1tNSwgJXltbTAsICV5bW0xCisJdnB4b3IJJXltbTYsICV5 bW0wLCAleW1tMgorCXZweG9yCSV5bW03LCAleW1tMCwgJXltbTMKKwl2cHhvcgkleW1tOCwgJXlt bTAsICV5bW00CisKKwlWUE1JTlUJJXltbTEsICV5bW01LCAleW1tMQorCVZQTUlOVQkleW1tMiwg JXltbTYsICV5bW0yCisJVlBNSU5VCSV5bW0zLCAleW1tNywgJXltbTMKKwlWUE1JTlUJJXltbTQs ICV5bW04LCAleW1tNAorCisJVlBNSU5VCSV5bW0xLCAleW1tMiwgJXltbTUKKwlWUE1JTlUJJXlt bTMsICV5bW00LCAleW1tNgorCisJVlBNSU5VCSV5bW01LCAleW1tNiwgJXltbTUKKworCVZQQ01Q RVEgJXltbTUsICV5bW05LCAleW1tNQorCXZwbW92bXNrYiAleW1tNSwgJWVheAorCisJYWRkcQkk KFZFQ19TSVpFICogNCksICVyZGkKKwl0ZXN0bAklZWF4LCAlZWF4CisJanogIEwobG9vcF80eF92 ZWMpCisKKwlWUENNUEVRICV5bW0xLCAleW1tOSwgJXltbTEKIAl2cG1vdm1za2IgJXltbTEsICVl YXgKIAl0ZXN0bAklZWF4LCAlZWF4CiAJam56CUwoZmlyc3RfdmVjX3gwKQorCisJVlBDTVBFUSAl eW1tMiwgJXltbTksICV5bW0yCiAJdnBtb3Ztc2tiICV5bW0yLCAlZWF4CiAJdGVzdGwJJWVheCwg JWVheAogCWpueglMKGZpcnN0X3ZlY194MSkKLQl2cG1vdm1za2IgJXltbTMsICVlYXgKLQl0ZXN0 bAklZWF4LCAlZWF4Ci0Jam56CUwoZmlyc3RfdmVjX3gyKQorCisJVlBDTVBFUSAleW1tMywgJXlt bTksICV5bW0zCisJVlBDTVBFUSAleW1tNCwgJXltbTksICV5bW00CisJdnBtb3Ztc2tiICV5bW0z LCAlZWN4CiAJdnBtb3Ztc2tiICV5bW00LCAlZWF4CisJc2FscQkkMzIsICVyYXgKKwlvcnEgJXJj eCwgJXJheAorCXR6Y250cSAgJXJheCwgJXJheAorCWxlYXEJKFZFQ19TSVpFICogMikoJXJkaSwg JXJheCksICVyYXgKKyMgaWZuZGVmIFVTRV9BU19TVFJDSFJOVUwKKwljbXAgKCVyYXgpLCAlQ0hB Ul9SRUcKKwljbW92bmUJJXJkeCwgJXJheAorIyBlbmRpZgorCVZaRVJPVVBQRVIKKwlyZXQKKwor CS8qIENvbGQgY2FzZSBmb3IgY3Jvc3NpbmcgcGFnZSB3aXRoIGZpcnN0IGxvYWQuCSAqLworCS5w MmFsaWduIDQKK0woY3Jvc3NfcGFnZV9ib3VuZGFyeSk6CisJYW5kcQkkLVZFQ19TSVpFLCAlcmRp CisJYW5kbAkkKFZFQ19TSVpFIC0gMSksICVlY3gKKworCXZtb3ZkcWEJKCVyZGkpLCAleW1tOAor CVZQQ01QRVEgJXltbTgsICV5bW0wLCAleW1tMQorCVZQQ01QRVEgJXltbTgsICV5bW05LCAleW1t MgorCXZwb3IJJXltbTEsICV5bW0yLCAleW1tMQorCXZwbW92bXNrYiAleW1tMSwgJWVheAorCS8q IFJlbW92ZSB0aGUgbGVhZGluZyBiaXRzLgkgKi8KKwlzYXJ4bAklZWN4LCAlZWF4LCAlZWF4CiAJ dGVzdGwJJWVheCwgJWVheAotTChmaXJzdF92ZWNfeDMpOgorCWp6CUwoYWxpZ25lZF9tb3JlKQog CXR6Y250bAklZWF4LCAlZWF4Ci0jIGlmZGVmIFVTRV9BU19TVFJDSFJOVUwKLQlhZGRxCSQoVkVD X1NJWkUgKiAzKSwgJXJheAorCWFkZHEJJXJjeCwgJXJkaQogCWFkZHEJJXJkaSwgJXJheAotIyBl bHNlCi0JeG9ybAklZWR4LCAlZWR4Ci0JbGVhcQkoVkVDX1NJWkUgKiAzKSglcmRpLCAlcmF4KSwg JXJheAotCWNtcAkoJXJheCksICVDSEFSX1JFRworIyBpZm5kZWYgVVNFX0FTX1NUUkNIUk5VTAor CWNtcCAoJXJheCksICVDSEFSX1JFRwogCWNtb3ZuZQklcmR4LCAlcmF4CiAjIGVuZGlmCiAJVlpF Uk9VUFBFUgogCXJldAogCiBFTkQgKFNUUkNIUikKLSNlbmRpZgorIyBlbmRpZgpkaWZmIC0tZ2l0 IGEvc3lzZGVwcy94ODZfNjQvbXVsdGlhcmNoL3N0cmNoci5jIGIvc3lzZGVwcy94ODZfNjQvbXVs dGlhcmNoL3N0cmNoci5jCmluZGV4IDU4M2ExNTI3OTQuLjRkZmJlM2I1OGIgMTAwNjQ0Ci0tLSBh L3N5c2RlcHMveDg2XzY0L211bHRpYXJjaC9zdHJjaHIuYworKysgYi9zeXNkZXBzL3g4Nl82NC9t dWx0aWFyY2gvc3RyY2hyLmMKQEAgLTM3LDYgKzM3LDcgQEAgSUZVTkNfU0VMRUNUT1IgKHZvaWQp CiAKICAgaWYgKCFDUFVfRkVBVFVSRVNfQVJDSF9QIChjcHVfZmVhdHVyZXMsIFByZWZlcl9Ob19W WkVST1VQUEVSKQogICAgICAgJiYgQ1BVX0ZFQVRVUkVfVVNBQkxFX1AgKGNwdV9mZWF0dXJlcywg QVZYMikKKyAgICAgICYmIENQVV9GRUFUVVJFX1VTQUJMRV9QIChjcHVfZmVhdHVyZXMsIEJNSTIp CiAgICAgICAmJiBDUFVfRkVBVFVSRVNfQVJDSF9QIChjcHVfZmVhdHVyZXMsIEFWWF9GYXN0X1Vu YWxpZ25lZF9Mb2FkKSkKICAgICByZXR1cm4gT1BUSU1JWkUgKGF2eDIpOwogCi0tIAoyLjI5LjIK Cg== --00000000000023701605bad83f6e--