From: Sunil Pandey <skpgkp2@gmail.com>
To: "H.J. Lu" <hjl.tools@gmail.com>,
Libc-stable Mailing List <libc-stable@sourceware.org>
Cc: Noah Goldstein <goldstein.w.n@gmail.com>,
GNU C Library <libc-alpha@sourceware.org>
Subject: Re: [PATCH v3 3/3] x86: Cleanup page cross code in memcmp-avx2-movbe.S
Date: Thu, 12 May 2022 13:03:42 -0700 [thread overview]
Message-ID: <CAMAf5_fyRHKEKt-k7P7z5kegqaZT6JnJCP_rsqQk8oYTO48mnw@mail.gmail.com> (raw)
In-Reply-To: <CAMe9rOofRMrG0Hvjq42r_c-24juXbS+rRb7QXKGjetmSp93Uwg@mail.gmail.com>
[-- Attachment #1: Type: text/plain, Size: 10139 bytes --]
On Fri, Apr 15, 2022 at 10:34 AM H.J. Lu via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
> On Fri, Apr 15, 2022 at 10:28 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > Old code was both inefficient and wasted code size. New code (-62
> > bytes) and comparable or better performance in the page cross case.
> >
> > geometric_mean(N=20) of page cross cases New / Original: 0.960
> >
> > size, align0, align1, ret, New Time/Old Time
> > 1, 4095, 0, 0, 1.001
> > 1, 4095, 0, 1, 0.999
> > 1, 4095, 0, -1, 1.0
> > 2, 4094, 0, 0, 1.0
> > 2, 4094, 0, 1, 1.0
> > 2, 4094, 0, -1, 1.0
> > 3, 4093, 0, 0, 1.0
> > 3, 4093, 0, 1, 1.0
> > 3, 4093, 0, -1, 1.0
> > 4, 4092, 0, 0, 0.987
> > 4, 4092, 0, 1, 1.0
> > 4, 4092, 0, -1, 1.0
> > 5, 4091, 0, 0, 0.984
> > 5, 4091, 0, 1, 1.002
> > 5, 4091, 0, -1, 1.005
> > 6, 4090, 0, 0, 0.993
> > 6, 4090, 0, 1, 1.001
> > 6, 4090, 0, -1, 1.003
> > 7, 4089, 0, 0, 0.991
> > 7, 4089, 0, 1, 1.0
> > 7, 4089, 0, -1, 1.001
> > 8, 4088, 0, 0, 0.875
> > 8, 4088, 0, 1, 0.881
> > 8, 4088, 0, -1, 0.888
> > 9, 4087, 0, 0, 0.872
> > 9, 4087, 0, 1, 0.879
> > 9, 4087, 0, -1, 0.883
> > 10, 4086, 0, 0, 0.878
> > 10, 4086, 0, 1, 0.886
> > 10, 4086, 0, -1, 0.873
> > 11, 4085, 0, 0, 0.878
> > 11, 4085, 0, 1, 0.881
> > 11, 4085, 0, -1, 0.879
> > 12, 4084, 0, 0, 0.873
> > 12, 4084, 0, 1, 0.889
> > 12, 4084, 0, -1, 0.875
> > 13, 4083, 0, 0, 0.873
> > 13, 4083, 0, 1, 0.863
> > 13, 4083, 0, -1, 0.863
> > 14, 4082, 0, 0, 0.838
> > 14, 4082, 0, 1, 0.869
> > 14, 4082, 0, -1, 0.877
> > 15, 4081, 0, 0, 0.841
> > 15, 4081, 0, 1, 0.869
> > 15, 4081, 0, -1, 0.876
> > 16, 4080, 0, 0, 0.988
> > 16, 4080, 0, 1, 0.99
> > 16, 4080, 0, -1, 0.989
> > 17, 4079, 0, 0, 0.978
> > 17, 4079, 0, 1, 0.981
> > 17, 4079, 0, -1, 0.98
> > 18, 4078, 0, 0, 0.981
> > 18, 4078, 0, 1, 0.98
> > 18, 4078, 0, -1, 0.985
> > 19, 4077, 0, 0, 0.977
> > 19, 4077, 0, 1, 0.979
> > 19, 4077, 0, -1, 0.986
> > 20, 4076, 0, 0, 0.977
> > 20, 4076, 0, 1, 0.986
> > 20, 4076, 0, -1, 0.984
> > 21, 4075, 0, 0, 0.977
> > 21, 4075, 0, 1, 0.983
> > 21, 4075, 0, -1, 0.988
> > 22, 4074, 0, 0, 0.983
> > 22, 4074, 0, 1, 0.994
> > 22, 4074, 0, -1, 0.993
> > 23, 4073, 0, 0, 0.98
> > 23, 4073, 0, 1, 0.992
> > 23, 4073, 0, -1, 0.995
> > 24, 4072, 0, 0, 0.989
> > 24, 4072, 0, 1, 0.989
> > 24, 4072, 0, -1, 0.991
> > 25, 4071, 0, 0, 0.99
> > 25, 4071, 0, 1, 0.999
> > 25, 4071, 0, -1, 0.996
> > 26, 4070, 0, 0, 0.993
> > 26, 4070, 0, 1, 0.995
> > 26, 4070, 0, -1, 0.998
> > 27, 4069, 0, 0, 0.993
> > 27, 4069, 0, 1, 0.999
> > 27, 4069, 0, -1, 1.0
> > 28, 4068, 0, 0, 0.997
> > 28, 4068, 0, 1, 1.0
> > 28, 4068, 0, -1, 0.999
> > 29, 4067, 0, 0, 0.996
> > 29, 4067, 0, 1, 0.999
> > 29, 4067, 0, -1, 0.999
> > 30, 4066, 0, 0, 0.991
> > 30, 4066, 0, 1, 1.001
> > 30, 4066, 0, -1, 0.999
> > 31, 4065, 0, 0, 0.988
> > 31, 4065, 0, 1, 0.998
> > 31, 4065, 0, -1, 0.998
> > ---
> > sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S | 98 ++++++++++++--------
> > 1 file changed, 61 insertions(+), 37 deletions(-)
> >
> > diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
> > index a34ea1645d..210c9925b6 100644
> > --- a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
> > +++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
> > @@ -429,22 +429,21 @@ L(page_cross_less_vec):
> > # ifndef USE_AS_WMEMCMP
> > cmpl $8, %edx
> > jae L(between_8_15)
> > + /* Fall through for [4, 7]. */
> > cmpl $4, %edx
> > - jae L(between_4_7)
> > + jb L(between_2_3)
> >
> > - /* Load as big endian to avoid branches. */
> > - movzwl (%rdi), %eax
> > - movzwl (%rsi), %ecx
> > - shll $8, %eax
> > - shll $8, %ecx
> > - bswap %eax
> > - bswap %ecx
> > - movzbl -1(%rdi, %rdx), %edi
> > - movzbl -1(%rsi, %rdx), %esi
> > - orl %edi, %eax
> > - orl %esi, %ecx
> > - /* Subtraction is okay because the upper 8 bits are zero. */
> > - subl %ecx, %eax
> > + movbe (%rdi), %eax
> > + movbe (%rsi), %ecx
> > + shlq $32, %rax
> > + shlq $32, %rcx
> > + movbe -4(%rdi, %rdx), %edi
> > + movbe -4(%rsi, %rdx), %esi
> > + orq %rdi, %rax
> > + orq %rsi, %rcx
> > + subq %rcx, %rax
> > + /* Fast path for return zero. */
> > + jnz L(ret_nonzero)
> > /* No ymm register was touched. */
> > ret
> >
> > @@ -457,9 +456,33 @@ L(one_or_less):
> > /* No ymm register was touched. */
> > ret
> >
> > + .p2align 4,, 5
> > +L(ret_nonzero):
> > + sbbl %eax, %eax
> > + orl $1, %eax
> > + /* No ymm register was touched. */
> > + ret
> > +
> > + .p2align 4,, 2
> > +L(zero):
> > + xorl %eax, %eax
> > + /* No ymm register was touched. */
> > + ret
> > +
> > .p2align 4
> > L(between_8_15):
> > -# endif
> > + movbe (%rdi), %rax
> > + movbe (%rsi), %rcx
> > + subq %rcx, %rax
> > + jnz L(ret_nonzero)
> > + movbe -8(%rdi, %rdx), %rax
> > + movbe -8(%rsi, %rdx), %rcx
> > + subq %rcx, %rax
> > + /* Fast path for return zero. */
> > + jnz L(ret_nonzero)
> > + /* No ymm register was touched. */
> > + ret
> > +# else
> > /* If USE_AS_WMEMCMP fall through into 8-15 byte case. */
> > vmovq (%rdi), %xmm1
> > vmovq (%rsi), %xmm2
> > @@ -475,16 +498,13 @@ L(between_8_15):
> > VPCMPEQ %xmm1, %xmm2, %xmm2
> > vpmovmskb %xmm2, %eax
> > subl $0xffff, %eax
> > + /* Fast path for return zero. */
> > jnz L(return_vec_0)
> > /* No ymm register was touched. */
> > ret
> > +# endif
> >
> > - .p2align 4
> > -L(zero):
> > - xorl %eax, %eax
> > - ret
> > -
> > - .p2align 4
> > + .p2align 4,, 10
> > L(between_16_31):
> > /* From 16 to 31 bytes. No branch when size == 16. */
> > vmovdqu (%rsi), %xmm2
> > @@ -501,11 +521,17 @@ L(between_16_31):
> > VPCMPEQ (%rdi), %xmm2, %xmm2
> > vpmovmskb %xmm2, %eax
> > subl $0xffff, %eax
> > + /* Fast path for return zero. */
> > jnz L(return_vec_0)
> > /* No ymm register was touched. */
> > ret
> >
> > # ifdef USE_AS_WMEMCMP
> > + .p2align 4,, 2
> > +L(zero):
> > + xorl %eax, %eax
> > + ret
> > +
> > .p2align 4
> > L(one_or_less):
> > jb L(zero)
> > @@ -520,22 +546,20 @@ L(one_or_less):
> > # else
> >
> > .p2align 4
> > -L(between_4_7):
> > - /* Load as big endian with overlapping movbe to avoid branches.
> > - */
> > - movbe (%rdi), %eax
> > - movbe (%rsi), %ecx
> > - shlq $32, %rax
> > - shlq $32, %rcx
> > - movbe -4(%rdi, %rdx), %edi
> > - movbe -4(%rsi, %rdx), %esi
> > - orq %rdi, %rax
> > - orq %rsi, %rcx
> > - subq %rcx, %rax
> > - jz L(zero_4_7)
> > - sbbl %eax, %eax
> > - orl $1, %eax
> > -L(zero_4_7):
> > +L(between_2_3):
> > + /* Load as big endian to avoid branches. */
> > + movzwl (%rdi), %eax
> > + movzwl (%rsi), %ecx
> > + bswap %eax
> > + bswap %ecx
> > + shrl %eax
> > + shrl %ecx
> > + movzbl -1(%rdi, %rdx), %edi
> > + movzbl -1(%rsi, %rdx), %esi
> > + orl %edi, %eax
> > + orl %esi, %ecx
> > + /* Subtraction is okay because the upper bit is zero. */
> > + subl %ecx, %eax
> > /* No ymm register was touched. */
> > ret
> > # endif
> > --
> > 2.25.1
> >
>
> LGTM.
>
> Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
>
> Thanks.
>
> --
> H.J.
I would like to backport this patch to release branches.
Any comments or objections?
Conflict resolution patch attached.
--Sunil
[-- Attachment #2: 0021-x86-Cleanup-page-cross-code-in-memcmp-avx2-movbe.S.patch --]
[-- Type: application/octet-stream, Size: 8202 bytes --]
From 3d74dd4ebc4098bc7f4b91b2794684427c7694dd Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Fri, 15 Apr 2022 12:28:01 -0500
Subject: [PATCH 21/26] x86: Cleanup page cross code in memcmp-avx2-movbe.S
Old code was both inefficient and wasted code size. New code (-62
bytes) and comparable or better performance in the page cross case.
geometric_mean(N=20) of page cross cases New / Original: 0.960
size, align0, align1, ret, New Time/Old Time
1, 4095, 0, 0, 1.001
1, 4095, 0, 1, 0.999
1, 4095, 0, -1, 1.0
2, 4094, 0, 0, 1.0
2, 4094, 0, 1, 1.0
2, 4094, 0, -1, 1.0
3, 4093, 0, 0, 1.0
3, 4093, 0, 1, 1.0
3, 4093, 0, -1, 1.0
4, 4092, 0, 0, 0.987
4, 4092, 0, 1, 1.0
4, 4092, 0, -1, 1.0
5, 4091, 0, 0, 0.984
5, 4091, 0, 1, 1.002
5, 4091, 0, -1, 1.005
6, 4090, 0, 0, 0.993
6, 4090, 0, 1, 1.001
6, 4090, 0, -1, 1.003
7, 4089, 0, 0, 0.991
7, 4089, 0, 1, 1.0
7, 4089, 0, -1, 1.001
8, 4088, 0, 0, 0.875
8, 4088, 0, 1, 0.881
8, 4088, 0, -1, 0.888
9, 4087, 0, 0, 0.872
9, 4087, 0, 1, 0.879
9, 4087, 0, -1, 0.883
10, 4086, 0, 0, 0.878
10, 4086, 0, 1, 0.886
10, 4086, 0, -1, 0.873
11, 4085, 0, 0, 0.878
11, 4085, 0, 1, 0.881
11, 4085, 0, -1, 0.879
12, 4084, 0, 0, 0.873
12, 4084, 0, 1, 0.889
12, 4084, 0, -1, 0.875
13, 4083, 0, 0, 0.873
13, 4083, 0, 1, 0.863
13, 4083, 0, -1, 0.863
14, 4082, 0, 0, 0.838
14, 4082, 0, 1, 0.869
14, 4082, 0, -1, 0.877
15, 4081, 0, 0, 0.841
15, 4081, 0, 1, 0.869
15, 4081, 0, -1, 0.876
16, 4080, 0, 0, 0.988
16, 4080, 0, 1, 0.99
16, 4080, 0, -1, 0.989
17, 4079, 0, 0, 0.978
17, 4079, 0, 1, 0.981
17, 4079, 0, -1, 0.98
18, 4078, 0, 0, 0.981
18, 4078, 0, 1, 0.98
18, 4078, 0, -1, 0.985
19, 4077, 0, 0, 0.977
19, 4077, 0, 1, 0.979
19, 4077, 0, -1, 0.986
20, 4076, 0, 0, 0.977
20, 4076, 0, 1, 0.986
20, 4076, 0, -1, 0.984
21, 4075, 0, 0, 0.977
21, 4075, 0, 1, 0.983
21, 4075, 0, -1, 0.988
22, 4074, 0, 0, 0.983
22, 4074, 0, 1, 0.994
22, 4074, 0, -1, 0.993
23, 4073, 0, 0, 0.98
23, 4073, 0, 1, 0.992
23, 4073, 0, -1, 0.995
24, 4072, 0, 0, 0.989
24, 4072, 0, 1, 0.989
24, 4072, 0, -1, 0.991
25, 4071, 0, 0, 0.99
25, 4071, 0, 1, 0.999
25, 4071, 0, -1, 0.996
26, 4070, 0, 0, 0.993
26, 4070, 0, 1, 0.995
26, 4070, 0, -1, 0.998
27, 4069, 0, 0, 0.993
27, 4069, 0, 1, 0.999
27, 4069, 0, -1, 1.0
28, 4068, 0, 0, 0.997
28, 4068, 0, 1, 1.0
28, 4068, 0, -1, 0.999
29, 4067, 0, 0, 0.996
29, 4067, 0, 1, 0.999
29, 4067, 0, -1, 0.999
30, 4066, 0, 0, 0.991
30, 4066, 0, 1, 1.001
30, 4066, 0, -1, 0.999
31, 4065, 0, 0, 0.988
31, 4065, 0, 1, 0.998
31, 4065, 0, -1, 0.998
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
(cherry picked from commit 23102686ec67b856a2d4fd25ddaa1c0b8d175c4f)
---
sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S | 98 ++++++++++++--------
1 file changed, 61 insertions(+), 37 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
index a34ea1645d..210c9925b6 100644
--- a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
+++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
@@ -429,22 +429,21 @@ L(page_cross_less_vec):
# ifndef USE_AS_WMEMCMP
cmpl $8, %edx
jae L(between_8_15)
+ /* Fall through for [4, 7]. */
cmpl $4, %edx
- jae L(between_4_7)
+ jb L(between_2_3)
- /* Load as big endian to avoid branches. */
- movzwl (%rdi), %eax
- movzwl (%rsi), %ecx
- shll $8, %eax
- shll $8, %ecx
- bswap %eax
- bswap %ecx
- movzbl -1(%rdi, %rdx), %edi
- movzbl -1(%rsi, %rdx), %esi
- orl %edi, %eax
- orl %esi, %ecx
- /* Subtraction is okay because the upper 8 bits are zero. */
- subl %ecx, %eax
+ movbe (%rdi), %eax
+ movbe (%rsi), %ecx
+ shlq $32, %rax
+ shlq $32, %rcx
+ movbe -4(%rdi, %rdx), %edi
+ movbe -4(%rsi, %rdx), %esi
+ orq %rdi, %rax
+ orq %rsi, %rcx
+ subq %rcx, %rax
+ /* Fast path for return zero. */
+ jnz L(ret_nonzero)
/* No ymm register was touched. */
ret
@@ -457,9 +456,33 @@ L(one_or_less):
/* No ymm register was touched. */
ret
+ .p2align 4,, 5
+L(ret_nonzero):
+ sbbl %eax, %eax
+ orl $1, %eax
+ /* No ymm register was touched. */
+ ret
+
+ .p2align 4,, 2
+L(zero):
+ xorl %eax, %eax
+ /* No ymm register was touched. */
+ ret
+
.p2align 4
L(between_8_15):
-# endif
+ movbe (%rdi), %rax
+ movbe (%rsi), %rcx
+ subq %rcx, %rax
+ jnz L(ret_nonzero)
+ movbe -8(%rdi, %rdx), %rax
+ movbe -8(%rsi, %rdx), %rcx
+ subq %rcx, %rax
+ /* Fast path for return zero. */
+ jnz L(ret_nonzero)
+ /* No ymm register was touched. */
+ ret
+# else
/* If USE_AS_WMEMCMP fall through into 8-15 byte case. */
vmovq (%rdi), %xmm1
vmovq (%rsi), %xmm2
@@ -475,16 +498,13 @@ L(between_8_15):
VPCMPEQ %xmm1, %xmm2, %xmm2
vpmovmskb %xmm2, %eax
subl $0xffff, %eax
+ /* Fast path for return zero. */
jnz L(return_vec_0)
/* No ymm register was touched. */
ret
+# endif
- .p2align 4
-L(zero):
- xorl %eax, %eax
- ret
-
- .p2align 4
+ .p2align 4,, 10
L(between_16_31):
/* From 16 to 31 bytes. No branch when size == 16. */
vmovdqu (%rsi), %xmm2
@@ -501,11 +521,17 @@ L(between_16_31):
VPCMPEQ (%rdi), %xmm2, %xmm2
vpmovmskb %xmm2, %eax
subl $0xffff, %eax
+ /* Fast path for return zero. */
jnz L(return_vec_0)
/* No ymm register was touched. */
ret
# ifdef USE_AS_WMEMCMP
+ .p2align 4,, 2
+L(zero):
+ xorl %eax, %eax
+ ret
+
.p2align 4
L(one_or_less):
jb L(zero)
@@ -520,22 +546,20 @@ L(one_or_less):
# else
.p2align 4
-L(between_4_7):
- /* Load as big endian with overlapping movbe to avoid branches.
- */
- movbe (%rdi), %eax
- movbe (%rsi), %ecx
- shlq $32, %rax
- shlq $32, %rcx
- movbe -4(%rdi, %rdx), %edi
- movbe -4(%rsi, %rdx), %esi
- orq %rdi, %rax
- orq %rsi, %rcx
- subq %rcx, %rax
- jz L(zero_4_7)
- sbbl %eax, %eax
- orl $1, %eax
-L(zero_4_7):
+L(between_2_3):
+ /* Load as big endian to avoid branches. */
+ movzwl (%rdi), %eax
+ movzwl (%rsi), %ecx
+ bswap %eax
+ bswap %ecx
+ shrl %eax
+ shrl %ecx
+ movzbl -1(%rdi, %rdx), %edi
+ movzbl -1(%rsi, %rdx), %esi
+ orl %edi, %eax
+ orl %esi, %ecx
+ /* Subtraction is okay because the upper bit is zero. */
+ subl %ecx, %eax
/* No ymm register was touched. */
ret
# endif
--
2.35.1
prev parent reply other threads:[~2022-05-12 20:04 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20220415055132.1257272-1-goldstein.w.n@gmail.com>
[not found] ` <20220415172801.1525674-1-goldstein.w.n@gmail.com>
[not found] ` <CAMe9rOrZGGy6-wRu1V+20bBJUZH_D_aOWzwxTp-BAyZ8ANB7YA@mail.gmail.com>
[not found] ` <CAFUsyfLpQLwN=H5Tnhsqn0vK3i=a5LE-G_rV3Z6QxShtkJRm3g@mail.gmail.com>
[not found] ` <alpine.DEB.2.22.394.2204192028050.18024@digraph.polyomino.org.uk>
[not found] ` <CAFUsyfJt_HOQ1msmj_xSfAYm_7jB-XZg-7jNepXSD-Xd7Ui2nw@mail.gmail.com>
[not found] ` <CAFUsyfLOTir+QsJOnhmQiU1n7jw4NP8=TrAecgMbww5HEFG=-g@mail.gmail.com>
[not found] ` <CAFUsyfKre7d72QZtAx_E67vP7x-N8fuePorgg5vhe+5HDPQaVA@mail.gmail.com>
2022-05-12 19:59 ` [PATCH v3 1/3] x86: Optimize memcmp SSE2 in memcmp.S Sunil Pandey
[not found] ` <20220415172801.1525674-2-goldstein.w.n@gmail.com>
[not found] ` <CAMe9rOr2Cr4FRJDY2p7J3HVtDt4VmQAkt-MPtETj-+cCfvUJ0w@mail.gmail.com>
2022-05-12 20:01 ` [PATCH v3 2/3] x86: Remove memcmp-sse4.S Sunil Pandey
[not found] ` <20220415172801.1525674-3-goldstein.w.n@gmail.com>
[not found] ` <CAMe9rOofRMrG0Hvjq42r_c-24juXbS+rRb7QXKGjetmSp93Uwg@mail.gmail.com>
2022-05-12 20:03 ` Sunil Pandey [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=CAMAf5_fyRHKEKt-k7P7z5kegqaZT6JnJCP_rsqQk8oYTO48mnw@mail.gmail.com \
--to=skpgkp2@gmail.com \
--cc=goldstein.w.n@gmail.com \
--cc=hjl.tools@gmail.com \
--cc=libc-alpha@sourceware.org \
--cc=libc-stable@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).