public inbox for glibc-cvs@sourceware.org
help / color / mirror / Atom feed
* [glibc] x86_64: Fix svml_s_atanf16_core_avx512.S code formatting
@ 2022-03-08 5:45 Sunil Pandey
0 siblings, 0 replies; only message in thread
From: Sunil Pandey @ 2022-03-08 5:45 UTC (permalink / raw)
To: glibc-cvs
https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=67a8f9b86fa0f265bf67674e557b4ccaea81accb
commit 67a8f9b86fa0f265bf67674e557b4ccaea81accb
Author: Sunil K Pandey <skpgkp2@gmail.com>
Date: Mon Mar 7 10:47:09 2022 -0800
x86_64: Fix svml_s_atanf16_core_avx512.S code formatting
This commit contains the following formatting changes:
1. Instructions preceded by a tab.
2. Instructions less than 8 characters in length have a tab
between them and the first operand.
3. Instructions greater than 7 characters in length have a
space between them and the first operand.
4. Tabs after `#define`d names and their value.
5. 8 spaces at the beginning of a line replaced by a tab.
6. Indent comments with code.
7. Remove redundant .text section.
8. 1 space between line content and line comment.
9. Space after all commas.
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
Diff:
---
.../fpu/multiarch/svml_s_atanf16_core_avx512.S | 257 ++++++++++-----------
1 file changed, 128 insertions(+), 129 deletions(-)
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_atanf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_atanf16_core_avx512.S
index 4285a4ba42..62d96d13ea 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_atanf16_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_atanf16_core_avx512.S
@@ -30,145 +30,144 @@
/* Offsets for data table __svml_satan_data_internal_avx512
*/
-#define AbsMask 0
-#define Shifter 64
-#define MaxThreshold 128
-#define MOne 192
-#define One 256
-#define LargeX 320
-#define Zero 384
-#define Tbl_H 448
-#define Pi2 576
-#define coeff_1 640
-#define coeff_2 704
-#define coeff_3 768
+#define AbsMask 0
+#define Shifter 64
+#define MaxThreshold 128
+#define MOne 192
+#define One 256
+#define LargeX 320
+#define Zero 384
+#define Tbl_H 448
+#define Pi2 576
+#define coeff_1 640
+#define coeff_2 704
+#define coeff_3 768
#include <sysdep.h>
- .text
- .section .text.exex512,"ax",@progbits
+ .section .text.exex512, "ax", @progbits
ENTRY(_ZGVeN16v_atanf_skx)
- vandps __svml_satan_data_internal_avx512(%rip), %zmm0, %zmm7
- vmovups MaxThreshold+__svml_satan_data_internal_avx512(%rip), %zmm3
- vmovups One+__svml_satan_data_internal_avx512(%rip), %zmm8
-
-/* round to 2 bits after binary point */
- vreduceps $40, {sae}, %zmm7, %zmm5
-
-/* saturate X range */
- vmovups LargeX+__svml_satan_data_internal_avx512(%rip), %zmm6
- vmovups Shifter+__svml_satan_data_internal_avx512(%rip), %zmm2
- vcmpps $29, {sae}, %zmm3, %zmm7, %k1
-
-/* table lookup sequence */
- vmovups Tbl_H+__svml_satan_data_internal_avx512(%rip), %zmm3
- vsubps {rn-sae}, %zmm5, %zmm7, %zmm4
- vaddps {rn-sae}, %zmm2, %zmm7, %zmm1
- vxorps %zmm0, %zmm7, %zmm0
- vfmadd231ps {rn-sae}, %zmm7, %zmm4, %zmm8
- vmovups coeff_2+__svml_satan_data_internal_avx512(%rip), %zmm4
-
-/* if|X|>=MaxThreshold, set DiffX=-1 */
- vblendmps MOne+__svml_satan_data_internal_avx512(%rip), %zmm5, %zmm9{%k1}
- vmovups coeff_3+__svml_satan_data_internal_avx512(%rip), %zmm5
-
-/* if|X|>=MaxThreshold, set Y=X */
- vminps {sae}, %zmm7, %zmm6, %zmm8{%k1}
-
-/* R+Rl = DiffX/Y */
- vgetmantps $0, {sae}, %zmm9, %zmm12
- vgetexpps {sae}, %zmm9, %zmm10
- vpermt2ps Tbl_H+64+__svml_satan_data_internal_avx512(%rip), %zmm1, %zmm3
- vgetmantps $0, {sae}, %zmm8, %zmm15
- vgetexpps {sae}, %zmm8, %zmm11
- vmovups coeff_1+__svml_satan_data_internal_avx512(%rip), %zmm1
-
-/* set table value to Pi/2 for large X */
- vblendmps Pi2+__svml_satan_data_internal_avx512(%rip), %zmm3, %zmm9{%k1}
- vrcp14ps %zmm15, %zmm13
- vsubps {rn-sae}, %zmm11, %zmm10, %zmm2
- vmulps {rn-sae}, %zmm13, %zmm12, %zmm14
- vfnmadd213ps {rn-sae}, %zmm12, %zmm14, %zmm15
- vfmadd213ps {rn-sae}, %zmm14, %zmm13, %zmm15
- vscalefps {rn-sae}, %zmm2, %zmm15, %zmm7
-
-/* polynomial evaluation */
- vmulps {rn-sae}, %zmm7, %zmm7, %zmm8
- vmulps {rn-sae}, %zmm7, %zmm8, %zmm6
- vfmadd231ps {rn-sae}, %zmm8, %zmm1, %zmm4
- vfmadd213ps {rn-sae}, %zmm5, %zmm4, %zmm8
- vfmadd213ps {rn-sae}, %zmm7, %zmm6, %zmm8
- vaddps {rn-sae}, %zmm9, %zmm8, %zmm10
- vxorps %zmm0, %zmm10, %zmm0
- ret
+ vandps __svml_satan_data_internal_avx512(%rip), %zmm0, %zmm7
+ vmovups MaxThreshold+__svml_satan_data_internal_avx512(%rip), %zmm3
+ vmovups One+__svml_satan_data_internal_avx512(%rip), %zmm8
+
+ /* round to 2 bits after binary point */
+ vreduceps $40, {sae}, %zmm7, %zmm5
+
+ /* saturate X range */
+ vmovups LargeX+__svml_satan_data_internal_avx512(%rip), %zmm6
+ vmovups Shifter+__svml_satan_data_internal_avx512(%rip), %zmm2
+ vcmpps $29, {sae}, %zmm3, %zmm7, %k1
+
+ /* table lookup sequence */
+ vmovups Tbl_H+__svml_satan_data_internal_avx512(%rip), %zmm3
+ vsubps {rn-sae}, %zmm5, %zmm7, %zmm4
+ vaddps {rn-sae}, %zmm2, %zmm7, %zmm1
+ vxorps %zmm0, %zmm7, %zmm0
+ vfmadd231ps {rn-sae}, %zmm7, %zmm4, %zmm8
+ vmovups coeff_2+__svml_satan_data_internal_avx512(%rip), %zmm4
+
+ /* if|X|>=MaxThreshold, set DiffX=-1 */
+ vblendmps MOne+__svml_satan_data_internal_avx512(%rip), %zmm5, %zmm9{%k1}
+ vmovups coeff_3+__svml_satan_data_internal_avx512(%rip), %zmm5
+
+ /* if|X|>=MaxThreshold, set Y=X */
+ vminps {sae}, %zmm7, %zmm6, %zmm8{%k1}
+
+ /* R+Rl = DiffX/Y */
+ vgetmantps $0, {sae}, %zmm9, %zmm12
+ vgetexpps {sae}, %zmm9, %zmm10
+ vpermt2ps Tbl_H+64+__svml_satan_data_internal_avx512(%rip), %zmm1, %zmm3
+ vgetmantps $0, {sae}, %zmm8, %zmm15
+ vgetexpps {sae}, %zmm8, %zmm11
+ vmovups coeff_1+__svml_satan_data_internal_avx512(%rip), %zmm1
+
+ /* set table value to Pi/2 for large X */
+ vblendmps Pi2+__svml_satan_data_internal_avx512(%rip), %zmm3, %zmm9{%k1}
+ vrcp14ps %zmm15, %zmm13
+ vsubps {rn-sae}, %zmm11, %zmm10, %zmm2
+ vmulps {rn-sae}, %zmm13, %zmm12, %zmm14
+ vfnmadd213ps {rn-sae}, %zmm12, %zmm14, %zmm15
+ vfmadd213ps {rn-sae}, %zmm14, %zmm13, %zmm15
+ vscalefps {rn-sae}, %zmm2, %zmm15, %zmm7
+
+ /* polynomial evaluation */
+ vmulps {rn-sae}, %zmm7, %zmm7, %zmm8
+ vmulps {rn-sae}, %zmm7, %zmm8, %zmm6
+ vfmadd231ps {rn-sae}, %zmm8, %zmm1, %zmm4
+ vfmadd213ps {rn-sae}, %zmm5, %zmm4, %zmm8
+ vfmadd213ps {rn-sae}, %zmm7, %zmm6, %zmm8
+ vaddps {rn-sae}, %zmm9, %zmm8, %zmm10
+ vxorps %zmm0, %zmm10, %zmm0
+ ret
END(_ZGVeN16v_atanf_skx)
- .section .rodata, "a"
- .align 64
+ .section .rodata, "a"
+ .align 64
#ifdef __svml_satan_data_internal_avx512_typedef
typedef unsigned int VUINT32;
typedef struct {
- __declspec(align(64)) VUINT32 AbsMask[16][1];
- __declspec(align(64)) VUINT32 Shifter[16][1];
- __declspec(align(64)) VUINT32 MaxThreshold[16][1];
- __declspec(align(64)) VUINT32 MOne[16][1];
- __declspec(align(64)) VUINT32 One[16][1];
- __declspec(align(64)) VUINT32 LargeX[16][1];
- __declspec(align(64)) VUINT32 Zero[16][1];
- __declspec(align(64)) VUINT32 Tbl_H[32][1];
- __declspec(align(64)) VUINT32 Pi2[16][1];
- __declspec(align(64)) VUINT32 coeff[3][16][1];
- } __svml_satan_data_internal_avx512;
+ __declspec(align(64)) VUINT32 AbsMask[16][1];
+ __declspec(align(64)) VUINT32 Shifter[16][1];
+ __declspec(align(64)) VUINT32 MaxThreshold[16][1];
+ __declspec(align(64)) VUINT32 MOne[16][1];
+ __declspec(align(64)) VUINT32 One[16][1];
+ __declspec(align(64)) VUINT32 LargeX[16][1];
+ __declspec(align(64)) VUINT32 Zero[16][1];
+ __declspec(align(64)) VUINT32 Tbl_H[32][1];
+ __declspec(align(64)) VUINT32 Pi2[16][1];
+ __declspec(align(64)) VUINT32 coeff[3][16][1];
+} __svml_satan_data_internal_avx512;
#endif
__svml_satan_data_internal_avx512:
- /*== AbsMask ==*/
- .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff
- /*== Shifter ==*/
- .align 64
- .long 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000
- /*== MaxThreshold ==*/
- .align 64
- .long 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000
- /*== MOne ==*/
- .align 64
- .long 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000
- /*== One ==*/
- .align 64
- .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
- /*== LargeX ==*/
- .align 64
- .long 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000
- /*== Zero ==*/
- .align 64
- .long 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
- /*== Tbl_H ==*/
- .align 64
- .long 0x00000000, 0x3e7adbb0
- .long 0x3eed6338, 0x3f24bc7d
- .long 0x3f490fdb, 0x3f6563e3
- .long 0x3f7b985f, 0x3f869c79
- .long 0x3f8db70d, 0x3f93877b
- .long 0x3f985b6c, 0x3f9c6b53
- .long 0x3f9fe0bb, 0x3fa2daa4
- .long 0x3fa57088, 0x3fa7b46f
- .long 0x3fa9b465, 0x3fab7b7a
- .long 0x3fad1283, 0x3fae809e
- .long 0x3fafcb99, 0x3fb0f836
- .long 0x3fb20a6a, 0x3fb30581
- .long 0x3fb3ec43, 0x3fb4c10a
- .long 0x3fb585d7, 0x3fb63c64
- .long 0x3fb6e62c, 0x3fb78478
- .long 0x3fb81868, 0x3fb8a2f5
- /*== Pi2 ==*/
- .align 64
- .long 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB
- /*== coeff3 ==*/
- .align 64
- .long 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de
- .long 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2
- .long 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa
- .align 64
- .type __svml_satan_data_internal_avx512,@object
- .size __svml_satan_data_internal_avx512,.-__svml_satan_data_internal_avx512
+ /* AbsMask */
+ .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff
+ /* Shifter */
+ .align 64
+ .long 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000
+ /* MaxThreshold */
+ .align 64
+ .long 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000
+ /* MOne */
+ .align 64
+ .long 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000
+ /* One */
+ .align 64
+ .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
+ /* LargeX */
+ .align 64
+ .long 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000
+ /* Zero */
+ .align 64
+ .long 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
+ /* Tbl_H */
+ .align 64
+ .long 0x00000000, 0x3e7adbb0
+ .long 0x3eed6338, 0x3f24bc7d
+ .long 0x3f490fdb, 0x3f6563e3
+ .long 0x3f7b985f, 0x3f869c79
+ .long 0x3f8db70d, 0x3f93877b
+ .long 0x3f985b6c, 0x3f9c6b53
+ .long 0x3f9fe0bb, 0x3fa2daa4
+ .long 0x3fa57088, 0x3fa7b46f
+ .long 0x3fa9b465, 0x3fab7b7a
+ .long 0x3fad1283, 0x3fae809e
+ .long 0x3fafcb99, 0x3fb0f836
+ .long 0x3fb20a6a, 0x3fb30581
+ .long 0x3fb3ec43, 0x3fb4c10a
+ .long 0x3fb585d7, 0x3fb63c64
+ .long 0x3fb6e62c, 0x3fb78478
+ .long 0x3fb81868, 0x3fb8a2f5
+ /* Pi2 */
+ .align 64
+ .long 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB
+ /* coeff3 */
+ .align 64
+ .long 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de
+ .long 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2
+ .long 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa
+ .align 64
+ .type __svml_satan_data_internal_avx512, @object
+ .size __svml_satan_data_internal_avx512, .-__svml_satan_data_internal_avx512
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2022-03-08 5:45 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-08 5:45 [glibc] x86_64: Fix svml_s_atanf16_core_avx512.S code formatting Sunil Pandey
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).