From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 7852) id D2512385800E; Tue, 8 Mar 2022 05:45:20 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org D2512385800E Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Sunil Pandey To: glibc-cvs@sourceware.org Subject: [glibc] x86_64: Fix svml_s_atanf16_core_avx512.S code formatting X-Act-Checkin: glibc X-Git-Author: Sunil K Pandey X-Git-Refname: refs/heads/master X-Git-Oldrev: f2857115061eb25e0e0ca708f0ca740fbaa8972e X-Git-Newrev: 67a8f9b86fa0f265bf67674e557b4ccaea81accb Message-Id: <20220308054520.D2512385800E@sourceware.org> Date: Tue, 8 Mar 2022 05:45:20 +0000 (GMT) X-BeenThere: glibc-cvs@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Glibc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 08 Mar 2022 05:45:20 -0000 https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=67a8f9b86fa0f265bf67674e557b4ccaea81accb commit 67a8f9b86fa0f265bf67674e557b4ccaea81accb Author: Sunil K Pandey Date: Mon Mar 7 10:47:09 2022 -0800 x86_64: Fix svml_s_atanf16_core_avx512.S code formatting This commit contains the following formatting changes: 1. Instructions preceded by a tab. 2. Instructions less than 8 characters in length have a tab between them and the first operand. 3. Instructions greater than 7 characters in length have a space between them and the first operand. 4. Tabs after `#define`d names and their value. 5. 8 spaces at the beginning of a line replaced by a tab. 6. Indent comments with code. 7. Remove redundant .text section. 8. 1 space between line content and line comment. 9. Space after all commas. 
Reviewed-by: Noah Goldstein Diff: --- .../fpu/multiarch/svml_s_atanf16_core_avx512.S | 257 ++++++++++----------- 1 file changed, 128 insertions(+), 129 deletions(-) diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_atanf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_atanf16_core_avx512.S index 4285a4ba42..62d96d13ea 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_s_atanf16_core_avx512.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_s_atanf16_core_avx512.S @@ -30,145 +30,144 @@ /* Offsets for data table __svml_satan_data_internal_avx512 */ -#define AbsMask 0 -#define Shifter 64 -#define MaxThreshold 128 -#define MOne 192 -#define One 256 -#define LargeX 320 -#define Zero 384 -#define Tbl_H 448 -#define Pi2 576 -#define coeff_1 640 -#define coeff_2 704 -#define coeff_3 768 +#define AbsMask 0 +#define Shifter 64 +#define MaxThreshold 128 +#define MOne 192 +#define One 256 +#define LargeX 320 +#define Zero 384 +#define Tbl_H 448 +#define Pi2 576 +#define coeff_1 640 +#define coeff_2 704 +#define coeff_3 768 #include - .text - .section .text.exex512,"ax",@progbits + .section .text.exex512, "ax", @progbits ENTRY(_ZGVeN16v_atanf_skx) - vandps __svml_satan_data_internal_avx512(%rip), %zmm0, %zmm7 - vmovups MaxThreshold+__svml_satan_data_internal_avx512(%rip), %zmm3 - vmovups One+__svml_satan_data_internal_avx512(%rip), %zmm8 - -/* round to 2 bits after binary point */ - vreduceps $40, {sae}, %zmm7, %zmm5 - -/* saturate X range */ - vmovups LargeX+__svml_satan_data_internal_avx512(%rip), %zmm6 - vmovups Shifter+__svml_satan_data_internal_avx512(%rip), %zmm2 - vcmpps $29, {sae}, %zmm3, %zmm7, %k1 - -/* table lookup sequence */ - vmovups Tbl_H+__svml_satan_data_internal_avx512(%rip), %zmm3 - vsubps {rn-sae}, %zmm5, %zmm7, %zmm4 - vaddps {rn-sae}, %zmm2, %zmm7, %zmm1 - vxorps %zmm0, %zmm7, %zmm0 - vfmadd231ps {rn-sae}, %zmm7, %zmm4, %zmm8 - vmovups coeff_2+__svml_satan_data_internal_avx512(%rip), %zmm4 - -/* if|X|>=MaxThreshold, set DiffX=-1 */ - vblendmps 
MOne+__svml_satan_data_internal_avx512(%rip), %zmm5, %zmm9{%k1} - vmovups coeff_3+__svml_satan_data_internal_avx512(%rip), %zmm5 - -/* if|X|>=MaxThreshold, set Y=X */ - vminps {sae}, %zmm7, %zmm6, %zmm8{%k1} - -/* R+Rl = DiffX/Y */ - vgetmantps $0, {sae}, %zmm9, %zmm12 - vgetexpps {sae}, %zmm9, %zmm10 - vpermt2ps Tbl_H+64+__svml_satan_data_internal_avx512(%rip), %zmm1, %zmm3 - vgetmantps $0, {sae}, %zmm8, %zmm15 - vgetexpps {sae}, %zmm8, %zmm11 - vmovups coeff_1+__svml_satan_data_internal_avx512(%rip), %zmm1 - -/* set table value to Pi/2 for large X */ - vblendmps Pi2+__svml_satan_data_internal_avx512(%rip), %zmm3, %zmm9{%k1} - vrcp14ps %zmm15, %zmm13 - vsubps {rn-sae}, %zmm11, %zmm10, %zmm2 - vmulps {rn-sae}, %zmm13, %zmm12, %zmm14 - vfnmadd213ps {rn-sae}, %zmm12, %zmm14, %zmm15 - vfmadd213ps {rn-sae}, %zmm14, %zmm13, %zmm15 - vscalefps {rn-sae}, %zmm2, %zmm15, %zmm7 - -/* polynomial evaluation */ - vmulps {rn-sae}, %zmm7, %zmm7, %zmm8 - vmulps {rn-sae}, %zmm7, %zmm8, %zmm6 - vfmadd231ps {rn-sae}, %zmm8, %zmm1, %zmm4 - vfmadd213ps {rn-sae}, %zmm5, %zmm4, %zmm8 - vfmadd213ps {rn-sae}, %zmm7, %zmm6, %zmm8 - vaddps {rn-sae}, %zmm9, %zmm8, %zmm10 - vxorps %zmm0, %zmm10, %zmm0 - ret + vandps __svml_satan_data_internal_avx512(%rip), %zmm0, %zmm7 + vmovups MaxThreshold+__svml_satan_data_internal_avx512(%rip), %zmm3 + vmovups One+__svml_satan_data_internal_avx512(%rip), %zmm8 + + /* round to 2 bits after binary point */ + vreduceps $40, {sae}, %zmm7, %zmm5 + + /* saturate X range */ + vmovups LargeX+__svml_satan_data_internal_avx512(%rip), %zmm6 + vmovups Shifter+__svml_satan_data_internal_avx512(%rip), %zmm2 + vcmpps $29, {sae}, %zmm3, %zmm7, %k1 + + /* table lookup sequence */ + vmovups Tbl_H+__svml_satan_data_internal_avx512(%rip), %zmm3 + vsubps {rn-sae}, %zmm5, %zmm7, %zmm4 + vaddps {rn-sae}, %zmm2, %zmm7, %zmm1 + vxorps %zmm0, %zmm7, %zmm0 + vfmadd231ps {rn-sae}, %zmm7, %zmm4, %zmm8 + vmovups coeff_2+__svml_satan_data_internal_avx512(%rip), %zmm4 + + /* 
if|X|>=MaxThreshold, set DiffX=-1 */ + vblendmps MOne+__svml_satan_data_internal_avx512(%rip), %zmm5, %zmm9{%k1} + vmovups coeff_3+__svml_satan_data_internal_avx512(%rip), %zmm5 + + /* if|X|>=MaxThreshold, set Y=X */ + vminps {sae}, %zmm7, %zmm6, %zmm8{%k1} + + /* R+Rl = DiffX/Y */ + vgetmantps $0, {sae}, %zmm9, %zmm12 + vgetexpps {sae}, %zmm9, %zmm10 + vpermt2ps Tbl_H+64+__svml_satan_data_internal_avx512(%rip), %zmm1, %zmm3 + vgetmantps $0, {sae}, %zmm8, %zmm15 + vgetexpps {sae}, %zmm8, %zmm11 + vmovups coeff_1+__svml_satan_data_internal_avx512(%rip), %zmm1 + + /* set table value to Pi/2 for large X */ + vblendmps Pi2+__svml_satan_data_internal_avx512(%rip), %zmm3, %zmm9{%k1} + vrcp14ps %zmm15, %zmm13 + vsubps {rn-sae}, %zmm11, %zmm10, %zmm2 + vmulps {rn-sae}, %zmm13, %zmm12, %zmm14 + vfnmadd213ps {rn-sae}, %zmm12, %zmm14, %zmm15 + vfmadd213ps {rn-sae}, %zmm14, %zmm13, %zmm15 + vscalefps {rn-sae}, %zmm2, %zmm15, %zmm7 + + /* polynomial evaluation */ + vmulps {rn-sae}, %zmm7, %zmm7, %zmm8 + vmulps {rn-sae}, %zmm7, %zmm8, %zmm6 + vfmadd231ps {rn-sae}, %zmm8, %zmm1, %zmm4 + vfmadd213ps {rn-sae}, %zmm5, %zmm4, %zmm8 + vfmadd213ps {rn-sae}, %zmm7, %zmm6, %zmm8 + vaddps {rn-sae}, %zmm9, %zmm8, %zmm10 + vxorps %zmm0, %zmm10, %zmm0 + ret END(_ZGVeN16v_atanf_skx) - .section .rodata, "a" - .align 64 + .section .rodata, "a" + .align 64 #ifdef __svml_satan_data_internal_avx512_typedef typedef unsigned int VUINT32; typedef struct { - __declspec(align(64)) VUINT32 AbsMask[16][1]; - __declspec(align(64)) VUINT32 Shifter[16][1]; - __declspec(align(64)) VUINT32 MaxThreshold[16][1]; - __declspec(align(64)) VUINT32 MOne[16][1]; - __declspec(align(64)) VUINT32 One[16][1]; - __declspec(align(64)) VUINT32 LargeX[16][1]; - __declspec(align(64)) VUINT32 Zero[16][1]; - __declspec(align(64)) VUINT32 Tbl_H[32][1]; - __declspec(align(64)) VUINT32 Pi2[16][1]; - __declspec(align(64)) VUINT32 coeff[3][16][1]; - } __svml_satan_data_internal_avx512; + __declspec(align(64)) VUINT32 AbsMask[16][1]; 
+ __declspec(align(64)) VUINT32 Shifter[16][1]; + __declspec(align(64)) VUINT32 MaxThreshold[16][1]; + __declspec(align(64)) VUINT32 MOne[16][1]; + __declspec(align(64)) VUINT32 One[16][1]; + __declspec(align(64)) VUINT32 LargeX[16][1]; + __declspec(align(64)) VUINT32 Zero[16][1]; + __declspec(align(64)) VUINT32 Tbl_H[32][1]; + __declspec(align(64)) VUINT32 Pi2[16][1]; + __declspec(align(64)) VUINT32 coeff[3][16][1]; +} __svml_satan_data_internal_avx512; #endif __svml_satan_data_internal_avx512: - /*== AbsMask ==*/ - .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff - /*== Shifter ==*/ - .align 64 - .long 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000 - /*== MaxThreshold ==*/ - .align 64 - .long 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000 - /*== MOne ==*/ - .align 64 - .long 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000 - /*== One ==*/ - .align 64 - .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 - /*== LargeX ==*/ - .align 64 - .long 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000 - /*== Zero ==*/ - .align 64 - .long 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 - /*== Tbl_H ==*/ - .align 64 - .long 0x00000000, 0x3e7adbb0 - .long 0x3eed6338, 0x3f24bc7d - .long 0x3f490fdb, 0x3f6563e3 - .long 0x3f7b985f, 0x3f869c79 - .long 0x3f8db70d, 0x3f93877b - .long 0x3f985b6c, 0x3f9c6b53 - .long 0x3f9fe0bb, 0x3fa2daa4 - .long 0x3fa57088, 0x3fa7b46f - .long 0x3fa9b465, 0x3fab7b7a - .long 0x3fad1283, 0x3fae809e - .long 0x3fafcb99, 0x3fb0f836 - .long 0x3fb20a6a, 0x3fb30581 - .long 0x3fb3ec43, 0x3fb4c10a - .long 0x3fb585d7, 0x3fb63c64 - .long 0x3fb6e62c, 0x3fb78478 - .long 0x3fb81868, 0x3fb8a2f5 - /*== Pi2 ==*/ - .align 64 - .long 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB - /*== coeff3 ==*/ - .align 64 - .long 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de - .long 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2 - .long 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa - .align 64 - .type __svml_satan_data_internal_avx512,@object - .size __svml_satan_data_internal_avx512,.-__svml_satan_data_internal_avx512 + /* AbsMask */ + .long 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff + /* Shifter */ + .align 64 + .long 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 
0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000, 0x4a000000 + /* MaxThreshold */ + .align 64 + .long 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000, 0x40F80000 + /* MOne */ + .align 64 + .long 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000 + /* One */ + .align 64 + .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 + /* LargeX */ + .align 64 + .long 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000, 0x4f800000 + /* Zero */ + .align 64 + .long 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 + /* Tbl_H */ + .align 64 + .long 0x00000000, 0x3e7adbb0 + .long 0x3eed6338, 0x3f24bc7d + .long 0x3f490fdb, 0x3f6563e3 + .long 0x3f7b985f, 0x3f869c79 + .long 0x3f8db70d, 0x3f93877b + .long 0x3f985b6c, 0x3f9c6b53 + .long 0x3f9fe0bb, 0x3fa2daa4 + .long 0x3fa57088, 0x3fa7b46f + .long 0x3fa9b465, 0x3fab7b7a + .long 0x3fad1283, 0x3fae809e + .long 0x3fafcb99, 0x3fb0f836 + .long 0x3fb20a6a, 0x3fb30581 + .long 0x3fb3ec43, 0x3fb4c10a + .long 0x3fb585d7, 0x3fb63c64 + .long 0x3fb6e62c, 0x3fb78478 + .long 0x3fb81868, 0x3fb8a2f5 + /* Pi2 */ + .align 64 + .long 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB, 0x3fc90FDB + /* coeff3 */ + .align 64 + .long 0xbe0fa8de, 
0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de, 0xbe0fa8de + .long 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2, 0x3e4cc8e2 + .long 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa, 0xbeaaaaaa + .align 64 + .type __svml_satan_data_internal_avx512, @object + .size __svml_satan_data_internal_avx512, .-__svml_satan_data_internal_avx512