From: H.J. Lu
To: gcc-cvs@gcc.gnu.org
Subject: [gcc r12-5806] libsanitizer: Use SSE to save and restore XMM registers
X-Act-Checkin: gcc
X-Git-Author: H.J. Lu
X-Git-Refname: refs/heads/master
X-Git-Oldrev: 0dc77a0c4942d3b264f8f8cfc2c509ecc02c3634
X-Git-Newrev: 70b043845d7c378c6a9361a6769885897d1018c2
Message-Id: <20211206161910.9ED903858C60@sourceware.org>
Date: Mon, 6 Dec 2021 16:19:10 +0000 (GMT)

https://gcc.gnu.org/g:70b043845d7c378c6a9361a6769885897d1018c2

commit r12-5806-g70b043845d7c378c6a9361a6769885897d1018c2
Author: H.J. Lu
Date:   Tue Nov 30 05:31:26 2021 -0800

    libsanitizer: Use SSE to save and restore XMM registers

    Use SSE, instead of AVX, to save and restore XMM registers to support
    processors without AVX.  The affected code has been unused upstream
    since

    https://github.com/llvm/llvm-project/commit/66d4ce7e26a5

    and will be removed in

    https://reviews.llvm.org/D112604

    This fixes

    FAIL: g++.dg/tsan/pthread_cond_clockwait.C  -O0  execution test
    FAIL: g++.dg/tsan/pthread_cond_clockwait.C  -O2  execution test

    on machines without AVX.

            PR sanitizer/103466
            * tsan/tsan_rtl_amd64.S (__tsan_trace_switch_thunk): Replace
            vmovdqu with movdqu.
            (__tsan_report_race_thunk): Likewise.

Diff:
---
 libsanitizer/tsan/tsan_rtl_amd64.S | 128 ++++++++++++++++++------------------
 1 file changed, 64 insertions(+), 64 deletions(-)

diff --git a/libsanitizer/tsan/tsan_rtl_amd64.S b/libsanitizer/tsan/tsan_rtl_amd64.S
index 632b19d1815..c15b01e49e5 100644
--- a/libsanitizer/tsan/tsan_rtl_amd64.S
+++ b/libsanitizer/tsan/tsan_rtl_amd64.S
@@ -45,22 +45,22 @@ ASM_SYMBOL(__tsan_trace_switch_thunk):
   # All XMM registers are caller-saved.
   sub $0x100, %rsp
   CFI_ADJUST_CFA_OFFSET(0x100)
-  vmovdqu %xmm0, 0x0(%rsp)
-  vmovdqu %xmm1, 0x10(%rsp)
-  vmovdqu %xmm2, 0x20(%rsp)
-  vmovdqu %xmm3, 0x30(%rsp)
-  vmovdqu %xmm4, 0x40(%rsp)
-  vmovdqu %xmm5, 0x50(%rsp)
-  vmovdqu %xmm6, 0x60(%rsp)
-  vmovdqu %xmm7, 0x70(%rsp)
-  vmovdqu %xmm8, 0x80(%rsp)
-  vmovdqu %xmm9, 0x90(%rsp)
-  vmovdqu %xmm10, 0xa0(%rsp)
-  vmovdqu %xmm11, 0xb0(%rsp)
-  vmovdqu %xmm12, 0xc0(%rsp)
-  vmovdqu %xmm13, 0xd0(%rsp)
-  vmovdqu %xmm14, 0xe0(%rsp)
-  vmovdqu %xmm15, 0xf0(%rsp)
+  movdqu %xmm0, 0x0(%rsp)
+  movdqu %xmm1, 0x10(%rsp)
+  movdqu %xmm2, 0x20(%rsp)
+  movdqu %xmm3, 0x30(%rsp)
+  movdqu %xmm4, 0x40(%rsp)
+  movdqu %xmm5, 0x50(%rsp)
+  movdqu %xmm6, 0x60(%rsp)
+  movdqu %xmm7, 0x70(%rsp)
+  movdqu %xmm8, 0x80(%rsp)
+  movdqu %xmm9, 0x90(%rsp)
+  movdqu %xmm10, 0xa0(%rsp)
+  movdqu %xmm11, 0xb0(%rsp)
+  movdqu %xmm12, 0xc0(%rsp)
+  movdqu %xmm13, 0xd0(%rsp)
+  movdqu %xmm14, 0xe0(%rsp)
+  movdqu %xmm15, 0xf0(%rsp)
   # Align stack frame.
   push %rbx  # non-scratch
   CFI_ADJUST_CFA_OFFSET(8)
@@ -78,22 +78,22 @@ ASM_SYMBOL(__tsan_trace_switch_thunk):
   pop %rbx
   CFI_ADJUST_CFA_OFFSET(-8)
   # Restore scratch registers.
-  vmovdqu 0x0(%rsp), %xmm0
-  vmovdqu 0x10(%rsp), %xmm1
-  vmovdqu 0x20(%rsp), %xmm2
-  vmovdqu 0x30(%rsp), %xmm3
-  vmovdqu 0x40(%rsp), %xmm4
-  vmovdqu 0x50(%rsp), %xmm5
-  vmovdqu 0x60(%rsp), %xmm6
-  vmovdqu 0x70(%rsp), %xmm7
-  vmovdqu 0x80(%rsp), %xmm8
-  vmovdqu 0x90(%rsp), %xmm9
-  vmovdqu 0xa0(%rsp), %xmm10
-  vmovdqu 0xb0(%rsp), %xmm11
-  vmovdqu 0xc0(%rsp), %xmm12
-  vmovdqu 0xd0(%rsp), %xmm13
-  vmovdqu 0xe0(%rsp), %xmm14
-  vmovdqu 0xf0(%rsp), %xmm15
+  movdqu 0x0(%rsp), %xmm0
+  movdqu 0x10(%rsp), %xmm1
+  movdqu 0x20(%rsp), %xmm2
+  movdqu 0x30(%rsp), %xmm3
+  movdqu 0x40(%rsp), %xmm4
+  movdqu 0x50(%rsp), %xmm5
+  movdqu 0x60(%rsp), %xmm6
+  movdqu 0x70(%rsp), %xmm7
+  movdqu 0x80(%rsp), %xmm8
+  movdqu 0x90(%rsp), %xmm9
+  movdqu 0xa0(%rsp), %xmm10
+  movdqu 0xb0(%rsp), %xmm11
+  movdqu 0xc0(%rsp), %xmm12
+  movdqu 0xd0(%rsp), %xmm13
+  movdqu 0xe0(%rsp), %xmm14
+  movdqu 0xf0(%rsp), %xmm15
   add $0x100, %rsp
   CFI_ADJUST_CFA_OFFSET(-0x100)
   pop %r11
@@ -163,22 +163,22 @@ ASM_SYMBOL(__tsan_report_race_thunk):
   # All XMM registers are caller-saved.
   sub $0x100, %rsp
   CFI_ADJUST_CFA_OFFSET(0x100)
-  vmovdqu %xmm0, 0x0(%rsp)
-  vmovdqu %xmm1, 0x10(%rsp)
-  vmovdqu %xmm2, 0x20(%rsp)
-  vmovdqu %xmm3, 0x30(%rsp)
-  vmovdqu %xmm4, 0x40(%rsp)
-  vmovdqu %xmm5, 0x50(%rsp)
-  vmovdqu %xmm6, 0x60(%rsp)
-  vmovdqu %xmm7, 0x70(%rsp)
-  vmovdqu %xmm8, 0x80(%rsp)
-  vmovdqu %xmm9, 0x90(%rsp)
-  vmovdqu %xmm10, 0xa0(%rsp)
-  vmovdqu %xmm11, 0xb0(%rsp)
-  vmovdqu %xmm12, 0xc0(%rsp)
-  vmovdqu %xmm13, 0xd0(%rsp)
-  vmovdqu %xmm14, 0xe0(%rsp)
-  vmovdqu %xmm15, 0xf0(%rsp)
+  movdqu %xmm0, 0x0(%rsp)
+  movdqu %xmm1, 0x10(%rsp)
+  movdqu %xmm2, 0x20(%rsp)
+  movdqu %xmm3, 0x30(%rsp)
+  movdqu %xmm4, 0x40(%rsp)
+  movdqu %xmm5, 0x50(%rsp)
+  movdqu %xmm6, 0x60(%rsp)
+  movdqu %xmm7, 0x70(%rsp)
+  movdqu %xmm8, 0x80(%rsp)
+  movdqu %xmm9, 0x90(%rsp)
+  movdqu %xmm10, 0xa0(%rsp)
+  movdqu %xmm11, 0xb0(%rsp)
+  movdqu %xmm12, 0xc0(%rsp)
+  movdqu %xmm13, 0xd0(%rsp)
+  movdqu %xmm14, 0xe0(%rsp)
+  movdqu %xmm15, 0xf0(%rsp)
   # Align stack frame.
   push %rbx  # non-scratch
   CFI_ADJUST_CFA_OFFSET(8)
@@ -196,22 +196,22 @@ ASM_SYMBOL(__tsan_report_race_thunk):
   pop %rbx
   CFI_ADJUST_CFA_OFFSET(-8)
   # Restore scratch registers.
-  vmovdqu 0x0(%rsp), %xmm0
-  vmovdqu 0x10(%rsp), %xmm1
-  vmovdqu 0x20(%rsp), %xmm2
-  vmovdqu 0x30(%rsp), %xmm3
-  vmovdqu 0x40(%rsp), %xmm4
-  vmovdqu 0x50(%rsp), %xmm5
-  vmovdqu 0x60(%rsp), %xmm6
-  vmovdqu 0x70(%rsp), %xmm7
-  vmovdqu 0x80(%rsp), %xmm8
-  vmovdqu 0x90(%rsp), %xmm9
-  vmovdqu 0xa0(%rsp), %xmm10
-  vmovdqu 0xb0(%rsp), %xmm11
-  vmovdqu 0xc0(%rsp), %xmm12
-  vmovdqu 0xd0(%rsp), %xmm13
-  vmovdqu 0xe0(%rsp), %xmm14
-  vmovdqu 0xf0(%rsp), %xmm15
+  movdqu 0x0(%rsp), %xmm0
+  movdqu 0x10(%rsp), %xmm1
+  movdqu 0x20(%rsp), %xmm2
+  movdqu 0x30(%rsp), %xmm3
+  movdqu 0x40(%rsp), %xmm4
+  movdqu 0x50(%rsp), %xmm5
+  movdqu 0x60(%rsp), %xmm6
+  movdqu 0x70(%rsp), %xmm7
+  movdqu 0x80(%rsp), %xmm8
+  movdqu 0x90(%rsp), %xmm9
+  movdqu 0xa0(%rsp), %xmm10
+  movdqu 0xb0(%rsp), %xmm11
+  movdqu 0xc0(%rsp), %xmm12
+  movdqu 0xd0(%rsp), %xmm13
+  movdqu 0xe0(%rsp), %xmm14
+  movdqu 0xf0(%rsp), %xmm15
   add $0x100, %rsp
   CFI_ADJUST_CFA_OFFSET(-0x100)
   pop %r11
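
For context: vmovdqu is the VEX-encoded AVX form of the unaligned 128-bit move and raises #UD (SIGILL) on CPUs without AVX, whereas movdqu is baseline SSE2 and is available on every x86-64 processor, which is why the unconditional vmovdqu in these thunks broke the tsan tests on non-AVX machines.  Below is a minimal C sketch, not part of this commit, showing how AVX availability could be checked at run time with GCC's __builtin_cpu_supports built-in:

/* Minimal sketch (illustration only): report whether this CPU has AVX.
   On a machine where this prints "no AVX", the old vmovdqu-based thunks
   would fault, while the movdqu (SSE2) versions still work.  */
#include <stdio.h>

int
main (void)
{
  __builtin_cpu_init ();
  if (__builtin_cpu_supports ("avx"))
    printf ("AVX available: vmovdqu would execute here.\n");
  else
    printf ("no AVX: vmovdqu would raise SIGILL; movdqu (SSE2) still works.\n");
  return 0;
}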