From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1791) id 0883C3858D37; Thu, 2 Mar 2023 20:19:16 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 0883C3858D37 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1677788356; bh=bPcNdc/IsttraG7RZ2uxMSdKDLY3Gz0fhx7R4SdevPo=; h=From:To:Subject:Date:From; b=ofmw+XHQBYmA/dLXZiXAj1wz8NtLMlWvGyCD4ZjJq/IsgOsuusfW79xCdOJ8bhj9R 0b+GGLCO5Y+kvuYSzzbLqXl12ykxobF3xIlnU2NaUl0TGfdbA/2D96z/TdsgT+oSsp MlYPn8b5MIVEmXLI8c514L1Uuef3Uviu1ncmWTAA= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Adhemerval Zanella To: glibc-cvs@sourceware.org Subject: [glibc] alpha: Remove strncmp optimization X-Act-Checkin: glibc X-Git-Author: Adhemerval Zanella Netto X-Git-Refname: refs/heads/master X-Git-Oldrev: 92fdb11ae7aa1ab6b18622670ea702205cd6fdc5 X-Git-Newrev: 90ae3bc393164e260269e9e1711f7ca4956596e9 Message-Id: <20230302201916.0883C3858D37@sourceware.org> Date: Thu, 2 Mar 2023 20:19:16 +0000 (GMT) List-Id: https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=90ae3bc393164e260269e9e1711f7ca4956596e9 commit 90ae3bc393164e260269e9e1711f7ca4956596e9 Author: Adhemerval Zanella Netto Date: Tue Feb 28 14:24:00 2023 -0300 alpha: Remove strncmp optimization The generic implementation already cover word access along with cmpbge for both aligned and unaligned, so use it instead. Checked qemu static for alpha-linux-gnu. Diff: --- sysdeps/alpha/strncmp.S | 276 ------------------------------------------------ 1 file changed, 276 deletions(-) diff --git a/sysdeps/alpha/strncmp.S b/sysdeps/alpha/strncmp.S deleted file mode 100644 index a54cc45a0a..0000000000 --- a/sysdeps/alpha/strncmp.S +++ /dev/null @@ -1,276 +0,0 @@ -/* Copyright (C) 1996-2023 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library. If not, see - . */ - -/* Bytewise compare two null-terminated strings of length no longer than N. */ - -#include - - .set noat - .set noreorder - -/* EV6 only predicts one branch per octaword. We'll use these to push - subsequent branches back to the next bundle. This will generally add - a fetch+decode cycle to older machines, so skip in that case. */ -#ifdef __alpha_fix__ -# define ev6_unop unop -#else -# define ev6_unop -#endif - - .text - -ENTRY(strncmp) -#ifdef PROF - ldgp gp, 0(pv) - lda AT, _mcount - jsr AT, (AT), _mcount - .prologue 1 -#else - .prologue 0 -#endif - - xor a0, a1, t2 # are s1 and s2 co-aligned? - beq a2, $zerolength - ldq_u t0, 0(a0) # load asap to give cache time to catch up - ldq_u t1, 0(a1) - lda t3, -1 - and t2, 7, t2 - srl t3, 1, t6 - and a0, 7, t4 # find s1 misalignment - and a1, 7, t5 # find s2 misalignment - cmovlt a2, t6, a2 # bound neg count to LONG_MAX - addq a1, a2, a3 # s2+count - addq a2, t4, a2 # bias count by s1 misalignment - and a2, 7, t10 # ofs of last byte in s1 last word - srl a2, 3, a2 # remaining full words in s1 count - bne t2, $unaligned - - /* On entry to this basic block: - t0 == the first word of s1. - t1 == the first word of s2. - t3 == -1. */ -$aligned: - mskqh t3, a1, t8 # mask off leading garbage - ornot t1, t8, t1 - ornot t0, t8, t0 - cmpbge zero, t1, t7 # bits set iff null found - beq a2, $eoc # check end of count - bne t7, $eos - beq t10, $ant_loop - - /* Aligned compare main loop. - On entry to this basic block: - t0 == an s1 word. - t1 == an s2 word not containing a null. */ - - .align 4 -$a_loop: - xor t0, t1, t2 # e0 : - bne t2, $wordcmp # .. e1 (zdb) - ldq_u t1, 8(a1) # e0 : - ldq_u t0, 8(a0) # .. e1 : - - subq a2, 1, a2 # e0 : - addq a1, 8, a1 # .. e1 : - addq a0, 8, a0 # e0 : - beq a2, $eoc # .. e1 : - - cmpbge zero, t1, t7 # e0 : - beq t7, $a_loop # .. e1 : - - br $eos - - /* Alternate aligned compare loop, for when there's no trailing - bytes on the count. We have to avoid reading too much data. */ - .align 4 -$ant_loop: - xor t0, t1, t2 # e0 : - ev6_unop - ev6_unop - bne t2, $wordcmp # .. e1 (zdb) - - subq a2, 1, a2 # e0 : - beq a2, $zerolength # .. e1 : - ldq_u t1, 8(a1) # e0 : - ldq_u t0, 8(a0) # .. e1 : - - addq a1, 8, a1 # e0 : - addq a0, 8, a0 # .. e1 : - cmpbge zero, t1, t7 # e0 : - beq t7, $ant_loop # .. e1 : - - br $eos - - /* The two strings are not co-aligned. Align s1 and cope. */ - /* On entry to this basic block: - t0 == the first word of s1. - t1 == the first word of s2. - t3 == -1. - t4 == misalignment of s1. - t5 == misalignment of s2. - t10 == misalignment of s1 end. */ - .align 4 -$unaligned: - /* If s1 misalignment is larger than s2 misalignment, we need - extra startup checks to avoid SEGV. */ - subq a1, t4, a1 # adjust s2 for s1 misalignment - cmpult t4, t5, t9 - subq a3, 1, a3 # last byte of s2 - bic a1, 7, t8 - mskqh t3, t5, t7 # mask garbage in s2 - subq a3, t8, a3 - ornot t1, t7, t7 - srl a3, 3, a3 # remaining full words in s2 count - beq t9, $u_head - - /* Failing that, we need to look for both eos and eoc within the - first word of s2. If we find either, we can continue by - pretending that the next word of s2 is all zeros. */ - lda t2, 0 # next = zero - cmpeq a3, 0, t8 # eoc in the first word of s2? - cmpbge zero, t7, t7 # eos in the first word of s2? - or t7, t8, t8 - bne t8, $u_head_nl - - /* We know just enough now to be able to assemble the first - full word of s2. We can still find a zero at the end of it. - - On entry to this basic block: - t0 == first word of s1 - t1 == first partial word of s2. - t3 == -1. - t10 == ofs of last byte in s1 last word. - t11 == ofs of last byte in s2 last word. */ -$u_head: - ldq_u t2, 8(a1) # load second partial s2 word - subq a3, 1, a3 -$u_head_nl: - extql t1, a1, t1 # create first s2 word - mskqh t3, a0, t8 - extqh t2, a1, t4 - ornot t0, t8, t0 # kill s1 garbage - or t1, t4, t1 # s2 word now complete - cmpbge zero, t0, t7 # find eos in first s1 word - ornot t1, t8, t1 # kill s2 garbage - beq a2, $eoc - subq a2, 1, a2 - bne t7, $eos - mskql t3, a1, t8 # mask out s2[1] bits we have seen - xor t0, t1, t4 # compare aligned words - or t2, t8, t8 - bne t4, $wordcmp - cmpbge zero, t8, t7 # eos in high bits of s2[1]? - cmpeq a3, 0, t8 # eoc in s2[1]? - or t7, t8, t7 - bne t7, $u_final - - /* Unaligned copy main loop. In order to avoid reading too much, - the loop is structured to detect zeros in aligned words from s2. - This has, unfortunately, effectively pulled half of a loop - iteration out into the head and half into the tail, but it does - prevent nastiness from accumulating in the very thing we want - to run as fast as possible. - - On entry to this basic block: - t2 == the unshifted low-bits from the next s2 word. - t10 == ofs of last byte in s1 last word. - t11 == ofs of last byte in s2 last word. */ - .align 4 -$u_loop: - extql t2, a1, t3 # e0 : - ldq_u t2, 16(a1) # .. e1 : load next s2 high bits - ldq_u t0, 8(a0) # e0 : load next s1 word - addq a1, 8, a1 # .. e1 : - - addq a0, 8, a0 # e0 : - subq a3, 1, a3 # .. e1 : - extqh t2, a1, t1 # e0 : - cmpbge zero, t0, t7 # .. e1 : eos in current s1 word - - or t1, t3, t1 # e0 : - beq a2, $eoc # .. e1 : eoc in current s1 word - subq a2, 1, a2 # e0 : - cmpbge zero, t2, t4 # .. e1 : eos in s2[1] - - xor t0, t1, t3 # e0 : compare the words - ev6_unop - ev6_unop - bne t7, $eos # .. e1 : - - cmpeq a3, 0, t5 # e0 : eoc in s2[1] - ev6_unop - ev6_unop - bne t3, $wordcmp # .. e1 : - - or t4, t5, t4 # e0 : eos or eoc in s2[1]. - beq t4, $u_loop # .. e1 (zdb) - - /* We've found a zero in the low bits of the last s2 word. Get - the next s1 word and align them. */ - .align 3 -$u_final: - ldq_u t0, 8(a0) - extql t2, a1, t1 - cmpbge zero, t1, t7 - bne a2, $eos - - /* We've hit end of count. Zero everything after the count - and compare whats left. */ - .align 3 -$eoc: - mskql t0, t10, t0 - mskql t1, t10, t1 - cmpbge zero, t1, t7 - - /* We've found a zero somewhere in a word we just read. - On entry to this basic block: - t0 == s1 word - t1 == s2 word - t7 == cmpbge mask containing the zero. */ - .align 3 -$eos: - negq t7, t6 # create bytemask of valid data - and t6, t7, t8 - subq t8, 1, t6 - or t6, t8, t7 - zapnot t0, t7, t0 # kill the garbage - zapnot t1, t7, t1 - xor t0, t1, v0 # ... and compare - beq v0, $done - - /* Here we have two differing co-aligned words in t0 & t1. - Bytewise compare them and return (t0 > t1 ? 1 : -1). */ - .align 3 -$wordcmp: - cmpbge t0, t1, t2 # comparison yields bit mask of ge - cmpbge t1, t0, t3 - xor t2, t3, t0 # bits set iff t0/t1 bytes differ - negq t0, t1 # clear all but least bit - and t0, t1, t0 - lda v0, -1 - and t0, t2, t1 # was bit set in t0 > t1? - cmovne t1, 1, v0 -$done: - ret - - .align 3 -$zerolength: - clr v0 - ret - - END(strncmp) -libc_hidden_builtin_def (strncmp)