Subject: [glibc/google/grte/v5-2.27/master] Makes AArch64 assembly acceptable to clang
From: Fangrui Song
Date: 2021-08-28 00:42 UTC
To: glibc-cvs
https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=83bede0cfc8ac6c044427844edf4efc59fc6d62b
commit 83bede0cfc8ac6c044427844edf4efc59fc6d62b
Author: Shu-Chun Weng <scw@google.com>
Date: Fri Apr 19 14:47:59 2019 -0700
Makes AArch64 assembly acceptable to clang
According to the ARMv8 Architecture Reference Manual, section C7.2.188, the format of the
SIMD MOV (to general) instruction is

    MOV <Xd>, <Vn>.D[<index>]

gas appears to accept "<Vn>.2D[<index>]" as well, but clang's integrated assembler does
not. Cf. https://community.arm.com/developer/ip-products/processors/f/cortex-a-forum/5214/aarch64-assembly-syntax-for-armclang
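For illustration (an editorial sketch, not part of the commit): a minimal standalone file
that assembles with both gas and clang's integrated assembler; the label name is invented
for the example.

	.text
	.global	lane_mov_example
lane_mov_example:
	addp	v0.2d, v0.2d, v0.2d	/* ".2d" is a vector arrangement specifier: both assemblers accept it.  */
	mov	x0, v0.d[0]		/* lane accessor per C7.2.188: both assemblers accept it.  */
	/* mov	x0, v0.2d[0]		   gas-only spelling; clang's integrated assembler rejects it.  */
	ret

With a cross toolchain installed, the difference can be checked by assembling the file with,
e.g., "clang --target=aarch64-linux-gnu -c" on one hand and binutils gas on the other.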
Diff:
---
 sysdeps/aarch64/memchr.S    |  6 +++---
 sysdeps/aarch64/strchr.S    |  6 +++---
 sysdeps/aarch64/strchrnul.S |  6 +++---
 sysdeps/aarch64/strrchr.S   | 10 +++++-----
 4 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/sysdeps/aarch64/memchr.S b/sysdeps/aarch64/memchr.S
index e422aef090..a3b0fc6486 100644
--- a/sysdeps/aarch64/memchr.S
+++ b/sysdeps/aarch64/memchr.S
@@ -91,7 +91,7 @@ ENTRY (__memchr)
and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
addp vend.16b, vend.16b, vend.16b /* 128->64 */
- mov synd, vend.2d[0]
+ mov synd, vend.d[0]
/* Clear the soff*2 lower bits */
lsl tmp, soff, #1
lsr synd, synd, tmp
@@ -111,7 +111,7 @@ L(loop):
/* Use a fast check for the termination condition */
orr vend.16b, vhas_chr1.16b, vhas_chr2.16b
addp vend.2d, vend.2d, vend.2d
- mov synd, vend.2d[0]
+ mov synd, vend.d[0]
/* We're not out of data, loop if we haven't found the character */
cbz synd, L(loop)
@@ -121,7 +121,7 @@ L(end):
and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
addp vend.16b, vhas_chr1.16b, vhas_chr2.16b /* 256->128 */
addp vend.16b, vend.16b, vend.16b /* 128->64 */
- mov synd, vend.2d[0]
+ mov synd, vend.d[0]
/* Only do the clear for the last possible block */
b.hi L(tail)
diff --git a/sysdeps/aarch64/strchr.S b/sysdeps/aarch64/strchr.S
index c27465220b..57fd780dfa 100644
--- a/sysdeps/aarch64/strchr.S
+++ b/sysdeps/aarch64/strchr.S
@@ -94,7 +94,7 @@ ENTRY (strchr)
addp vend1.16b, vend1.16b, vend2.16b // 128->64
lsr tmp1, tmp3, tmp1
- mov tmp3, vend1.2d[0]
+ mov tmp3, vend1.d[0]
bic tmp1, tmp3, tmp1 // Mask padding bits.
cbnz tmp1, L(tail)
@@ -109,7 +109,7 @@ L(loop):
orr vend2.16b, vhas_nul2.16b, vhas_chr2.16b
orr vend1.16b, vend1.16b, vend2.16b
addp vend1.2d, vend1.2d, vend1.2d
- mov tmp1, vend1.2d[0]
+ mov tmp1, vend1.d[0]
cbz tmp1, L(loop)
/* Termination condition found. Now need to establish exactly why
@@ -123,7 +123,7 @@ L(loop):
addp vend1.16b, vend1.16b, vend2.16b // 256->128
addp vend1.16b, vend1.16b, vend2.16b // 128->64
- mov tmp1, vend1.2d[0]
+ mov tmp1, vend1.d[0]
L(tail):
sub src, src, #32
rbit tmp1, tmp1
diff --git a/sysdeps/aarch64/strchrnul.S b/sysdeps/aarch64/strchrnul.S
index e13ace5b7e..7f340d3adf 100644
--- a/sysdeps/aarch64/strchrnul.S
+++ b/sysdeps/aarch64/strchrnul.S
@@ -91,7 +91,7 @@ ENTRY (__strchrnul)
addp vend1.16b, vend1.16b, vend1.16b // 128->64
lsr tmp1, tmp3, tmp1
- mov tmp3, vend1.2d[0]
+ mov tmp3, vend1.d[0]
bic tmp1, tmp3, tmp1 // Mask padding bits.
cbnz tmp1, L(tail)
@@ -106,7 +106,7 @@ L(loop):
orr vhas_chr2.16b, vhas_nul2.16b, vhas_chr2.16b
orr vend1.16b, vhas_chr1.16b, vhas_chr2.16b
addp vend1.2d, vend1.2d, vend1.2d
- mov tmp1, vend1.2d[0]
+ mov tmp1, vend1.d[0]
cbz tmp1, L(loop)
/* Termination condition found. Now need to establish exactly why
@@ -116,7 +116,7 @@ L(loop):
addp vend1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
addp vend1.16b, vend1.16b, vend1.16b // 128->64
- mov tmp1, vend1.2d[0]
+ mov tmp1, vend1.d[0]
L(tail):
/* Count the trailing zeros, by bit reversing... */
rbit tmp1, tmp1
diff --git a/sysdeps/aarch64/strrchr.S b/sysdeps/aarch64/strrchr.S
index aa334ede53..1b7e238f49 100644
--- a/sysdeps/aarch64/strrchr.S
+++ b/sysdeps/aarch64/strrchr.S
@@ -101,10 +101,10 @@ ENTRY(strrchr)
addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b // 128->64
addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b // 128->64
- mov nul_match, vhas_nul1.2d[0]
+ mov nul_match, vhas_nul1.d[0]
lsl tmp1, tmp1, #1
mov const_m1, #~0
- mov chr_match, vhas_chr1.2d[0]
+ mov chr_match, vhas_chr1.d[0]
lsr tmp3, const_m1, tmp1
bic nul_match, nul_match, tmp3 // Mask padding bits.
@@ -127,15 +127,15 @@ L(aligned):
addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
addp vend1.16b, vend1.16b, vend1.16b // 128->64
addp vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b // 128->64
- mov nul_match, vend1.2d[0]
- mov chr_match, vhas_chr1.2d[0]
+ mov nul_match, vend1.d[0]
+ mov chr_match, vhas_chr1.d[0]
cbz nul_match, L(loop)
and vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
and vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b
addp vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b
- mov nul_match, vhas_nul1.2d[0]
+ mov nul_match, vhas_nul1.d[0]
L(tail):
/* Work out exactly where the string ends. */
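The pattern is the same in every hunk above: whole-vector instructions such as
"addp vend.2d, vend.2d, vend.2d" keep the ".2d" arrangement specifier, which both assemblers
accept; only the MOV that extracts lane 0 into a general register switches from the gas-only
".2d[0]" spelling to the architectural ".d[0]" lane accessor. An editorial sketch of the
pattern, with plain register names instead of the files' aliases:

	addp	v6.2d, v6.2d, v6.2d	/* arrangement specifier on a vector op: unchanged */
	mov	x5, v6.d[0]		/* lane accessor: previously written "v6.2d[0]" under gas */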