* [Bug target/64306] New: [SH] Improve unaligned loads
From: olegendo at gcc dot gnu.org @ 2014-12-14 15:32 UTC
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64306
Bug ID: 64306
Summary: [SH] Improve unaligned loads
Product: gcc
Version: 5.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: olegendo at gcc dot gnu.org
Target: sh*-*-*
On SH4A the movua.l insn can be used to do 32 bit unaligned loads (currently
defunct, see PR 52480). It could also be used to do 16 bit unaligned loads
with fewer insns, if over-reading the adjacent bytes is acceptable (usually
it is not safe, but it could be relaxed and enabled by a -m option).
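For illustration, a minimal C-level sketch of what the over-reading 16 bit
load would do (hypothetical function, not current GCC output; movua.l would
fetch 4 bytes starting at the field's address, so the 2 bytes following the
16 bit field are read and discarded):
/* Sketch only: models the over-reading movua.l approach, little endian.
   Reads 4 bytes where only 2 are needed; the 2 trailing bytes must be
   accessible, which is why this is unsafe in general.  */
int load_s16_overread (const unsigned char* p)
{
  unsigned int u;
  __builtin_memcpy (&u, p, sizeof (u)); /* movua.l-style 32 bit load */
  return (short)u;                      /* use only the low 16 bits */
}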
struct __attribute__((packed)) x
{
int val32;
short val_s16;
unsigned short val_u16;
};
int load_unaligned_s16 (const x& xx)
{
return xx.val_s16;
}
currently compiles to
little endian:
mov.b @(4,r4),r0
extu.b r0,r1
mov.b @(5,r4),r0
extu.b r0,r4
swap.b r4,r4
or r1,r4
exts.w r4,r0
big endian:
mov.b @(4,r4),r0
mov r0,r1
mov.b @(5,r4),r0
extu.b r0,r4
extu.b r1,r0
swap.b r0,r0
or r4,r0
exts.w r0,r0
better:
mov.b @({4|5},r4),r0 ! load low byte
extu.b r0,r1 ! r1 = zero-extended low byte
mov.b @({5|4},r4),r0 ! load high byte (sign-extended)
shll8 r0
or r1,r0 ! r0 = sign-extended 16 bit value
SH4A little endian (unsafe):
movua.l @r4,r0
exts.w r0,r0
SH4A big endian (unsafe):
movua.l @r4,r0
shlr16 r0
exts.w r0,r0
int load_unaligned_u16 (const x& xx)
{
return xx.val_u16;
}
currently compiles to
little endian:
mov.b @(6,r4),r0
extu.b r0,r1
mov.b @(7,r4),r0
extu.b r0,r4
swap.b r4,r0
or r1,r0
big endian:
mov.b @(6,r4),r0
mov r0,r1
mov.b @(7,r4),r0
extu.b r0,r4
extu.b r1,r0
swap.b r0,r0
or r4,r0
better (uses fewer regs):
mov.b @({6|7},r4),r0
extu.b r0,r1
mov.b @({7|6},r4),r0
shll8 r0
or r1,r0
extu.w r0,r0
int load_unaligned32 (const x& xx)
{
return xx.val32;
}
currently compiles to
little endian:
mov.b @(1,r4),r0
mov.b @r4,r2
extu.b r0,r1
mov.b @(2,r4),r0
extu.b r2,r3
swap.b r1,r2
or r3,r2
extu.b r0,r3
mov r3,r0
shll16 r0
mov r0,r1
mov.b @(3,r4),r0
or r2,r1
shll16 r0
shll8 r0
or r1,r0
better:
mov.b @r4+,r0 ! r0 = xx.xx.xx.aa
mov.b @r4+,r1 ! r1 = xx.xx.xx.bb
extu.b r0,r0 ! r0 = 00.00.00.aa
mov.b @r4+,r2 ! r2 = xx.xx.xx.cc
shll8 r1 ! r1 = xx.xx.bb.00
or r1,r0 ! r0 = xx.xx.bb.aa
mov.b @r4+,r3 ! r3 = xx.xx.xx.dd
extu.b r2,r2 ! r2 = 00.00.00.cc
shll16 r0 ! r0 = bb.aa.00.00
shll8 r3 ! r3 = xx.xx.dd.00
or r3,r2 ! r2 = xx.xx.dd.cc
xtrct r2,r0 ! r0 = dd.cc.bb.aa
which is two unaligned signed 16 bit loads + shll16 + xtrct.
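In C terms the decomposition looks roughly like this (a sketch; load_u16 is
a hypothetical helper, little endian shown):
/* Sketch: 32 bit unaligned load decomposed into two unaligned 16 bit
   loads, combined as in the sequence above.  */
static unsigned int load_u16 (const unsigned char* p)
{
  unsigned short v;
  __builtin_memcpy (&v, p, sizeof (v));
  return v;
}
int load_unaligned32_c (const unsigned char* p)
{
  return (int)(load_u16 (p) | (load_u16 (p + 2) << 16));
}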
If the (mis)alignment offset value is known, it can be even more compact.
misalignment offset 2:
x0.x1.aa.bb.cc.dd.y0.y1
      ^^^^^^^^^^^
add #-2,r4
mov.l @r4,r0 ! r0 = bb.aa.x1.x0
mov.l @(4,r4),r1 ! r1 = y1.y0.dd.cc
xtrct r1,r0 ! r0 = dd.cc.bb.aa
misalignment offset 1:
x0.aa.bb.cc.dd.y0.y1.y2
   ^^^^^^^^^^^
add #-1,r4
mov.l @r4,r0 ! r0 = cc.bb.aa.x0
mov.l @(4,r4),r1 ! r1 = y2.y1.y0.dd
! r1:r0 = y2.y1.y0.dd : cc.bb.aa.x0
mov r0,r2
xtrct r1,r2 ! r2 = y0.dd.cc.bb
shlr8 r2 ! r2 = 00.y0.dd.cc
shll8 r0 ! r0 = bb.aa.x0.00
xtrct r2,r0 ! r0 = dd.cc.bb.aa
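At the source level this corresponds to something like the following sketch
(hypothetical function; the known alignment of the enclosing object is
conveyed via __builtin_assume_aligned):
/* Sketch: p itself is 4 byte aligned and the 32 bit value sits at a
   known misalignment offset of 2, enabling the mov.l + xtrct sequence.  */
int load_off2 (const unsigned char* p)
{
  int v;
  p = (const unsigned char*)__builtin_assume_aligned (p, 4);
  __builtin_memcpy (&v, p + 2, sizeof (v));
  return v;
}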
void store_unaligned16 (x& xx, int val)
{
xx.val_s16 = val;
}
currently compiles to
little endian:
extu.w r5,r0
mov.b r0,@(4,r4)
shlr8 r0
mov.b r0,@(5,r4)
big endian:
extu.w r5,r5
mov r5,r1
shlr8 r1
mov r1,r0
mov.b r0,@(4,r4)
mov r5,r0
mov.b r0,@(5,r4)
better (eliminates the unnecessary extu.w):
mov r5,r0
mov.b r0,@({4|5},r4)
shlr8 r0
mov.b r0,@({5|4},r4)
void store_unaligned32 (x& xx, int val)
{
xx.val32 = val;
}
currently compiles to
little endian:
mov r5,r0
shlr8 r0
mov.b r5,@r4
mov.b r0,@(1,r4)
mov r5,r0
shlr16 r0
mov.b r0,@(2,r4)
mov r5,r0
shlr16 r0
shlr8 r0
mov.b r0,@(3,r4)
big endian:
mov r5,r1
mov r5,r0
shlr16 r1
shlr16 r0
shlr8 r1
mov.b r0,@(1,r4)
mov r5,r0
shlr8 r0
mov.b r0,@(2,r4)
mov r5,r0
mov.b r1,@r4
mov.b r0,@(3,r4)
better:
mov r5,r0
mov.b r0,@({0|3},r4)
shlr8 r0
mov.b r0,@({1|2},r4)
shlr8 r0
mov.b r0,@({2|1},r4)
shlr8 r0
mov.b r0,@({3|0},r4)
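The portable source idiom for such stores is a memcpy of the value; a sketch
of the pattern the byte-store sequence above implements (hypothetical
function name):
/* Sketch: portable unaligned 32 bit store; on plain SH this should
   expand to the four mov.b stores shown above.  */
void store_unaligned32_c (unsigned char* p, int val)
{
  __builtin_memcpy (p, &val, sizeof (val));
}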
* [Bug target/64306] [SH] Improve unaligned loads and stores
From: olegendo at gcc dot gnu.org @ 2014-12-17 1:24 UTC
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64306
--- Comment #1 from Oleg Endo <olegendo at gcc dot gnu.org> ---
If an unaligned 32 bit store is known to be 16 bit aligned, it can be done
with something like:
mov r5,r0
mov.w r0,@({0|2},r4)
shlr16 r0
mov.w r0,@({2|0},r4)
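A source-level sketch of that case (hypothetical function; the 2 byte
alignment is conveyed via __builtin_assume_aligned):
/* Sketch: p is known to be 2 byte aligned, so the store can be done with
   two mov.w accesses instead of four mov.b accesses.  */
void store32_align2 (unsigned char* p, int val)
{
  p = (unsigned char*)__builtin_assume_aligned (p, 2);
  __builtin_memcpy (p, &val, sizeof (val));
}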
* [Bug target/64306] [SH] Improve unaligned loads and stores
From: olegendo at gcc dot gnu.org @ 2015-02-22 14:44 UTC
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64306
Oleg Endo <olegendo at gcc dot gnu.org> changed:
What              |Removed        |Added
------------------+---------------+---------------
Status            |UNCONFIRMED    |NEW
Last reconfirmed  |               |2015-02-22
Ever confirmed    |0              |1
--- Comment #2 from Oleg Endo <olegendo at gcc dot gnu.org> ---
When storing zero-extended or bswapped values to unaligned mems, the
following happens:
union unaligned32s { int val; } __attribute__((packed));
void store_32s_0 (unsigned char* x, unsigned char a, int b)
{
((union unaligned32s*)(x))->val = a;
}
mov #0,r0
extu.b r5,r5
mov.b r0,@(1,r4)
mov.b r0,@(2,r4)
mov r5,r0 ! these insns just recompute the known zero:
shlr16 r0 ! r0 = a >> 16 = 0 (redundant shift)
mov.b r5,@r4
shlr8 r0 ! r0 = 0 >> 8 (redundant shift)
rts
mov.b r0,@(3,r4)
should be:
mov.b r5,@r4
mov #0,r0
mov.b r0,@(1,r4)
mov.b r0,@(2,r4)
mov.b r0,@(3,r4)
void store_32s_1 (unsigned char* x, unsigned char a, int b)
{
((union unaligned32s*)(x))->val = __builtin_bswap32 (a);
}
extu.b r5,r5
swap.b r5,r5
swap.w r5,r5
extu.b r5,r1
mov.b r1,@r4
mov #0,r1
mov r1,r0
mov.b r0,@(1,r4)
mov.b r0,@(2,r4)
mov r5,r0
shlr16 r0
shlr8 r0
rts
mov.b r0,@(3,r4)
should be:
mov #0,r0
mov.b r0,@(0,r4)
mov.b r0,@(1,r4)
mov.b r0,@(2,r4)
mov r5,r0
mov.b r0,@(3,r4)
A similar thing happens when storing the T bit to unaligned mems; see also
PR 65162.
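A minimal sketch of the T bit variant (hypothetical reproducer along the
lines of PR 65162, reusing the packed union from above):
/* Sketch: the comparison result (the T bit) stored to an unaligned mem.  */
void store_T_bit (unsigned char* x, int a, int b)
{
  ((union unaligned32s*)x)->val = (a == b);
}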