From: Jan Beulich <jbeulich@suse.com>
To: Binutils <binutils@sourceware.org>
Subject: [PATCH] gas: special-case division / modulo by ±1
Date: Mon, 16 Dec 2024 12:39:25 +0100 [thread overview]
Message-ID: <ea7ead44-ff9f-42a4-903e-9ffee6630cf6@suse.com> (raw)
Dividing the largest possible negative value by -1 generally is UB, for
the result not being representable at least in commonly used binary
notation. This UB on x86, for example, is a Floating Point Exception on
Linux, i.e. resulting in an internal error (albeit only when
sizeof(valueT) == sizeof(void *); the library routine otherwise involved
apparently deals with the inputs quite okay).
Leave original values unaltered for division by 1; this may matter down
the road, in case we start including X_unsigned and X_extrabit in
arithmetic. For the same reason treat modulo by 1 the same as modulo by
-1.
The quad and octa tests have more relaxed expecations than intended, for
X_unsigned and X_extrabit not being taken into account [yet]. The upper
halves can wrongly end up as all ones (for .octa, when !BFD64, even the
upper three quarters). Yet it makes little sense to address this just
for div/mod by ±1. quad-div2 is yet more special, to cover for most
32-bit targets being unable to deal with forward-ref expressions in
.quad even when BFD64; even ones being able to (like x86) then still
don't get the values right.
---
I'm surprised fuzzing didn't spot this yet, but perhaps the specific
input values needed don't fit very well in how fuzzers determine new
"interesting" values.
--- a/gas/expr.c
+++ b/gas/expr.c
@@ -2015,8 +2015,32 @@ expr (int rankarg, /* Larger # is highe
bits of the result. */
resultP->X_add_number *= (valueT) v;
break;
- case O_divide: resultP->X_add_number /= v; break;
- case O_modulus: resultP->X_add_number %= v; break;
+
+ case O_divide:
+ if (v == 1)
+ break;
+ if (v == -1)
+ {
+ /* Dividing the largest negative value representable in offsetT
+ by -1 has a non-representable result in common binary
+ notation. Treat it as negation instead, carried out as an
+ unsigned operation to avoid UB. */
+ resultP->X_add_number = - (valueT) resultP->X_add_number;
+ }
+ else
+ resultP->X_add_number /= v;
+ break;
+
+ case O_modulus:
+ /* See above for why in particular -1 needs special casing.
+ While the operation is UB in C, mathematically it has a well-
+ defined result. */
+ if (v == 1 || v == -1)
+ resultP->X_add_number = 0;
+ else
+ resultP->X_add_number %= v;
+ break;
+
case O_left_shift:
case O_right_shift:
/* We always use unsigned shifts. According to the ISO
@@ -2372,12 +2396,22 @@ resolve_expression (expressionS *express
case O_divide:
if (right == 0)
return 0;
- left = (offsetT) left / (offsetT) right;
+ /* See expr() for reasons of the special casing. */
+ if (right == 1)
+ break;
+ if ((offsetT) right == -1)
+ left = -left;
+ else
+ left = (offsetT) left / (offsetT) right;
break;
case O_modulus:
if (right == 0)
return 0;
- left = (offsetT) left % (offsetT) right;
+ /* Again, see expr() for reasons of the special casing. */
+ if (right == 1 || (offsetT) right == -1)
+ left = 0;
+ else
+ left = (offsetT) left % (offsetT) right;
break;
case O_left_shift:
if (right >= sizeof (left) * CHAR_BIT)
--- a/gas/symbols.c
+++ b/gas/symbols.c
@@ -1723,8 +1723,27 @@ resolve_symbol_value (symbolS *symp)
{
/* See expr() for reasons of the use of valueT casts here. */
case O_multiply: left *= (valueT) right; break;
- case O_divide: left /= right; break;
- case O_modulus: left %= right; break;
+
+ /* See expr() for reasons of the special casing. */
+ case O_divide:
+ if (right == 1)
+ break;
+ if (right == -1)
+ {
+ left = -left;
+ break;
+ }
+ left /= right;
+ break;
+
+ /* Again, see expr() for reasons of the special casing. */
+ case O_modulus:
+ if (right == 1 || right == -1)
+ left = 0;
+ else
+ left %= right;
+ break;
+
case O_left_shift:
left = (valueT) left << (valueT) right; break;
case O_right_shift:
--- a/gas/testsuite/gas/all/gas.exp
+++ b/gas/testsuite/gas/all/gas.exp
@@ -449,6 +449,22 @@ if { ![istarget "pdp11-*-*"] } {
run_dump_test octa
}
+# Some x86 flavors use '/' as a comment char, yet that can be suppressed.
+# Some other target use '/' or '%' as a comment char, without a way to
+# suppress it.
+if { [istarget "i?86-*-*"] || [istarget "x86_64-*-*"] } {
+ run_dump_test quad-div {{as --divide}}
+ run_dump_test quad-div2 {{as --divide}}
+ run_dump_test octa-div {{as --divide}}
+} elseif { ![istarget "mcore-*-*"]
+ && ![istarget "mmix-*-*"]
+ && ![istarget "pdp11-*-*"]
+ && ![istarget "pj*-*-*"] } {
+ run_dump_test quad-div
+ run_dump_test quad-div2
+ run_dump_test octa-div
+}
+
# .set works differently on some targets.
switch -glob $target_triplet {
alpha*-*-* { }
--- /dev/null
+++ b/gas/testsuite/gas/all/octa-div.d
@@ -0,0 +1,10 @@
+#objdump: -s -j .data
+#name: .octa with division
+
+.*: +file format .*
+
+Contents of section .data:
+ [^ ]* (00000080 ........ ........ ........|........ ........ ........ 80000000) .*
+ [^ ]* (00000000 ........ ........ ........|........ ........ ........ 00000000) .*
+ [^ ]* (00000000 00000080 ........ ........|........ ........ 80000000 00000000) .*
+ [^ ]* (00000000 00000000 ........ ........|........ ........ 00000000 00000000) .*
--- /dev/null
+++ b/gas/testsuite/gas/all/octa-div.s
@@ -0,0 +1,11 @@
+ .data
+ .octa -0x80000000 / -1
+ .octa -0x80000000 % -1
+ .if ((1 << 16) << 16) <> 0
+ .octa -0x8000000000000000 / -1
+ .octa -0x8000000000000000 % -1
+ .else
+ /* Not really useful fallback for non-BFD64 targets. */
+ .octa 0x8000000000000000
+ .octa 0x0000000000000000
+ .endif
--- /dev/null
+++ b/gas/testsuite/gas/all/quad-div.d
@@ -0,0 +1,9 @@
+#objdump : -s -j .data
+#name : .quad with division
+
+.*: .*
+
+Contents of section (\.data|\$DATA\$):
+ 0000 (........ 80000000 ........ 00000000|00000080 ........ 00000000 ........) .*
+ 00.. (80000000 00000000 00000000 00000000|00000000 00000080 00000000 00000000) .*
+#pass
--- /dev/null
+++ b/gas/testsuite/gas/all/quad-div.s
@@ -0,0 +1,32 @@
+ .data
+
+ .eqv INT_MAX, 0x7fffffff
+ .eqv INT_MIN, -INT_MAX - 1
+
+ /* Note: Shifts are uniformly done as unsigned. */
+ .rept (INT_MIN / -1) >> 31
+ .quad -0x80000000 / -1
+ .endr
+ .rept (INT_MIN % -1) + 1
+ .quad -0x80000000 % -1
+ .endr
+
+ .if ((1 << 16) << 16) <> 0
+
+ .eqv LONG_MAX, 0x7fffffffffffffff
+ .eqv LONG_MIN, -LONG_MAX - 1
+
+ /* Note: Shifts are uniformly done as unsigned. */
+ .rept (LONG_MIN / -1) >> 63
+ .quad -0x8000000000000000 / -1
+ .endr
+ .rept (LONG_MIN % -1) + 1
+ .quad -0x8000000000000000 % -1
+ .endr
+
+ .else /* Not really useful fallback for non-BFD64 targets. */
+
+ .quad 0x8000000000000000
+ .quad 0x0000000000000000
+
+ .endif
--- /dev/null
+++ b/gas/testsuite/gas/all/quad-div2.d
@@ -0,0 +1,16 @@
+#objdump : -s -j .data
+#name : .quad with division (fwdref)
+# bfin doesn't support 'symbol = expression'.
+# tic30 and tic4x have 4 octets per byte, tic54x has 2 octets per byte.
+# v850 re-purposes .offset.
+#notarget: bfin-*-* *c30-*-* *c4x-*-* *c54x-*-* v850*-*-*
+# linkrelax targets don't handle equivalence expressions well (nor any
+# other forward expression). mep uses complex relocs.
+#xfail: crx-*-* h8300-*-* mn10200-*-* mn10300-*-* mep-*-*
+
+.*: .*
+
+Contents of section (\.data|\$DATA\$):
+ 0000 (00000000 80000000|80000000 00000000|00000080 00000000) 00000000 00000000 .*
+ 00.. (80000000 00000000 00000000 00000000|00000000 00000080 00000000 00000000) .*
+#pass
--- /dev/null
+++ b/gas/testsuite/gas/all/quad-div2.s
@@ -0,0 +1,27 @@
+ .offset
+ .dc.a 0
+ vasz = .
+
+ .data
+
+ .if vasz >= 8
+
+ .quad INT_MIN / -1
+ .quad INT_MIN % -1
+ .quad LONG_MIN / -1
+ .quad LONG_MIN % -1
+
+ LONG_MIN = -0x8000000000000000
+
+ .else
+
+ /* Use .long to cover at least !BFD64 targets. */
+ .long INT_MIN / -1, 0
+ .long INT_MIN % -1, 0
+ /* Not really useful fallback for less-than-64-bit-VMA targets. */
+ .quad 0x8000000000000000
+ .quad 0x0000000000000000
+
+ .endif
+
+ INT_MIN = -0x80000000
reply other threads:[~2024-12-16 11:39 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ea7ead44-ff9f-42a4-903e-9ffee6630cf6@suse.com \
--to=jbeulich@suse.com \
--cc=binutils@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).