[Bug middle-end/101134] New: Bogus -Wstringop-overflow warning about non-existent overflow

public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed

* [Bug middle-end/101134] New: Bogus -Wstringop-overflow warning about non-existent overflow
@ 2021-06-19  9:52 dangelog at gmail dot com
  2021-06-21 16:10 ` [Bug middle-end/101134] " msebor at gcc dot gnu.org
                   ` (13 more replies)
  0 siblings, 14 replies; 15+ messages in thread
From: dangelog at gmail dot com @ 2021-06-19  9:52 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101134

            Bug ID: 101134
           Summary: Bogus -Wstringop-overflow warning about non-existent
                    overflow
           Product: gcc
           Version: 11.1.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: middle-end
          Assignee: unassigned at gcc dot gnu.org
          Reporter: dangelog at gmail dot com
  Target Milestone: ---

Hello,

This reduced testcase from Qt raises a -Wstring-overflow warning on GCC 11.1
when compiling under -O2 -g -Wall -Wextra:

    #include <stdlib.h>
    #include <string.h>

    struct QTestCharBuffer
    {
        enum { InitialSize = 512 };

        inline QTestCharBuffer() : buf(staticBuf)
        {
            staticBuf[0] = '\0';
        }

        QTestCharBuffer(const QTestCharBuffer &) = delete;
        QTestCharBuffer &operator=(const QTestCharBuffer &) = delete;

        inline ~QTestCharBuffer()
        {
            if (buf != staticBuf)
                free(buf);
        }

        inline char *data()
        {
            return buf;
        }

        inline int size() const
        {
            return _size;
        }

        inline bool reset(int newSize)
        {
            char *newBuf = nullptr;
            if (buf == staticBuf) {
                // if we point to our internal buffer, we need to malloc first
                newBuf = reinterpret_cast<char *>(malloc(newSize));
            } else {
                // if we already malloc'ed, just realloc
                newBuf = reinterpret_cast<char *>(realloc(buf, newSize));
            }

            // if the allocation went wrong (newBuf == 0), we leave the object
as is
            if (!newBuf)
                return false;

            _size = newSize;
            buf = newBuf;
            return true;
        }

    private:
        int _size = InitialSize;
        char* buf;
        char staticBuf[InitialSize];
    };


    typedef int (*StringFormatFunction)(QTestCharBuffer*,char const*,size_t);

    /*
        A wrapper for string functions written to work with a fixed size buffer
so they can be called
        with a dynamically allocated buffer.
    */
    int allocateStringFn(QTestCharBuffer* str, char const* src,
StringFormatFunction func)
    {
        static const int MAXSIZE = 1024*1024*2;

        int size = str->size();

        int res = 0;

        for (;;) {
            res = func(str, src, size);
            str->data()[size - 1] = '\0';
            if (res < size) {
                // We succeeded or fatally failed
                break;
            }
            // buffer wasn't big enough, try again
            size *= 2;
            if (size > MAXSIZE) {
                break;
            }
            if (!str->reset(size))
                break; // ran out of memory - bye
        }

        return res;
    }

    int xmlQuote(QTestCharBuffer* destBuf, char const* src, size_t n)
    {
        if (n == 0) return 0;

        char *dest = destBuf->data();
        *dest = 0;
        if (!src) return 0;

        char* begin = dest;
        char* end = dest + n;

        while (dest < end) {
            switch (*src) {

    #define MAP_ENTITY(chr, ent) \
                case chr:                                   \
                    if (dest + sizeof(ent) < end) {         \
                        strcpy(dest, ent);                  \
                        dest += sizeof(ent) - 1;            \
                    }                                       \
                    else {                                  \
                        *dest = 0;                          \
                        return (dest+sizeof(ent)-begin);    \
                    }                                       \
                    ++src;                                  \
                    break;

                MAP_ENTITY('>', "&gt;");
                MAP_ENTITY('<', "&lt;");
                MAP_ENTITY('\'', "&apos;");
                MAP_ENTITY('"', "&quot;");
                MAP_ENTITY('&', "&amp;");

                // not strictly necessary, but allows handling of comments
without
                // having to explicitly look for `--'
                MAP_ENTITY('-', "&#x002D;");

    #undef MAP_ENTITY

                case 0:
                    *dest = 0;
                    return (dest-begin);

                default:
                    *dest = *src;
                    ++dest;
                    ++src;
                    break;
            }
        }

        // If we get here, dest was completely filled (dest == end)
        *(dest-1) = 0;
        return (dest-begin);
    }

    int xmlQuote(QTestCharBuffer* str, char const* src)
    {
        return allocateStringFn(str, src, xmlQuote);
    }

    void enterTestFunction(const char *function)
    {
        QTestCharBuffer quotedFunction;
        xmlQuote(&quotedFunction, function);
    }


Godbolt link: https://gcc.godbolt.org/z/aPMdYjqEa

The warning is

    In function 'int allocateStringFn(QTestCharBuffer*, const char*,
StringFormatFunction)',
        inlined from 'int xmlQuote(QTestCharBuffer*, const char*)' at
<source>:150:28,
        inlined from 'void enterTestFunction(const char*)' at <source>:156:13:
    <source>:75:31: warning: writing 1 byte into a region of size 0
[-Wstringop-overflow=]
       75 |         str->data()[size - 1] = '\0';
          |         ~~~~~~~~~~~~~~~~~~~~~~^~~~~~
    <source>: In function 'void enterTestFunction(const char*)':
    <source>:55:10: note: at offset -1 into destination object
'QTestCharBuffer::staticBuf' of size 512
       55 |     char staticBuf[InitialSize];
          |          ^~~~~~~~~
    Compiler returned: 0



It's wrong because `size` can never be 0: the allocated object is visible to
GCC and has size == 512; size can only get multiplied by 2 and the result of
the multiplication is checked (so it can't overflow). Adding enough
__builtin_unreachable() for that condition removes the warnings, but it should
not be necessary.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Bug middle-end/101134] Bogus -Wstringop-overflow warning about non-existent overflow
  2021-06-19  9:52 [Bug middle-end/101134] New: Bogus -Wstringop-overflow warning about non-existent overflow dangelog at gmail dot com
@ 2021-06-21 16:10 ` msebor at gcc dot gnu.org
  2021-06-21 16:44 ` dangelog at gmail dot com
                   ` (12 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: msebor at gcc dot gnu.org @ 2021-06-21 16:10 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101134

Martin Sebor <msebor at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
         Resolution|---                         |WONTFIX
             Status|UNCONFIRMED                 |RESOLVED

--- Comment #1 from Martin Sebor <msebor at gcc dot gnu.org> ---
GCC cannot determine every arbitrarily complex invariant in a complex program. 
If a precondition must be true true that a flow-dependent warning doesn't know
about then asserting it usually both avoids the warning and improves codegen. 
In the test case adding either of the two assertions below has that effect:

            res = func(str, src, size);
            if (size <= 0)
              __builtin_unreachable ();
            str->data()[size - 1] = '\0';

        ...

        // If we get here, dest was completely filled (dest == end)
        if (dest == destBuf->data ())
          __builtin_unreachable ();
        *(dest-1) = 0;
        return (dest-begin);

In future releases, as optimizations improve GCC may be able to determine more
preconditions and the warning might disappear on its own as well.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Bug middle-end/101134] Bogus -Wstringop-overflow warning about non-existent overflow
  2021-06-19  9:52 [Bug middle-end/101134] New: Bogus -Wstringop-overflow warning about non-existent overflow dangelog at gmail dot com
  2021-06-21 16:10 ` [Bug middle-end/101134] " msebor at gcc dot gnu.org
@ 2021-06-21 16:44 ` dangelog at gmail dot com
  2021-06-21 20:40 ` msebor at gcc dot gnu.org
                   ` (11 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: dangelog at gmail dot com @ 2021-06-21 16:44 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101134

--- Comment #2 from Giuseppe D'Angelo <dangelog at gmail dot com> ---
As I said,

> Adding enough __builtin_unreachable() for that condition removes the warnings, but it should not be necessary.

I disagree with the resolution, though. While I understand that GCC cannot
reason globally, the warning message itself is miselading, as it's worded in a
way that makes the user think that GCC has *conclusevely* proven the existence
of a problem, while in fact GCC is wrong. Specifically, this statement:

>     <source>:75:31: warning: writing 1 byte into a region of size 0 [-Wstringop-overflow=]

At least, I'd like a less strong wording if GCC cannot *prove* this but only
estimate it (e.g. "warning: possible string overflow (writing 1 byte...)").
Ideally, even, having two separate warnings or two separate warning levels
(overflow proved / overflow just estimated) so one can enable only one of the
two if needed.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Bug middle-end/101134] Bogus -Wstringop-overflow warning about non-existent overflow
  2021-06-19  9:52 [Bug middle-end/101134] New: Bogus -Wstringop-overflow warning about non-existent overflow dangelog at gmail dot com
  2021-06-21 16:10 ` [Bug middle-end/101134] " msebor at gcc dot gnu.org
  2021-06-21 16:44 ` dangelog at gmail dot com
@ 2021-06-21 20:40 ` msebor at gcc dot gnu.org
  2021-06-22  8:57 ` dangelog at gmail dot com
                   ` (10 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: msebor at gcc dot gnu.org @ 2021-06-21 20:40 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101134

--- Comment #3 from Martin Sebor <msebor at gcc dot gnu.org> ---
The warning architecture doesn't make it possible to distinguish between the
two situations you describe.  No flow-sensitive GCC warning points out a
certain bug: every instance needs to be viewed as only a possible one.  The
article below helps explain some of the underlying challenges here in detail:
https://developers.redhat.com/blog/2019/03/13/understanding-gcc-warnings-part-2

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Bug middle-end/101134] Bogus -Wstringop-overflow warning about non-existent overflow
  2021-06-19  9:52 [Bug middle-end/101134] New: Bogus -Wstringop-overflow warning about non-existent overflow dangelog at gmail dot com
                   ` (2 preceding siblings ...)
  2021-06-21 20:40 ` msebor at gcc dot gnu.org
@ 2021-06-22  8:57 ` dangelog at gmail dot com
  2021-06-22 15:27 ` msebor at gcc dot gnu.org
                   ` (9 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: dangelog at gmail dot com @ 2021-06-22  8:57 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101134

--- Comment #4 from Giuseppe D'Angelo <dangelog at gmail dot com> ---
Could the warning messages then be changed to point out that the issue is only
a mere possibility? Using an "assertive" wording makes users believe that GCC
has positively and conclusively proved that there's something wrong, whilst
it's exactly the opposite (it didn't prove anything, and it's a false
positive).

Uninitialized warnings have this distinction and warn in two different ("may be
used uninitialized" vs "is used initialized"). If here the distinction cannot
be made, AND false positives are allowed to warn, I'd really prefer the "may be
overflowing" wording than the "is overflowing" existing one.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Bug middle-end/101134] Bogus -Wstringop-overflow warning about non-existent overflow
  2021-06-19  9:52 [Bug middle-end/101134] New: Bogus -Wstringop-overflow warning about non-existent overflow dangelog at gmail dot com
                   ` (3 preceding siblings ...)
  2021-06-22  8:57 ` dangelog at gmail dot com
@ 2021-06-22 15:27 ` msebor at gcc dot gnu.org
  2021-06-22 16:34 ` dangelog at gmail dot com
                   ` (8 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: msebor at gcc dot gnu.org @ 2021-06-22 15:27 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101134

--- Comment #5 from Martin Sebor <msebor at gcc dot gnu.org> ---
It wouldn't be right to change the wording of just one warning because the
problem applies to all flow based diagnostics.  They all depend on various
optimizations that propagate constants, add or remove tests, and change the
control flow graph.  A statement that in the source code is conditional on the
values of various parameters might in the intermediate representation appear
unconditional with constants as a result, even be unconditionally invalid but
unreachable.  This is true at function scope as well as across function call
boundaries thanks to inlining.  Changing the wording for all of them to try to
somehow reflect this wouldn't help because then all instances of them all would
have to use the new form.

-Wuninitialized and -Wmaybe-uninitialized aren't substantially different in
this.  The latter is unique only in that it diagnoses arguments of PHI nodes,
which are the results of conditional expressions.  This property is described
by the following sentence in the manual:

"...if there exists a path from the function entry to a use of the object that
is initialized, but there exist some other paths for which the object is not
initialized, the compiler emits a warning if it cannot prove the uninitialized
paths are not executed at run time."

This is only roughly what happens but there are many instances of
-Wuninitialized that could be described using the same sentence, even though it
doesn't consider PHI nodes.  If we do introduce a similar distinction in other
warnings like -Wstringop-overflow it will be on top of the instances already
issued by them, not a subset of them.

This is a general property of all flow based warnings in GCC except those
emitted by the static analyzer which doesn't depend on the same optimizations
(only a very small subset of them).  All warnings (flow based or otherwise,
including those issued by the analyzer) should always be viewed as documented,
i.e., as "messages that report constructions that are not inherently erroneous
but that are risky or suggest there may have been an error."

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Bug middle-end/101134] Bogus -Wstringop-overflow warning about non-existent overflow
  2021-06-19  9:52 [Bug middle-end/101134] New: Bogus -Wstringop-overflow warning about non-existent overflow dangelog at gmail dot com
                   ` (4 preceding siblings ...)
  2021-06-22 15:27 ` msebor at gcc dot gnu.org
@ 2021-06-22 16:34 ` dangelog at gmail dot com
  2021-06-23 19:56 ` msebor at gcc dot gnu.org
                   ` (7 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: dangelog at gmail dot com @ 2021-06-22 16:34 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101134

--- Comment #6 from Giuseppe D'Angelo <dangelog at gmail dot com> ---
(In reply to Martin Sebor from comment #5)
> It wouldn't be right to change the wording of just one warning because the
> problem applies to all flow based diagnostics.  They all depend on various
> optimizations that propagate constants, add or remove tests, and change the
> control flow graph.  A statement that in the source code is conditional on
> the values of various parameters might in the intermediate representation
> appear unconditional with constants as a result, even be unconditionally
> invalid but unreachable.  This is true at function scope as well as across
> function call boundaries thanks to inlining.  Changing the wording for all
> of them to try to somehow reflect this wouldn't help because then all
> instances of them all would have to use the new form.

Sorry, why wouldn't help? That is, given the fact that these warnings do have
false positives, why would it bad for such warnings to say "something bad MIGHT
be happening here" vs "something bad IS happening here"? 

For an end user, it makes a lot of difference to know if the compiler has
definitely found something wrong vs. the compiler may or may not have found it.
Just a tiny language change in the printed mesage would reassure the user that
the warning could, in fact, be a false positive. (Compare this to "there IS
something bad here". You read the code in question, and you don't see what's
bad, and you start digging around trying to understand "why is the compiler
telling me that there IS something bad? Am I not seeing something?".)


> -Wuninitialized and -Wmaybe-uninitialized aren't substantially different in
> this.  The latter is unique only in that it diagnoses arguments of PHI
> nodes, which are the results of conditional expressions.  This property is
> described by the following sentence in the manual:
> 
> "...if there exists a path from the function entry to a use of the object
> that is initialized, but there exist some other paths for which the object
> is not initialized, the compiler emits a warning if it cannot prove the
> uninitialized paths are not executed at run time."
> 
> This is only roughly what happens but there are many instances of
> -Wuninitialized that could be described using the same sentence, even though
> it doesn't consider PHI nodes.  If we do introduce a similar distinction in
> other warnings like -Wstringop-overflow it will be on top of the instances
> already issued by them, not a subset of them.
> 
> This is a general property of all flow based warnings in GCC except those
> emitted by the static analyzer which doesn't depend on the same
> optimizations (only a very small subset of them).  All warnings (flow based
> or otherwise, including those issued by the analyzer) should always be
> viewed as documented, i.e., as "messages that report constructions that are
> not inherently erroneous but that are risky or suggest there may have been
> an error."

"not inherently erroneous" is as per language rules (i.e. the program isn't
ill-formed). But "there may have been" an error is the key I'm trying to point
out. The compiler can prove that something is a mistake, or can only estimate
that, and so raise a false positive. I'd rather have a message telling me
whether a given warning is because of a proof or an estimate.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Bug middle-end/101134] Bogus -Wstringop-overflow warning about non-existent overflow
  2021-06-19  9:52 [Bug middle-end/101134] New: Bogus -Wstringop-overflow warning about non-existent overflow dangelog at gmail dot com
                   ` (5 preceding siblings ...)
  2021-06-22 16:34 ` dangelog at gmail dot com
@ 2021-06-23 19:56 ` msebor at gcc dot gnu.org
  2021-06-24 14:07 ` dangelog at gmail dot com
                   ` (6 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: msebor at gcc dot gnu.org @ 2021-06-23 19:56 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101134

--- Comment #7 from Martin Sebor <msebor at gcc dot gnu.org> ---
Changing the warning text from "does X" to "may do X" wouldn't help because all
instances of it (or all warnings) would have to use the latter form, and that's
already implied by the former.  Every GCC warning already means "something
looks fishy here" and not "this is definitely a bug."  Not just because not
every suspicious piece of code is necessarily a bug, or because no warning is
completely free of false positives, but also because every flow-sensitive
warning also depends on whether control can reach the construct it warns about
(as in: is the function where X occurs ever called?)  Users who expect
otherwise simply need to adjust their expectations (as per the manual).

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Bug middle-end/101134] Bogus -Wstringop-overflow warning about non-existent overflow
  2021-06-19  9:52 [Bug middle-end/101134] New: Bogus -Wstringop-overflow warning about non-existent overflow dangelog at gmail dot com
                   ` (6 preceding siblings ...)
  2021-06-23 19:56 ` msebor at gcc dot gnu.org
@ 2021-06-24 14:07 ` dangelog at gmail dot com
  2021-06-24 17:05 ` segher at gcc dot gnu.org
                   ` (5 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: dangelog at gmail dot com @ 2021-06-24 14:07 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101134

--- Comment #8 from Giuseppe D'Angelo <dangelog at gmail dot com> ---
In a -Wall build, it's a bit unfair to pretend the users to know if a warning
is being generated by the frontend, the middleend, the backend and so on. All
they get is a list of warnings saying "this is like this, this is like that".
You're saying that all such warnings should be treated as "maybe", as false
positives are a possibility. But I don't agree with this. 

First, as I said, the mood of the warning is "indicative", which denotes
certainty and reality, not possibility. (But I'll grant, not being a native
English speaker, this may just be a personal impression of the warning being
"stern".)

Second, the presence of things like -Wmaybe-uninitialized vs -Wuninitialized
hints at the fact that GCC *wants* to distinguish these "maybe" vs. "certain"
cases, at least in certain contexts (like, where it CAN do that!), and give
different warnings if it can.

Third, frontend warnings simply don't have false positives: if the compiler
tells you "this function may be marked override", "this class has virtual
functions but no virtual destructor", "this case label falls through into the
next one", that's absolutely true in 100% of the cases. A false positive here
would clearly be treated as a bug in GCC, and not dismissed as "but it's a
warning, and a warning is just a 'maybe', and yes, GCC is telling you to add
`override`, and then you added it, and now the program doesn't even build any
more because the warning was wrong and `override` was not even needed, but see,
it's your fault for not understanding the 'maybe' part".

So, in a nutshell, yes, I'd be much more comfortable if warnings that denote a
possibility (for any reason, really, I'm not asking to NEVER generate false
positives) would simply have "may" or "might" added to their text. If that's
the majority of middle-end warnings because they all generate false positives,
why would that be a problem, in principle?

But fair enough, let's agree to disagree. :)

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Bug middle-end/101134] Bogus -Wstringop-overflow warning about non-existent overflow
  2021-06-19  9:52 [Bug middle-end/101134] New: Bogus -Wstringop-overflow warning about non-existent overflow dangelog at gmail dot com
                   ` (7 preceding siblings ...)
  2021-06-24 14:07 ` dangelog at gmail dot com
@ 2021-06-24 17:05 ` segher at gcc dot gnu.org
  2021-06-24 17:57 ` rsandifo at gcc dot gnu.org
                   ` (4 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: segher at gcc dot gnu.org @ 2021-06-24 17:05 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101134

Segher Boessenkool <segher at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|RESOLVED                    |REOPENED
   Last reconfirmed|                            |2021-06-24
                 CC|                            |segher at gcc dot gnu.org
     Ever confirmed|0                           |1
         Resolution|WONTFIX                     |---

--- Comment #9 from Segher Boessenkool <segher at gcc dot gnu.org> ---
(In reply to Martin Sebor from comment #7)
> Changing the warning text from "does X" to "may do X" wouldn't help because
> all instances of it (or all warnings) would have to use the latter form, and
> that's already implied by the former.  Every GCC warning already means
> "something looks fishy here" and not "this is definitely a bug."

Yes.  And most warning texts do not say "A is B" when that is not true.
All that do are buggy.


Reopened.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Bug middle-end/101134] Bogus -Wstringop-overflow warning about non-existent overflow
  2021-06-19  9:52 [Bug middle-end/101134] New: Bogus -Wstringop-overflow warning about non-existent overflow dangelog at gmail dot com
                   ` (8 preceding siblings ...)
  2021-06-24 17:05 ` segher at gcc dot gnu.org
@ 2021-06-24 17:57 ` rsandifo at gcc dot gnu.org
  2021-06-24 19:36 ` dmalcolm at gcc dot gnu.org
                   ` (3 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: rsandifo at gcc dot gnu.org @ 2021-06-24 17:57 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101134

rsandifo at gcc dot gnu.org <rsandifo at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
           Assignee|unassigned at gcc dot gnu.org      |rsandifo at gcc dot gnu.org
                 CC|                            |rsandifo at gcc dot gnu.org
             Status|REOPENED                    |ASSIGNED

--- Comment #10 from rsandifo at gcc dot gnu.org <rsandifo at gcc dot gnu.org> ---
I'll take this.  Not really my area, it's just a bit of a pet peeve
when the compiler isn't equivocal enough.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Bug middle-end/101134] Bogus -Wstringop-overflow warning about non-existent overflow
  2021-06-19  9:52 [Bug middle-end/101134] New: Bogus -Wstringop-overflow warning about non-existent overflow dangelog at gmail dot com
                   ` (9 preceding siblings ...)
  2021-06-24 17:57 ` rsandifo at gcc dot gnu.org
@ 2021-06-24 19:36 ` dmalcolm at gcc dot gnu.org
  2021-06-24 21:18 ` msebor at gcc dot gnu.org
                   ` (2 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: dmalcolm at gcc dot gnu.org @ 2021-06-24 19:36 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101134

David Malcolm <dmalcolm at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |dmalcolm at gcc dot gnu.org

--- Comment #11 from David Malcolm <dmalcolm at gcc dot gnu.org> ---
FWIW the current GCC UX guidelines:
  https://gcc.gnu.org/onlinedocs/gccint/Guidelines-for-Diagnostics.html
don't talk about a distinction between "possible" vs "definite" in the wording,
but it looks to me like they ought to for cases like this

...and it may already be implied by some of the stuff like "Ideally a
diagnostic should contain enough information to allow the user to make an
informed choice about whether they should care (and how to fix it), but a
balance must be drawn against overloading the user with irrelevant data.", in
that a "possible" vs "definite" distinction doesn't add much verbiage, but is
very useful in terms of clarity to the end-user, IMHO.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Bug middle-end/101134] Bogus -Wstringop-overflow warning about non-existent overflow
  2021-06-19  9:52 [Bug middle-end/101134] New: Bogus -Wstringop-overflow warning about non-existent overflow dangelog at gmail dot com
                   ` (10 preceding siblings ...)
  2021-06-24 19:36 ` dmalcolm at gcc dot gnu.org
@ 2021-06-24 21:18 ` msebor at gcc dot gnu.org
  2021-06-24 22:18 ` dangelog at gmail dot com
  2022-03-17 10:18 ` redi at gcc dot gnu.org
  13 siblings, 0 replies; 15+ messages in thread
From: msebor at gcc dot gnu.org @ 2021-06-24 21:18 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101134

--- Comment #12 from Martin Sebor <msebor at gcc dot gnu.org> ---
I don't need to be convinced that it would be nice to be able to differentiate
between certain bugs and possible ones.  The text of this class of warnings
already does differentiate between "may write/read/access" and
"writing/reading/accessing" under some conditions:

https://gcc.gnu.org/git/?p=gcc.git;a=blob;f=gcc/builtins.c;h=73c12e3bb8c39907b6bd95148f860709dbbf3f50;hb=refs/heads/releases/gcc-11#l4136

This is not among them.  In this case the IL that triggers the warning is:

  <bb 3> [local count: 10145877954]:
  # size_18 = PHI <512(2), size_13(10)>
  # prephitmp_54 = PHI <&quotedFunction.staticBuf(2), newBuf_36(10)>
  ...
  <bb 7> [local count: 2825037180]:
  newBuf_33 = malloc (_51);
  goto <bb 9>; [100.00%]

  <bb 8> [local count: 6591753510]:
  newBuf_35 = realloc (_30, _51);
  ...
  <bb 9> [local count: 9416790700]:
  # newBuf_36 = PHI <newBuf_33(7), newBuf_35(8)>
  ...
  <bb 14> [local count: 3449598541]:
  MEM[(char *)prephitmp_54 + -1B] = 0;   <<< -Wstring-overflow

prephitmp_54 is set to point to either the beginning of
quotedFunction.staticBuf or the dynamically allocated buffer.  So prephitmp_54
- 1 is unconditionally invalid and the warning triggers.  The warning doesn't
consider the predicates leading up to the invalid store: all it uses to make
its decision is the statement itself and its arguments.  For PHIs, to minimize
noise, it triggers only if the statement is invalid for all arguments.  This is
how most flow-based warnings work in GCC (some like -Warray-bounds don't
consider PHIs yet).

To do better and either hope to issue a "maybe" kind of a warning or preferably
avoid it altogether if the code is unreachable we would need to do what
-Wmaybe-uninitialized does (as I said in comment #5) and analyze the
predicates.  I've been working on extracting the uninitialized predicate
analyzer for the last few months.  I'm not sure to what extent it will be
usable outside the uninitialized pass yet or how well it will work.  As we
know, -Wmaybe-uninitialized has lots of false positives (and negatives).  But
even the uninitialized pass issues unconditional warnings for conditional bugs.
 For instance:

  int f (void)
  { 
    int i, j = 0;
    int *p = i ? &i : &j;
    return *p;
  }

a.c: In function ‘f’:
a.c:4:14: warning: ‘i’ is used uninitialized [-Wuninitialized]
    4 |   int *p = i ? &i : &j;
      |              ^

It does that because the first time it runs it's too early to make the
distinction, and by the second time it runs to issuse -Wmaybe-uninitialized the
uninitialized read has been eliminated.  And this is done to strike a
reasonable balance between false negatives and positives.

So in general, the distinction between the two cases can only be made when it
can be discerned from the IL, and the IL doesn't always preserve the
conditional nature of the problem statement.  So every warning must always be
viewed as a "maybe" kind of a warning.  It will never be a sure a thing, either
at the scope of individual functions, and certainly not with inlining or
function cloning.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Bug middle-end/101134] Bogus -Wstringop-overflow warning about non-existent overflow
  2021-06-19  9:52 [Bug middle-end/101134] New: Bogus -Wstringop-overflow warning about non-existent overflow dangelog at gmail dot com
                   ` (11 preceding siblings ...)
  2021-06-24 21:18 ` msebor at gcc dot gnu.org
@ 2021-06-24 22:18 ` dangelog at gmail dot com
  2022-03-17 10:18 ` redi at gcc dot gnu.org
  13 siblings, 0 replies; 15+ messages in thread
From: dangelog at gmail dot com @ 2021-06-24 22:18 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101134

--- Comment #13 from Giuseppe D'Angelo <dangelog at gmail dot com> ---
Hi,

(In reply to Martin Sebor from comment #12)
> So in general, the distinction between the two cases can only be made when
> it can be discerned from the IL, and the IL doesn't always preserve the
> conditional nature of the problem statement.  So every warning must always
> be viewed as a "maybe" kind of a warning.  It will never be a sure a thing,
> either at the scope of individual functions, and certainly not with inlining
> or function cloning.

I think there's been a misunderstanding. Apologies if I created confusion, let
me try and reset the conversation:

I perfectly understand if, in the context of this particular warning (or any
other similar middle-end warning), it is very hard, or currently impossible, or
even always impossible to distinguish between the "maybe" case and the
"certain" case. 

Without such a distinction available or possible, I'm also OK with raising 
false positives. Therefore, in relation to this aspect of the original
submission (the code raising a false positive warning), I'm perfectly fine at
marking the request of not triggering the warning altogether as out of scope /
won't fix / etc.

On other hand, I was not OK with the idea that the *warning message* should
keep saying that "there *is* an overflow", in a positive indicative mood. All
I'd really ask there would be to reword the message in order to say something
like "there *may be* an overflow" instead. It might seem like a trivial/useless
change for someone who knows how the middle-end works and where such warnings
come from, but it would bring a lot of clarity to end-users (to me, at least)
if any warning message would clearly indicate whether it may be a false
positive. And that could be achieved by simply adding "may" to the warning
messages in question.

Does this make sense?

Thanks,

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [Bug middle-end/101134] Bogus -Wstringop-overflow warning about non-existent overflow
  2021-06-19  9:52 [Bug middle-end/101134] New: Bogus -Wstringop-overflow warning about non-existent overflow dangelog at gmail dot com
                   ` (12 preceding siblings ...)
  2021-06-24 22:18 ` dangelog at gmail dot com
@ 2022-03-17 10:18 ` redi at gcc dot gnu.org
  13 siblings, 0 replies; 15+ messages in thread
From: redi at gcc dot gnu.org @ 2022-03-17 10:18 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101134

--- Comment #14 from Jonathan Wakely <redi at gcc dot gnu.org> ---
(In reply to Giuseppe D'Angelo from comment #13)
> Does this make sense?

Yes, perfect sense, and many of us agree 100%.

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2022-03-17 10:18 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-19  9:52 [Bug middle-end/101134] New: Bogus -Wstringop-overflow warning about non-existent overflow dangelog at gmail dot com
2021-06-21 16:10 ` [Bug middle-end/101134] " msebor at gcc dot gnu.org
2021-06-21 16:44 ` dangelog at gmail dot com
2021-06-21 20:40 ` msebor at gcc dot gnu.org
2021-06-22  8:57 ` dangelog at gmail dot com
2021-06-22 15:27 ` msebor at gcc dot gnu.org
2021-06-22 16:34 ` dangelog at gmail dot com
2021-06-23 19:56 ` msebor at gcc dot gnu.org
2021-06-24 14:07 ` dangelog at gmail dot com
2021-06-24 17:05 ` segher at gcc dot gnu.org
2021-06-24 17:57 ` rsandifo at gcc dot gnu.org
2021-06-24 19:36 ` dmalcolm at gcc dot gnu.org
2021-06-24 21:18 ` msebor at gcc dot gnu.org
2021-06-24 22:18 ` dangelog at gmail dot com
2022-03-17 10:18 ` redi at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).