public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug c/94703] New: Small-sized  memcpy leading to unnecessary register spillage unless done through a dummy union
@ 2020-04-21 21:07 pskocik at gmail dot com
  2020-04-22  7:28 ` [Bug middle-end/94703] " rguenth at gcc dot gnu.org
                   ` (14 more replies)
  0 siblings, 15 replies; 16+ messages in thread
From: pskocik at gmail dot com @ 2020-04-21 21:07 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94703

            Bug ID: 94703
           Summary: Small-sized  memcpy leading to unnecessary register
                    spillage unless done through a dummy union
           Product: gcc
           Version: unknown
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
          Assignee: unassigned at gcc dot gnu.org
          Reporter: pskocik at gmail dot com
  Target Milestone: ---

The problem, demonstrated in code examples below, can be suppressed by
memcpying into a union (possibly just a one-member union), but that seems like
a silly workaround that shouldn't be required.

Examples:

#include <stdint.h>
#include <string.h>

uint64_t get4_1(void const *X)
{
        //spills
        uint64_t r = 0; memcpy(&r,X,4); return r;
}

uint64_t get4_nospill(void const *X)
{
        //doesn't spill
        union { uint64_t u64; } u = {0};
        memcpy(&u.u64,X,sizeof(uint32_t));
        return u.u64;
}

uint64_t get2_1(void const *X)
{
        //spills
        uint64_t r = 0; memcpy(&r,X,2); return r;
}


uint64_t get2_nospill(void const *X)
{
        //doesn't spill
        union { uint64_t u64; } u = {0};
        memcpy(&u.u64,X,sizeof(uint16_t));
        return u.u64;
}

        void backend(void const*Src, size_t Sz);
        static inline void valInPtrInl(void *Src, size_t Sz)
        {
                if(Sz<=sizeof(void const*)){
                        #if 1 //spills
                                void const*inlSrc; 
                                memcpy(&inlSrc,Src,Sz);
                                backend(inlSrc,Sz); return;
                        #else
                                //doesn't spill
                                union{ void const*inlSrc; } u;
                                memcpy(&u.inlSrc,Src,Sz);
                                backend(u.inlSrc,Sz); return;
                        #endif
                }

                backend(Src,Sz);
                return;

        }
void valInPtr(int X) { valInPtrInl(&X,sizeof(X)); }

GCC 9.3 output on x86_64:

get4_1:
        mov     QWORD PTR [rsp-8], 0
        mov     eax, DWORD PTR [rdi]
        mov     DWORD PTR [rsp-8], eax
        mov     rax, QWORD PTR [rsp-8]
        ret
get4_nospill:
        mov     eax, DWORD PTR [rdi]
        ret
get2_1:
        mov     QWORD PTR [rsp-8], 0
        movzx   eax, WORD PTR [rdi]
        mov     WORD PTR [rsp-8], ax
        mov     rax, QWORD PTR [rsp-8]
        ret
get2_nospill:
        xor     eax, eax
        mov     ax, WORD PTR [rdi]
        ret
valInPtr:
        mov     DWORD PTR [rsp-16], edi
        mov     rdi, QWORD PTR [rsp-16]
        mov     esi, 4
        jmp     backend

Clang 3.1 output on x86_64:

get4_1:                                 # @get4_1
        mov     EAX, DWORD PTR [RDI]
        ret

get4_nospill:                           # @get4_nospill
        mov     EAX, DWORD PTR [RDI]
        ret

get2_1:                                 # @get2_1
        movzx   EAX, WORD PTR [RDI]
        ret

get2_nospill:                           # @get2_nospill
        movzx   EAX, WORD PTR [RDI]
        ret

valInPtr:                               # @valInPtr
        mov     EDI, EDI
        mov     ESI, 4
        jmp     backend                 # TAILCALL


https://gcc.godbolt.org/z/rwq2UY

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug middle-end/94703] Small-sized  memcpy leading to unnecessary register spillage unless done through a dummy union
  2020-04-21 21:07 [Bug c/94703] New: Small-sized memcpy leading to unnecessary register spillage unless done through a dummy union pskocik at gmail dot com
@ 2020-04-22  7:28 ` rguenth at gcc dot gnu.org
  2020-04-22  7:34 ` rguenth at gcc dot gnu.org
                   ` (13 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: rguenth at gcc dot gnu.org @ 2020-04-22  7:28 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94703

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |rguenth at gcc dot gnu.org
            Version|unknown                     |10.0
   Last reconfirmed|                            |2020-04-22
             Status|UNCONFIRMED                 |NEW
     Ever confirmed|0                           |1
             Target|                            |x86_64-*-*

--- Comment #1 from Richard Biener <rguenth at gcc dot gnu.org> ---
Confirmed.  We end up with

get4_1 (const void * X)
{
  uint64_t r;
  unsigned int _4;
  uint64_t _6;

  <bb 2> [local count: 1073741824]:
  r = 0;
  _4 = MEM <unsigned int> [(char * {ref-all})X_3(D)];
  MEM <unsigned int> [(char * {ref-all})&r] = _4;
  _6 = r;
  r ={v} {CLOBBER};
  return _6;

and

get4_nospill (const void * X)
{
  union
  {
    uint64_t u64;
  } u;
  unsigned int _4;
  uint64_t _6;

  <bb 2> [local count: 1073741824]:
  u.u64 = 0;
  _4 = MEM <unsigned int> [(char * {ref-all})X_3(D)];
  MEM <unsigned int> [(char * {ref-all})&u] = _4;
  _6 = u.u64;
  u ={v} {CLOBBER};
  return _6;

so it's the same on the GIMPLE level but somehow RTL expansion likes
the latter more, expanding 'u' to a register but not 'r'.  Ah,
that's because we have to keep TREE_ADDRESSABLE to prevent 'r' from
being rewritten into SSA but not 'u' ...

Extending DECL_GIMPLE_REG_P to non-vector/complex vars would likely
fix this.

We can also rewrite 'r' into SSA when we use BIT_INSERT_EXPR more
aggressively (not sure if we want that).

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug middle-end/94703] Small-sized  memcpy leading to unnecessary register spillage unless done through a dummy union
  2020-04-21 21:07 [Bug c/94703] New: Small-sized memcpy leading to unnecessary register spillage unless done through a dummy union pskocik at gmail dot com
  2020-04-22  7:28 ` [Bug middle-end/94703] " rguenth at gcc dot gnu.org
@ 2020-04-22  7:34 ` rguenth at gcc dot gnu.org
  2020-04-22  7:59 ` rguenth at gcc dot gnu.org
                   ` (12 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: rguenth at gcc dot gnu.org @ 2020-04-22  7:34 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94703

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |marxin at gcc dot gnu.org

--- Comment #2 from Richard Biener <rguenth at gcc dot gnu.org> ---
Implementation plan extending DECL_GIMPLE_REG_P:

 a) invert it, DECL_GIMPLE_REG_P -> DECL_NOT_GIMPLE_REG_P
 b) honor DECL_NOT_GIMPLE_REG_P for all types (just grep, most
    importantly in is_gimple_reg), eventually some tests for
    TREE_ADDRESSABLE need adjusting to DECL_NOT_GIMPLE_REG_P
 c) in update_address_taken always clear TREE_ADDRESSABLE when possible
    but set DECL_NOT_GIMPLE_REG_P appropriately;  audit other
    setters of TREE_ADDRESSABLE if they could do similar

CCing Martin who's always eager to jump onto new tasks ;)

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug middle-end/94703] Small-sized  memcpy leading to unnecessary register spillage unless done through a dummy union
  2020-04-21 21:07 [Bug c/94703] New: Small-sized memcpy leading to unnecessary register spillage unless done through a dummy union pskocik at gmail dot com
  2020-04-22  7:28 ` [Bug middle-end/94703] " rguenth at gcc dot gnu.org
  2020-04-22  7:34 ` rguenth at gcc dot gnu.org
@ 2020-04-22  7:59 ` rguenth at gcc dot gnu.org
  2020-04-22  8:46 ` rguenth at gcc dot gnu.org
                   ` (11 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: rguenth at gcc dot gnu.org @ 2020-04-22  7:59 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94703

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|NEW                         |ASSIGNED
           Assignee|unassigned at gcc dot gnu.org      |rguenth at gcc dot gnu.org

--- Comment #3 from Richard Biener <rguenth at gcc dot gnu.org> ---
Actually, I'll give it a quick try.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug middle-end/94703] Small-sized  memcpy leading to unnecessary register spillage unless done through a dummy union
  2020-04-21 21:07 [Bug c/94703] New: Small-sized memcpy leading to unnecessary register spillage unless done through a dummy union pskocik at gmail dot com
                   ` (2 preceding siblings ...)
  2020-04-22  7:59 ` rguenth at gcc dot gnu.org
@ 2020-04-22  8:46 ` rguenth at gcc dot gnu.org
  2020-05-07 13:39 ` cvs-commit at gcc dot gnu.org
                   ` (10 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: rguenth at gcc dot gnu.org @ 2020-04-22  8:46 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94703

--- Comment #4 from Richard Biener <rguenth at gcc dot gnu.org> ---
Created attachment 48335
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=48335&action=edit
untested patch

Patch, easier than expected.  Possibly needs some adjustment to the gimplifier,
we'll see.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug middle-end/94703] Small-sized  memcpy leading to unnecessary register spillage unless done through a dummy union
  2020-04-21 21:07 [Bug c/94703] New: Small-sized memcpy leading to unnecessary register spillage unless done through a dummy union pskocik at gmail dot com
                   ` (3 preceding siblings ...)
  2020-04-22  8:46 ` rguenth at gcc dot gnu.org
@ 2020-05-07 13:39 ` cvs-commit at gcc dot gnu.org
  2020-05-07 13:41 ` rguenth at gcc dot gnu.org
                   ` (9 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: cvs-commit at gcc dot gnu.org @ 2020-05-07 13:39 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94703

--- Comment #5 from CVS Commits <cvs-commit at gcc dot gnu.org> ---
The master branch has been updated by Richard Biener <rguenth@gcc.gnu.org>:

https://gcc.gnu.org/g:eb72dc663e9070b281be83a80f6f838a3a878822

commit r11-165-geb72dc663e9070b281be83a80f6f838a3a878822
Author: Richard Biener <rguenther@suse.de>
Date:   Wed Apr 22 10:40:51 2020 +0200

    extend DECL_GIMPLE_REG_P to all types

    This extends DECL_GIMPLE_REG_P to all types so we can clear
    TREE_ADDRESSABLE even for integers with partial defs, not just
    complex and vector variables.  To make that transition easier
    the patch inverts DECL_GIMPLE_REG_P to DECL_NOT_GIMPLE_REG_P
    since that makes the default the current state for all other
    types besides complex and vectors.

    For the testcase in PR94703 we're able to expand the partial
    def'ed local integer to a register then, producing a single
    movl rather than going through the stack.

    On i?86 this execute FAILs gcc.dg/torture/pr71522.c because
    we now expand a round-trip through a long double automatic var
    to a register fld/fst which normalizes the value.  For that
    during RTL expansion we're looking for problematic punnings
    of decls and avoid pseudos for those - I chose integer or
    BLKmode accesses on decls with modes where precision doesn't
    match bitsize which covers the XFmode case.

    2020-05-07  Richard Biener  <rguenther@suse.de>

            PR middle-end/94703
            * tree-core.h (tree_decl_common::gimple_reg_flag): Rename ...
            (tree_decl_common::not_gimple_reg_flag): ... to this.
            * tree.h (DECL_GIMPLE_REG_P): Rename ...
            (DECL_NOT_GIMPLE_REG_P): ... to this.
            * gimple-expr.c (copy_var_decl): Copy DECL_NOT_GIMPLE_REG_P.
            (create_tmp_reg): Simplify.
            (create_tmp_reg_fn): Likewise.
            (is_gimple_reg): Check DECL_NOT_GIMPLE_REG_P for all regs.
            * gimplify.c (create_tmp_from_val): Simplify.
            (gimplify_bind_expr): Likewise.
            (gimplify_compound_literal_expr): Likewise.
            (gimplify_function_tree): Likewise.
            (prepare_gimple_addressable): Set DECL_NOT_GIMPLE_REG_P.
            * asan.c (create_odr_indicator): Do not clear DECL_GIMPLE_REG_P.
            (asan_add_global): Copy it.
            * cgraphunit.c (cgraph_node::expand_thunk): Force args
            to be GIMPLE regs.
            * function.c (gimplify_parameters): Copy
            DECL_NOT_GIMPLE_REG_P.
            * ipa-param-manipulation.c
            (ipa_param_body_adjustments::common_initialization): Simplify.
            (ipa_param_body_adjustments::reset_debug_stmts): Copy
            DECL_NOT_GIMPLE_REG_P.
            * omp-low.c (lower_omp_for_scan): Do not set DECL_GIMPLE_REG_P.
            * sanopt.c (sanitize_rewrite_addressable_params): Likewise.
            * tree-cfg.c (make_blocks_1): Simplify.
            (verify_address): Do not verify DECL_GIMPLE_REG_P setting.
            * tree-eh.c (lower_eh_constructs_2): Simplify.
            * tree-inline.c (declare_return_variable): Adjust and
            generalize.
            (copy_decl_to_var): Copy DECL_NOT_GIMPLE_REG_P.
            (copy_result_decl_to_var): Likewise.
            * tree-into-ssa.c (pass_build_ssa::execute): Adjust comment.
            * tree-nested.c (create_tmp_var_for): Simplify.
            * tree-parloops.c (separate_decls_in_region_name): Copy
            DECL_NOT_GIMPLE_REG_P.
            * tree-sra.c (create_access_replacement): Adjust and
            generalize partial def support.
            * tree-ssa-forwprop.c (pass_forwprop::execute): Set
            DECL_NOT_GIMPLE_REG_P on decls we introduce partial defs on.
            * tree-ssa.c (maybe_optimize_var): Handle clearing of
            TREE_ADDRESSABLE and setting/clearing DECL_NOT_GIMPLE_REG_P
            independently.
            * lto-streamer-out.c (hash_tree): Hash DECL_NOT_GIMPLE_REG_P.
            * tree-streamer-out.c (pack_ts_decl_common_value_fields): Stream
            DECL_NOT_GIMPLE_REG_P.
            * tree-streamer-in.c (unpack_ts_decl_common_value_fields):
            Likewise.
            * cfgexpand.c (avoid_type_punning_on_regs): New.
            (discover_nonconstant_array_refs): Call
            avoid_type_punning_on_regs to avoid unsupported mode punning.

            lto/
            * lto-common.c (compare_tree_sccs_1): Compare
            DECL_NOT_GIMPLE_REG_P.

            c/
            * gimple-parser.c (c_parser_parse_ssa_name): Do not set
            DECL_GIMPLE_REG_P.

            cp/
            * optimize.c (update_cloned_parm): Copy DECL_NOT_GIMPLE_REG_P.

            * gcc.dg/tree-ssa/pr94703.c: New testcase.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug middle-end/94703] Small-sized  memcpy leading to unnecessary register spillage unless done through a dummy union
  2020-04-21 21:07 [Bug c/94703] New: Small-sized memcpy leading to unnecessary register spillage unless done through a dummy union pskocik at gmail dot com
                   ` (4 preceding siblings ...)
  2020-05-07 13:39 ` cvs-commit at gcc dot gnu.org
@ 2020-05-07 13:41 ` rguenth at gcc dot gnu.org
  2020-05-08 11:03 ` ro at gcc dot gnu.org
                   ` (8 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: rguenth at gcc dot gnu.org @ 2020-05-07 13:41 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94703

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|ASSIGNED                    |RESOLVED
         Resolution|---                         |FIXED
      Known to work|                            |11.0

--- Comment #6 from Richard Biener <rguenth at gcc dot gnu.org> ---
Fixed for GCC 11.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug middle-end/94703] Small-sized  memcpy leading to unnecessary register spillage unless done through a dummy union
  2020-04-21 21:07 [Bug c/94703] New: Small-sized memcpy leading to unnecessary register spillage unless done through a dummy union pskocik at gmail dot com
                   ` (5 preceding siblings ...)
  2020-05-07 13:41 ` rguenth at gcc dot gnu.org
@ 2020-05-08 11:03 ` ro at gcc dot gnu.org
  2020-05-08 11:03 ` ro at gcc dot gnu.org
                   ` (7 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: ro at gcc dot gnu.org @ 2020-05-08 11:03 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94703

Rainer Orth <ro at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |ro at gcc dot gnu.org

--- Comment #7 from Rainer Orth <ro at gcc dot gnu.org> ---
Created attachment 48483
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=48483&action=edit
32-bit sparc-sun-solaris2.11 pr94703.c.021t.ssa

The new testcase FAILs on sparc-sun-solaris2.11 (both 32 and 64-bit):

+FAIL: gcc.dg/tree-ssa/pr94703.c scan-tree-dump ssa "No longer having address
taken: r"

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug middle-end/94703] Small-sized  memcpy leading to unnecessary register spillage unless done through a dummy union
  2020-04-21 21:07 [Bug c/94703] New: Small-sized memcpy leading to unnecessary register spillage unless done through a dummy union pskocik at gmail dot com
                   ` (6 preceding siblings ...)
  2020-05-08 11:03 ` ro at gcc dot gnu.org
@ 2020-05-08 11:03 ` ro at gcc dot gnu.org
  2020-05-08 11:19 ` rguenth at gcc dot gnu.org
                   ` (6 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: ro at gcc dot gnu.org @ 2020-05-08 11:03 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94703

Rainer Orth <ro at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
         Resolution|FIXED                       |---
             Status|RESOLVED                    |REOPENED

--- Comment #8 from Rainer Orth <ro at gcc dot gnu.org> ---
Testcase FAILs on SPARC.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug middle-end/94703] Small-sized  memcpy leading to unnecessary register spillage unless done through a dummy union
  2020-04-21 21:07 [Bug c/94703] New: Small-sized memcpy leading to unnecessary register spillage unless done through a dummy union pskocik at gmail dot com
                   ` (7 preceding siblings ...)
  2020-05-08 11:03 ` ro at gcc dot gnu.org
@ 2020-05-08 11:19 ` rguenth at gcc dot gnu.org
  2020-05-13 14:43 ` ro at CeBiTec dot Uni-Bielefeld.DE
                   ` (5 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: rguenth at gcc dot gnu.org @ 2020-05-08 11:19 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94703

--- Comment #9 from Richard Biener <rguenth at gcc dot gnu.org> ---
(In reply to Rainer Orth from comment #7)
> Created attachment 48483 [details]
> 32-bit sparc-sun-solaris2.11 pr94703.c.021t.ssa
> 
> The new testcase FAILs on sparc-sun-solaris2.11 (both 32 and 64-bit):
> 
> +FAIL: gcc.dg/tree-ssa/pr94703.c scan-tree-dump ssa "No longer having
> address taken: r"

Hmm, OK looks like memcpy is not folded, likely because the source is
not known to be appropriately aligned.

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr94703.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr94703.c
index 7209fa0a4d4..eadea45a32f 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr94703.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr94703.c
@@ -4,6 +4,7 @@
 unsigned int set_lowpart (unsigned int const *X)
 {
   unsigned int r = 0;
+  X = __builtin_assume_aligned (X, sizeof (unsigned int) / 2);
   __builtin_memcpy(&r,X,sizeof (unsigned int) / 2);
   return r;
 }

should fix this.  Can you verify and if so, commit?  Thx.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug middle-end/94703] Small-sized  memcpy leading to unnecessary register spillage unless done through a dummy union
  2020-04-21 21:07 [Bug c/94703] New: Small-sized memcpy leading to unnecessary register spillage unless done through a dummy union pskocik at gmail dot com
                   ` (8 preceding siblings ...)
  2020-05-08 11:19 ` rguenth at gcc dot gnu.org
@ 2020-05-13 14:43 ` ro at CeBiTec dot Uni-Bielefeld.DE
  2020-05-13 16:00 ` pskocik at gmail dot com
                   ` (4 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: ro at CeBiTec dot Uni-Bielefeld.DE @ 2020-05-13 14:43 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94703

--- Comment #10 from ro at CeBiTec dot Uni-Bielefeld.DE <ro at CeBiTec dot Uni-Bielefeld.DE> ---
> --- Comment #9 from Richard Biener <rguenth at gcc dot gnu.org> ---
[...]
> Hmm, OK looks like memcpy is not folded, likely because the source is
> not known to be appropriately aligned.
[...]
> should fix this.  Can you verify and if so, commit?  Thx.

Unfortunately, it doesn't.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug middle-end/94703] Small-sized  memcpy leading to unnecessary register spillage unless done through a dummy union
  2020-04-21 21:07 [Bug c/94703] New: Small-sized memcpy leading to unnecessary register spillage unless done through a dummy union pskocik at gmail dot com
                   ` (9 preceding siblings ...)
  2020-05-13 14:43 ` ro at CeBiTec dot Uni-Bielefeld.DE
@ 2020-05-13 16:00 ` pskocik at gmail dot com
  2020-05-14  9:10 ` rguenth at gcc dot gnu.org
                   ` (3 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: pskocik at gmail dot com @ 2020-05-13 16:00 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94703

--- Comment #11 from pskocik at gmail dot com ---
Thanks for the shot at a fix, Richard Biener.

Since I have reported this, I think I should mention a related suboptimality
that should probably be getting fixed alongside with this (if this one is
getting fixed), namely that while


int64_t zextend_int_to_int64_nospill(int *X) 
{ 
    union { int64_t _; } r = {0}; return memcpy(&r._,X,sizeof(*X)),r._;
}

(and hopefully later even 

int64_t zextend_int_to_int64_spill(int *X) { int64_t r = {0}; return
memcpy(&r,X,sizeof(*X)),r; }
)

generates, on x86_64, the optimal

zextend_int_to_int64_nospill:
        mov     eax, DWORD PTR [rdi]
        ret

for zeroextending promotions of sub-int types, an extra xor instruction gets
generated, e.g.:


int64_t zextend_short_to_int64_nospill_but_suboptimal(short *X) 
{
union { int64_t _; } r ={0}; return memcpy(&r._,X,sizeof(*X)),r._;
}

=>

zextend_short_to_int64_nospill_but_suboptimal:
        xor     eax, eax
        mov     ax, WORD PTR [rdi]
        ret

which was surprising to me because it doesn't happen with zero-extending
memcpy-based promotion from {,u}ints to larger types ({,u}{,l}longs).

https://gcc.godbolt.org/z/ZjXaCw

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug middle-end/94703] Small-sized  memcpy leading to unnecessary register spillage unless done through a dummy union
  2020-04-21 21:07 [Bug c/94703] New: Small-sized memcpy leading to unnecessary register spillage unless done through a dummy union pskocik at gmail dot com
                   ` (10 preceding siblings ...)
  2020-05-13 16:00 ` pskocik at gmail dot com
@ 2020-05-14  9:10 ` rguenth at gcc dot gnu.org
  2020-05-14  9:49 ` rguenth at gcc dot gnu.org
                   ` (2 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: rguenth at gcc dot gnu.org @ 2020-05-14  9:10 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94703

--- Comment #12 from Richard Biener <rguenth at gcc dot gnu.org> ---
(In reply to pskocik from comment #11)
> Thanks for the shot at a fix, Richard Biener.
> 
> Since I have reported this, I think I should mention a related
> suboptimality that should probably be getting fixed alongside with this (if
> this one is getting fixed), namely that while
> 
> 
> int64_t zextend_int_to_int64_nospill(int *X) 
> { 
>     union { int64_t _; } r = {0}; return memcpy(&r._,X,sizeof(*X)),r._;
> }
> 
> (and hopefully later even 
> 
> int64_t zextend_int_to_int64_spill(int *X) { int64_t r = {0}; return
> memcpy(&r,X,sizeof(*X)),r; }
> )
> 
> generates, on x86_64, the optimal
> 
> zextend_int_to_int64_nospill:
>         mov     eax, DWORD PTR [rdi]
>         ret
> 
> for zeroextending promotions of sub-int types, an extra xor instruction gets
> generated, e.g.:
> 
> 
> int64_t zextend_short_to_int64_nospill_but_suboptimal(short *X) 
> {
> union { int64_t _; } r ={0}; return memcpy(&r._,X,sizeof(*X)),r._;
> }
> 
> =>
> 
> zextend_short_to_int64_nospill_but_suboptimal:
>         xor     eax, eax
>         mov     ax, WORD PTR [rdi]
>         ret
> 
> which was surprising to me because it doesn't happen with zero-extending
> memcpy-based promotion from {,u}ints to larger types ({,u}{,l}longs).
> 
> https://gcc.godbolt.org/z/ZjXaCw

I think this is PR93507 for which I have a patch queued as well.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug middle-end/94703] Small-sized  memcpy leading to unnecessary register spillage unless done through a dummy union
  2020-04-21 21:07 [Bug c/94703] New: Small-sized memcpy leading to unnecessary register spillage unless done through a dummy union pskocik at gmail dot com
                   ` (11 preceding siblings ...)
  2020-05-14  9:10 ` rguenth at gcc dot gnu.org
@ 2020-05-14  9:49 ` rguenth at gcc dot gnu.org
  2020-05-14  9:54 ` cvs-commit at gcc dot gnu.org
  2020-05-14  9:55 ` rguenth at gcc dot gnu.org
  14 siblings, 0 replies; 16+ messages in thread
From: rguenth at gcc dot gnu.org @ 2020-05-14  9:49 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94703

--- Comment #13 from Richard Biener <rguenth at gcc dot gnu.org> ---
(In reply to ro@CeBiTec.Uni-Bielefeld.DE from comment #10)
> > --- Comment #9 from Richard Biener <rguenth at gcc dot gnu.org> ---
> [...]
> > Hmm, OK looks like memcpy is not folded, likely because the source is
> > not known to be appropriately aligned.
> [...]
> > should fix this.  Can you verify and if so, commit?  Thx.
> 
> Unfortunately, it doesn't.

OK, this only helps a bit later since CCP is required to propagate the
alignment, the following forwprop pass to elide the memcpy and then
finally the update-address-taken invocation in the _second_ CCP pass
after inlining will have

pr94703.c.093t.ccp2:No longer having address taken: r

I've long pondered to remove the memcpy folding restriction for strict-align
targets but never went through.

I'll update the testcase to require

/* { dg-require-effective-target non_strict_align } */

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug middle-end/94703] Small-sized  memcpy leading to unnecessary register spillage unless done through a dummy union
  2020-04-21 21:07 [Bug c/94703] New: Small-sized memcpy leading to unnecessary register spillage unless done through a dummy union pskocik at gmail dot com
                   ` (12 preceding siblings ...)
  2020-05-14  9:49 ` rguenth at gcc dot gnu.org
@ 2020-05-14  9:54 ` cvs-commit at gcc dot gnu.org
  2020-05-14  9:55 ` rguenth at gcc dot gnu.org
  14 siblings, 0 replies; 16+ messages in thread
From: cvs-commit at gcc dot gnu.org @ 2020-05-14  9:54 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94703

--- Comment #14 from CVS Commits <cvs-commit at gcc dot gnu.org> ---
The master branch has been updated by Richard Biener <rguenth@gcc.gnu.org>:

https://gcc.gnu.org/g:0d1ccfd0cc2e1add15929c43e6c7472336d33e65

commit r11-384-g0d1ccfd0cc2e1add15929c43e6c7472336d33e65
Author: Richard Biener <rguenther@suse.de>
Date:   Thu May 14 11:50:20 2020 +0200

    testsuite/94703 - skip gcc.dg/tree-ssa/pr94703.c on strict-align targets

    The specific dump scanning doesn't work on strict-align targets,
    the following simply skips the testcase for those.

    2020-05-14  Richard Biener  <rguenther@suse.de>

            PR testsuite/94703
            * gcc.dg/tree-ssa/pr94703.c: Skip for strict-align targets.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug middle-end/94703] Small-sized  memcpy leading to unnecessary register spillage unless done through a dummy union
  2020-04-21 21:07 [Bug c/94703] New: Small-sized memcpy leading to unnecessary register spillage unless done through a dummy union pskocik at gmail dot com
                   ` (13 preceding siblings ...)
  2020-05-14  9:54 ` cvs-commit at gcc dot gnu.org
@ 2020-05-14  9:55 ` rguenth at gcc dot gnu.org
  14 siblings, 0 replies; 16+ messages in thread
From: rguenth at gcc dot gnu.org @ 2020-05-14  9:55 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94703

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|REOPENED                    |RESOLVED
         Resolution|---                         |FIXED

--- Comment #15 from Richard Biener <rguenth at gcc dot gnu.org> ---
Fixed.

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2020-05-14  9:55 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-21 21:07 [Bug c/94703] New: Small-sized memcpy leading to unnecessary register spillage unless done through a dummy union pskocik at gmail dot com
2020-04-22  7:28 ` [Bug middle-end/94703] " rguenth at gcc dot gnu.org
2020-04-22  7:34 ` rguenth at gcc dot gnu.org
2020-04-22  7:59 ` rguenth at gcc dot gnu.org
2020-04-22  8:46 ` rguenth at gcc dot gnu.org
2020-05-07 13:39 ` cvs-commit at gcc dot gnu.org
2020-05-07 13:41 ` rguenth at gcc dot gnu.org
2020-05-08 11:03 ` ro at gcc dot gnu.org
2020-05-08 11:03 ` ro at gcc dot gnu.org
2020-05-08 11:19 ` rguenth at gcc dot gnu.org
2020-05-13 14:43 ` ro at CeBiTec dot Uni-Bielefeld.DE
2020-05-13 16:00 ` pskocik at gmail dot com
2020-05-14  9:10 ` rguenth at gcc dot gnu.org
2020-05-14  9:49 ` rguenth at gcc dot gnu.org
2020-05-14  9:54 ` cvs-commit at gcc dot gnu.org
2020-05-14  9:55 ` rguenth at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).