From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 14568 invoked by alias); 6 Jan 2020 15:09:42 -0000 Mailing-List: contact gcc-help-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-help-owner@gcc.gnu.org Received: (qmail 14560 invoked by uid 89); 6 Jan 2020 15:09:42 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-0.7 required=5.0 tests=AWL,BAYES_00,SCC_5_SHORT_WORD_LINES,SPF_SOFTFAIL autolearn=no version=3.3.1 spammy=H*F:U*gcc, nearly, clever, playing X-HELO: cerberus.halldom.com Received: from cerberus.halldom.com (HELO cerberus.halldom.com) (79.135.97.241) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Mon, 06 Jan 2020 15:09:39 +0000 Received: from ceres.halldom.com ([79.135.97.244]:51842) by cerberus.halldom.com with esmtpsa (TLSv1.2:ECDHE-RSA-AES128-GCM-SHA256:128) (Exim 4.92) (envelope-from ) id 1ioU0e-000641-Re for gcc-help@gcc.gnu.org; Mon, 06 Jan 2020 15:09:36 +0000 Subject: Function returning struct on x86_64 (at least) To: gcc-help@gcc.gnu.org References: <20190418190754.GH8599@gate.crashing.org> <4b1f3ec6-8df9-7707-8d75-af6a84774b74@gmch.uk> <042a6858-a5a5-c842-e29f-ca45fbe14912@gmch.uk> From: Chris Hall Message-ID: <6ca4e3d5-46ea-ef7e-080c-11da70f0b9d8@gmch.uk> Date: Mon, 06 Jan 2020 15:09:00 -0000 User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:60.0) Gecko/20100101 Thunderbird/60.9.1 MIME-Version: 1.0 In-Reply-To: <042a6858-a5a5-c842-e29f-ca45fbe14912@gmch.uk> Content-Type: text/plain; charset=utf-8; format=flowed Content-Transfer-Encoding: 7bit X-SW-Source: 2020-01/txt/msg00016.txt.bz2 I hoped to do something "clever" with a function of the form: typedef struct { char s[64] ; } qerr_str_t ; extern qerr_str_t qerrst0(int err) { qerr_str_t st ; snprintf(st.s, sizeof(st.s), "errno=%d", err) ; return st ; } but was disappointed to find that this compiles (gcc 8.3 and others, -O2) to this: .LC0: .string "errno=%d" qerrst0: 
pushq %rbx movl %esi, %ecx movq %rdi, %rbx movl $.LC0, %edx movl $64, %esi xorl %eax, %eax subq $64, %rsp movq %rsp, %rdi call snprintf movdqa (%rsp), %xmm0 movq %rbx, %rax movdqa 16(%rsp), %xmm1 movdqa 32(%rsp), %xmm2 movdqa 48(%rsp), %xmm3 movups %xmm0, (%rbx) movups %xmm1, 16(%rbx) movups %xmm2, 32(%rbx) movups %xmm3, 48(%rbx) addq $64, %rsp popq %rbx ret On reflection, the compiler is playing safe and not writing to whatever the "hidden" pointer %rdi is pointing at, until the implicit assignment. So I have no right to be disappointed. The object of the exercise is to create temporary strings for use like this: int main(int argc, char* argv[]) { printf("%s: %s\n", argv[0], qerrst0(argc).s) ; } where the "hidden" pointer passed to qerrst0() does not, in fact, point to anything accessible. Sadly, even when qerrst0() is inlined, I find: .LC0: .string "errno=%d" .LC1: .string "%s: %s\n" main: pushq %rbx movl %edi, %ecx movq %rsi, %rbx movl $.LC0, %edx movl $64, %esi xorl %eax, %eax addq $-128, %rsp leaq 64(%rsp), %rdi call snprintf movdqa 64(%rsp), %xmm0 movq (%rbx), %rsi xorl %eax, %eax movdqa 80(%rsp), %xmm1 movdqa 96(%rsp), %xmm2 movq %rsp, %rdx movl $.LC1, %edi movdqa 112(%rsp), %xmm3 movaps %xmm0, (%rsp) movaps %xmm1, 16(%rsp) movaps %xmm2, 32(%rsp) movaps %xmm3, 48(%rsp) call printf subq $-128, %rsp xorl %eax, %eax popq %rbx ret where there is still an (unnecessary) assignment going on ! I tried something simpler: extern qerr_str_t qerrst1(int err) { qerr_str_t st ; st.s[0] = err ; return st ; } which compiles to: qerrst1: movq %rdi, %rax movb %sil, (%rdi) ret ...so a trivial case optimises as one might hope. 
As does: extern qerr_str_t qerrst2(int err) { qerr_str_t st ; char* q = st.s ; q[0] = err ; q[63] = err ; return st ; } qerrst2: movq %rdi, %rax movb %sil, (%rdi) movb %sil, 63(%rdi) ret The following are also optimised: extern qerr_str_t qerrst3a(int err) { qerr_str_t st = { "" } ; return st ; } extern qerr_str_t qerrst3b(int err) { qerr_str_t st ; char* q = st.s ; memset(q, 0, sizeof(st.s)) ; return st ; } to the same code: qerrst3a/b: pxor %xmm0, %xmm0 movq %rdi, %rax movups %xmm0, (%rdi) movups %xmm0, 16(%rdi) movups %xmm0, 32(%rdi) movups %xmm0, 48(%rdi) ret However, ever so slightly more complicated: extern qerr_str_t qerrst4(int err) { qerr_str_t st ; for (int i = 0 ; i < (err & 63) ; ++i) st.s[i] = err - i ; return st ; } qerrst4: movl %esi, %edx movq %rdi, %rax andl $63, %edx je .L12 subl $1, %edx leaq -71(%rsp,%rdx), %r8 leaq -72(%rsp), %rdx addl %edx, %esi .L11: movl %esi, %ecx subl %edx, %ecx addq $1, %rdx movb %cl, -1(%rdx) cmpq %r8, %rdx jne .L11 .L12: movdqa -72(%rsp), %xmm0 movdqa -56(%rsp), %xmm1 movdqa -40(%rsp), %xmm2 movdqa -24(%rsp), %xmm3 movups %xmm0, (%rax) movups %xmm1, 16(%rax) movups %xmm2, 32(%rax) movups %xmm3, 48(%rax) ret Which is a puzzle :-( Interestingly, I also found (after a little effort): extern qerr_str_t qerrst5(int err, char* fred) { qerr_str_t st ; st.s[ 0] = err ; st.s[ 2] = fred[ 8] ; st.s[ 4] = fred[ 6] ; st.s[ 6] = fred[ 4] ; st.s[ 8] = fred[ 2] ; st.s[10] = fred[ 0] ; return st ; } qerrst5: movq %rdi, %rax movzbl 8(%rdx), %r9d movzbl 6(%rdx), %r8d movzbl 4(%rdx), %edi movzbl 2(%rdx), %ecx movb %sil, (%rax) -- BUG iff %rax == %rdx ! movzbl (%rdx), %edx movb %r9b, 2(%rax) movb %r8b, 4(%rax) movb %dil, 6(%rax) movb %cl, 8(%rax) movb %dl, 10(%rax) ret which is very nearly correct... except as noted, if fred points at the final destination !! 
For this to do what I had hoped (which I imagine is the majority case), what is needed is a way to mark the declaration of 'qerr_str_t st' in the function as a "clone" of the final destination 'qerr_str_t' in the caller -- so that the compiler could Just Do It. I looked for an __attribute__(()) for this... but could not find one. Is there any way in which I can persuade the compiler that a function returning a struct does not need to worry about preserving the value of the final destination (i.e. the struct at %rdi) ? Chris