From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 25030 invoked by alias); 6 Aug 2009 21:42:25 -0000 Received: (qmail 25021 invoked by uid 22791); 6 Aug 2009 21:42:24 -0000 X-SWARE-Spam-Status: No, hits=-0.1 required=5.0 tests=AWL,BAYES_00,J_CHICKENPOX_12,J_CHICKENPOX_16,NO_DNS_FOR_FROM X-Spam-Check-By: sourceware.org Received: from mga10.intel.com (HELO fmsmga102.fm.intel.com) (192.55.52.92) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Thu, 06 Aug 2009 21:42:18 +0000 Received: from fmsmga002.fm.intel.com ([10.253.24.26]) by fmsmga102.fm.intel.com with ESMTP; 06 Aug 2009 14:32:30 -0700 X-ExtLoop1: 1 Received: from gnu-6.sc.intel.com ([10.3.194.198]) by fmsmga002.fm.intel.com with ESMTP; 06 Aug 2009 14:35:14 -0700 Received: by gnu-6.sc.intel.com (Postfix, from userid 500) id BD05C812111; Thu, 6 Aug 2009 14:42:16 -0700 (PDT) Date: Thu, 06 Aug 2009 21:42:00 -0000 From: "H.J. Lu" To: gcc-patches@gcc.gnu.org Cc: ubizjak@gmail.com Subject: PATCH: PR target/40838: gcc shouldn't assume that the stack is aligned Message-ID: <20090806214216.GA14439@lucon.org> Reply-To: "H.J. Lu" MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline User-Agent: Mutt/1.5.19 (2009-01-05) Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org X-SW-Source: 2009-08/txt/msg00392.txt.bz2 Hi, In 32-bit mode, the incoming stack may not be 16-byte aligned. This patch assumes the incoming stack is only 4-byte aligned and realigns the stack if any SSE variable is put on the stack. Any comments? Thanks. H.J. --- gcc/ 2009-08-06 H.J. Lu PR target/40838 * config/i386/i386.c (ix86_update_stack_boundary): Use STACK_BOUNDARY if use_stack_boundary_for_incoming_stack_boundary is set. (VALID_SSE_VECTOR_MODE): New. (ix86_minimum_alignment): In 32bit, set use_stack_boundary_for_incoming_stack_boundary if any SSE variables are put on stack. 
* config/i386/i386.h (machine_function): Add use_stack_boundary_for_incoming_stack_boundary. gcc/testsuite/ 2009-08-06 H.J. Lu PR target/40838 * gcc.target/i386/incoming-6.c: New. * gcc.target/i386/incoming-7.c: Likewise. * gcc.target/i386/incoming-8.c: Likewise. * gcc.target/i386/incoming-9.c: Likewise. Index: gcc/testsuite/gcc.target/i386/incoming-7.c =================================================================== --- gcc/testsuite/gcc.target/i386/incoming-7.c (revision 0) +++ gcc/testsuite/gcc.target/i386/incoming-7.c (revision 0) @@ -0,0 +1,16 @@ +/* PR target/40838 */ +/* { dg-do compile { target { { ! *-*-darwin* } && ilp32 } } } */ +/* { dg-options "-w -O2 -msse2 -mpreferred-stack-boundary=4" } */ + +typedef int v4si __attribute__ ((vector_size (16))); + +extern v4si y(v4si, v4si, v4si, v4si, v4si); + +extern v4si s1, s2; + +v4si x(void) +{ + return y(s1, s2, s1, s2, s2); +} + +/* { dg-final { scan-assembler "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */ Index: gcc/testsuite/gcc.target/i386/incoming-9.c =================================================================== --- gcc/testsuite/gcc.target/i386/incoming-9.c (revision 0) +++ gcc/testsuite/gcc.target/i386/incoming-9.c (revision 0) @@ -0,0 +1,18 @@ +/* PR target/40838 */ +/* { dg-do compile { target { { ! 
*-*-darwin* } && ilp32 } } } */ +/* { dg-options "-w -O3 -mno-sse -mpreferred-stack-boundary=4" } */ + +float +foo (float f) +{ + float array[128]; + float x; + int i; + for (i = 0; i < sizeof(array) / sizeof(*array); i++) + array[i] = f; + for (i = 0; i < sizeof(array) / sizeof(*array); i++) + x += array[i]; + return x; +} + +/* { dg-final { scan-assembler-not "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */ Index: gcc/testsuite/gcc.target/i386/incoming-6.c =================================================================== --- gcc/testsuite/gcc.target/i386/incoming-6.c (revision 0) +++ gcc/testsuite/gcc.target/i386/incoming-6.c (revision 0) @@ -0,0 +1,17 @@ +/* PR target/40838 */ +/* { dg-do compile { target { { ! *-*-darwin* } && ilp32 } } } */ +/* { dg-options "-w -O2 -msse2 -mpreferred-stack-boundary=4" } */ + +typedef int v4si __attribute__ ((vector_size (16))); + +extern v4si y(v4si *s3); + +extern v4si s1, s2; + +v4si x(void) +{ + v4si s3 = s1 + s2; + return y(&s3); +} + +/* { dg-final { scan-assembler "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */ Index: gcc/testsuite/gcc.target/i386/incoming-8.c =================================================================== --- gcc/testsuite/gcc.target/i386/incoming-8.c (revision 0) +++ gcc/testsuite/gcc.target/i386/incoming-8.c (revision 0) @@ -0,0 +1,18 @@ +/* PR target/40838 */ +/* { dg-do compile { target { { ! 
*-*-darwin* } && ilp32 } } } */ +/* { dg-options "-w -O3 -msse2 -mpreferred-stack-boundary=4" } */ + +float +foo (float f) +{ + float array[128]; + float x; + int i; + for (i = 0; i < sizeof(array) / sizeof(*array); i++) + array[i] = f; + for (i = 0; i < sizeof(array) / sizeof(*array); i++) + x += array[i]; + return x; +} + +/* { dg-final { scan-assembler "andl\[\\t \]*\\$-16,\[\\t \]*%esp" } } */ Index: gcc/config/i386/i386.h =================================================================== --- gcc/config/i386/i386.h (revision 150532) +++ gcc/config/i386/i386.h (working copy) @@ -2389,6 +2389,8 @@ struct GTY(()) machine_function { /* This value is used for amd64 targets and specifies the current abi to be used. MS_ABI means ms abi. Otherwise SYSV_ABI means sysv abi. */ enum calling_abi call_abi; + /* Use STACK_BOUNDARY for incoming stack boundary. */ + int use_stack_boundary_for_incoming_stack_boundary; struct machine_cfa_state cfa; }; #endif Index: gcc/config/i386/i386.c =================================================================== --- gcc/config/i386/i386.c (revision 150532) +++ gcc/config/i386/i386.c (working copy) @@ -8230,11 +8230,19 @@ find_drap_reg (void) static void ix86_update_stack_boundary (void) { + /* Should we use STACK_BOUNDARY for incoming stack boundary? */ + unsigned int incoming_stack_boundary; + + if (cfun->machine->use_stack_boundary_for_incoming_stack_boundary) + incoming_stack_boundary = STACK_BOUNDARY; + else + incoming_stack_boundary = ix86_default_incoming_stack_boundary; + /* Prefer the one specified at command line. */ ix86_incoming_stack_boundary = (ix86_user_incoming_stack_boundary ? ix86_user_incoming_stack_boundary - : ix86_default_incoming_stack_boundary); + : incoming_stack_boundary); /* Incoming stack alignment can be changed on individual functions via force_align_arg_pointer attribute. 
We use the smallest @@ -20069,6 +20077,10 @@ ix86_local_alignment (tree exp, enum mac return align; } +#define VALID_SSE_VECTOR_MODE(MODE) \ + ((MODE) == V4SFmode || (MODE) == V4SImode || (MODE) == V2DFmode \ + || (MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DImode) + /* Compute the minimum required alignment for dynamic stack realignment purposes for a local variable, parameter or a stack slot. EXP is the data type or decl itself, MODE is its mode and ALIGN is the @@ -20080,7 +20092,7 @@ ix86_minimum_alignment (tree exp, enum m { tree type, decl; - if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64) + if (TARGET_64BIT) return align; if (exp && DECL_P (exp)) @@ -20094,6 +20106,15 @@ ix86_minimum_alignment (tree exp, enum m decl = NULL; } + /* In 32bit, use STACK_BOUNDARY for incoming stack boundary if any + SSE variables are put on stack. */ + if (VALID_SSE_VECTOR_MODE (mode) + || (type && VALID_SSE_VECTOR_MODE (TYPE_MODE (type)))) + cfun->machine->use_stack_boundary_for_incoming_stack_boundary = 1; + + if (align != 64 || ix86_preferred_stack_boundary >= 64) + return align; + /* Don't do dynamic stack realignment for long long objects with -mpreferred-stack-boundary=2. */ if ((mode == DImode || (type && TYPE_MODE (type) == DImode))