From mboxrd@z Thu Jan  1 00:00:00 1970
From: Bruno Haible <haible@ilog.fr>
To: egcs@cygnus.com
Subject: eh_table alignment
Date: Sun, 30 Nov 1997 04:53:00 -0000
Message-id: <199711301253.NAA16791@halles.ilog.fr>
X-SW-Source: 1997-11/msg00973.html

The calls to ASM_OUTPUT_ALIGN in dwarf2out.c in testgcc-971128 cause a problem
on i386.
Take, for example, the following source:

================= foo.cc =======================
void foo () { throw 5; }
================================================

and its output (on i486-linux):

================== cc1plus foo.cc -dA -o foo.s =========================
	.file	"foo.cc"
	.version	"01.01"
/ GNU C++ version testgcc-2.8.0 971128 experimental (i486-linux) compiled by GNU C version 2.7.2.
/ options passed: 
/ options enabled:  -fpeephole -ffunction-cse -fkeep-static-consts
/ -fpcc-struct-return -fexceptions -fcommon -fverbose-asm -fgnu-linker
/ -m80387 -mhard-float -mno-soft-float -mieee-fp -mfp-ret-in-387
/ -mschedule-prologue -mcpu=i486 -march=pentium

gcc2_compiled.:
.globl __throw
.text
	.align 16
.globl foo__Fv
	.type	 foo__Fv,@function
foo__Fv:
.LFB1:
	pushl %ebp
.LCFI0:
	movl %esp,%ebp
.LCFI1:
	pushl %ebx
.LCFI2:
	pushl $0
.LCFI3:
	call __tfi
	movl %eax,%eax
	pushl %eax
.LCFI4:
	pushl $4
.LCFI5:
	call __builtin_new
	addl $4,%esp
.LCFI6:
	movl %eax,%eax
	movl %eax,%ebx
	movl $5,(%ebx)
	pushl %ebx
.LCFI7:
	call __cp_push_exception
	addl $12,%esp
.LCFI8:
.L2:
	movl $.L2,__eh_pc
	call __throw
	.align 16
	jmp .L3
	.align 16
	jmp .L1
	.align 16
.L3:
.L1:
	movl -4(%ebp),%ebx
	movl %ebp,%esp
	popl %ebp
	ret
.LFE1:
.Lfe1:
	.size	 foo__Fv,.Lfe1-foo__Fv

#APP
.section	.eh_frame,"aw",@progbits
__FRAME_BEGIN__:
	.4byte	.LLCIE1	/ Length of Common Information Entry
.LSCIE1:
	.4byte	0x0	/ CIE Identifier Tag
	.byte	0x1	/ CIE Version
	.byte	0x0	/ CIE Augmentation (none)
	.byte	0x1	/ ULEB128 0x1 (CIE Code Alignment Factor)
	.byte	0x7c	/ SLEB128 -4 (CIE Data Alignment Factor)
	.byte	0x8	/ CIE RA Column
	.byte	0xc	/ DW_CFA_def_cfa
	.byte	0x4	/ ULEB128 0x4
	.byte	0x4	/ ULEB128 0x4
	.byte	0x88	/ DW_CFA_offset, column 0x8
	.byte	0x1	/ ULEB128 0x1
	.align 4
.LECIE1:
	.set	.LLCIE1,.LECIE1-.LSCIE1
	.4byte	.LLFDE1	/ FDE Length
.LSFDE1:
	.4byte	.LSFDE1-__FRAME_BEGIN__	/ FDE CIE offset
	.4byte	.LFB1	/ FDE initial location
	.4byte	.LFE1-.LFB1	/ FDE address range
	.byte	0x4	/ DW_CFA_advance_loc4
	.4byte	.LCFI0-.LFB1
	.byte	0xe	/ DW_CFA_def_cfa_offset
	.byte	0x8	/ ULEB128 0x8
	.byte	0x85	/ DW_CFA_offset, column 0x5
	.byte	0x2	/ ULEB128 0x2
	.byte	0x4	/ DW_CFA_advance_loc4
	.4byte	.LCFI1-.LCFI0
	.byte	0xd	/ DW_CFA_def_cfa_register
	.byte	0x5	/ ULEB128 0x5
	.byte	0x4	/ DW_CFA_advance_loc4
	.4byte	.LCFI2-.LCFI1
	.byte	0x83	/ DW_CFA_offset, column 0x3
	.byte	0x3	/ ULEB128 0x3
	.byte	0x4	/ DW_CFA_advance_loc4
	.4byte	.LCFI3-.LCFI2
	.byte	0x2e	/ DW_CFA_GNU_args_size
	.byte	0x4	/ ULEB128 0x4
	.byte	0x4	/ DW_CFA_advance_loc4
	.4byte	.LCFI4-.LCFI3
	.byte	0x2e	/ DW_CFA_GNU_args_size
	.byte	0x8	/ ULEB128 0x8
	.byte	0x4	/ DW_CFA_advance_loc4
	.4byte	.LCFI5-.LCFI4
	.byte	0x2e	/ DW_CFA_GNU_args_size
	.byte	0xc	/ ULEB128 0xc
	.byte	0x4	/ DW_CFA_advance_loc4
	.4byte	.LCFI6-.LCFI5
	.byte	0x2e	/ DW_CFA_GNU_args_size
	.byte	0x8	/ ULEB128 0x8
	.byte	0x4	/ DW_CFA_advance_loc4
	.4byte	.LCFI7-.LCFI6
	.byte	0x2e	/ DW_CFA_GNU_args_size
	.byte	0xc	/ ULEB128 0xc
	.byte	0x4	/ DW_CFA_advance_loc4
	.4byte	.LCFI8-.LCFI7
	.byte	0x2e	/ DW_CFA_GNU_args_size
	.byte	0x0	/ ULEB128 0x0
	.align 4
.LEFDE1:
	.set	.LLFDE1,.LEFDE1-.LSFDE1
#NO_APP
	.ident	"GCC: (GNU) testgcc-2.8.0 971128 experimental"
==================================================================

Note that the call to __throw comes *after* the label .LCFI8 (which is
the last label covered by a DW_CFA_advance_loc4 instruction in the FDE).
This occurs only without -O.

At runtime, the following happens:
- __throw() is called.
- __frame_state_for sees that the FDE at .LSFDE1 is responsible,
  because the call to __throw is between .LFB1 and .LFE1.
- __frame_state_for executes all instructions of the FDE, up to .LEFDE1,
  because that is what it is programmed to do when no DW_CFA_advance_loc4
  advances the location past the address being looked up.
- Among these is the padding emitted for ".align 4": one or more bytes
  with value 0x90 (= 0x80 | 16), which is interpreted as a DW_CFA_offset
  instruction for register 16. This sets the `saved' bit for register 16.
- __throw then calls copy_reg for this register. Since register 16 is
  not a call-saved register on i386 (it is not a valid register at all),
  abort() is called.

What is the right way to fix this?

  a. Make sure that .LCFI8 is emitted *after* the call to __throw
     (as is done when compiling with "-O")?

  b. Instead of ".align 4", emit ".align 4,0x00", where the 0x00 is
     DW_CFA_nop. Do all assemblers support this?

  c. Remove the alignment altogether, and in frame.c change the two
     occurrences of __attribute__ ((packed, aligned (__alignof__ (void *))))
     to __attribute__ ((packed, aligned (1))). Does gcc know how to emit
     unaligned accesses on all CPUs?