From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path:
Received: by sourceware.org (Postfix, from userid 2153)
	id EF84D3857C51; Tue, 18 Jan 2022 12:42:24 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org EF84D3857C51
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
From: Jakub Jelinek
To: gcc-cvs@gcc.gnu.org, libstdc++-cvs@gcc.gnu.org
Subject: [gcc(refs/vendors/redhat/heads/gcc-12-branch)] Merge commit 'r12-6669-g38ec23fafb167ddfe840d7bb22b3e943d8a7d29e' into redhat/gcc-12-branch
X-Act-Checkin: gcc
X-Git-Author: Jakub Jelinek
X-Git-Refname: refs/vendors/redhat/heads/gcc-12-branch
X-Git-Oldrev: d8c9e50646a688fa39fd228289164868692b3474
X-Git-Newrev: 880787aef7a985a80f88a14f830fb554a33b1a87
Message-Id: <20220118124224.EF84D3857C51@sourceware.org>
Date: Tue, 18 Jan 2022 12:42:24 +0000 (GMT)
X-BeenThere: libstdc++-cvs@gcc.gnu.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Libstdc++-cvs mailing list
List-Unsubscribe:
List-Archive:
List-Help:
List-Subscribe:
X-List-Received-Date: Tue, 18 Jan 2022 12:42:25 -0000

https://gcc.gnu.org/g:880787aef7a985a80f88a14f830fb554a33b1a87

commit 880787aef7a985a80f88a14f830fb554a33b1a87
Merge: d8c9e50646a 38ec23fafb1
Author: Jakub Jelinek
Date:   Tue Jan 18 13:41:24 2022 +0100

    Merge commit 'r12-6669-g38ec23fafb167ddfe840d7bb22b3e943d8a7d29e' into redhat/gcc-12-branch

Diff:
 ChangeLog | 4 + MAINTAINERS | 14 +- contrib/ChangeLog | 14 + contrib/filter-clang-warnings.py | 14 +- contrib/gcc_update | 6 +- contrib/git-backport.py | 48 + contrib/header-tools/ChangeLog | 4 + contrib/header-tools/README | 34 +- contrib/maintainers-verify.sh | 45 - contrib/paranoia.cc | 2 +- fixincludes/ChangeLog | 18 + fixincludes/fixincl.x | 180 +- fixincludes/inclhack.def | 123 + fixincludes/tests/base/fcntl.h | 33 + fixincludes/tests/base/math.h | 34 + fixincludes/tests/base/time.h | 15 + gcc/BASE-VER | 2 +- gcc/ChangeLog | 2645 + gcc/DATESTAMP | 2 +- gcc/Makefile.in | 322 +- gcc/ada/ChangeLog | 123 + gcc/ada/gcc-interface/Make-lang.in | 6 +- gcc/ada/gcc-interface/config-lang.in | 2 +- gcc/ada/gcc-interface/{cuintp.c => cuintp.cc} | 0 gcc/ada/gcc-interface/decl.c | 10661 ---- gcc/ada/gcc-interface/decl.cc | 10661 ++++ gcc/ada/gcc-interface/gigi.h | 16 +- gcc/ada/gcc-interface/lang-specs.h | 2 +- gcc/ada/gcc-interface/{misc.c => misc.cc} | 0 gcc/ada/gcc-interface/{targtyps.c => targtyps.cc} | 0 gcc/ada/gcc-interface/{trans.c => trans.cc} | 0 gcc/ada/gcc-interface/utils.c | 7156 --- gcc/ada/gcc-interface/utils.cc | 7156 +++ gcc/ada/gcc-interface/utils2.c | 3050 -- gcc/ada/gcc-interface/utils2.cc | 3050 ++ gcc/ada/init.c | 2 +- gcc/ada/set_targ.ads | 4 +- gcc/{adjust-alignment.c => adjust-alignment.cc} | 0 gcc/{alias.c => alias.cc} | 0 gcc/alias.h | 2 +- gcc/{alloc-pool.c => alloc-pool.cc} | 0 gcc/analyzer/ChangeLog | 39 + gcc/analyzer/analyzer.cc | 4 +- gcc/analyzer/region-model-asm.cc | 2 +- gcc/analyzer/region.cc | 2 +- gcc/analyzer/sm-malloc.cc | 2 +- gcc/analyzer/supergraph.cc | 2 +- gcc/asan.c | 4692 -- gcc/asan.cc | 4692 ++ gcc/{attribs.c => attribs.cc} | 0 gcc/{auto-inc-dec.c => auto-inc-dec.cc} | 0 gcc/{auto-profile.c => auto-profile.cc} | 0 gcc/auto-profile.h | 2 +- gcc/basic-block.h | 2 +- gcc/{bb-reorder.c => bb-reorder.cc} | 0 gcc/{bitmap.c => bitmap.cc} | 0 gcc/btfout.c | 1133 - gcc/btfout.cc | 1133 + gcc/builtins.c | 11184 ----- gcc/builtins.cc | 11184 +++++ gcc/c-family/ChangeLog | 86 + gcc/c-family/c-ada-spec.c | 3528 -- gcc/c-family/c-ada-spec.cc | 3528 ++ gcc/c-family/c-ada-spec.h | 2 +- 
gcc/c-family/{c-attribs.c => c-attribs.cc} | 0 gcc/c-family/c-common.c | 9466 ---- gcc/c-family/c-common.cc | 9466 ++++ gcc/c-family/c-common.h | 30 +- gcc/c-family/c-cppbuiltin.c | 2009 - gcc/c-family/c-cppbuiltin.cc | 2009 + gcc/c-family/{c-dump.c => c-dump.cc} | 0 gcc/c-family/c-format.c | 5439 --- gcc/c-family/c-format.cc | 5439 +++ gcc/c-family/c-gimplify.c | 738 - gcc/c-family/c-gimplify.cc | 738 + gcc/c-family/{c-indentation.c => c-indentation.cc} | 0 gcc/c-family/c-indentation.h | 2 +- gcc/c-family/{c-lex.c => c-lex.cc} | 0 gcc/c-family/c-objc.h | 2 +- gcc/c-family/c-omp.c | 3265 -- gcc/c-family/c-omp.cc | 3265 ++ gcc/c-family/c-opts.c | 1842 - gcc/c-family/c-opts.cc | 1842 + gcc/c-family/{c-pch.c => c-pch.cc} | 0 gcc/c-family/{c-ppoutput.c => c-ppoutput.cc} | 0 gcc/c-family/c-pragma.c | 1656 - gcc/c-family/c-pragma.cc | 1656 + .../{c-pretty-print.c => c-pretty-print.cc} | 0 gcc/c-family/c-pretty-print.h | 2 +- gcc/c-family/{c-semantics.c => c-semantics.cc} | 0 gcc/c-family/{c-ubsan.c => c-ubsan.cc} | 0 gcc/c-family/{c-warn.c => c-warn.cc} | 0 gcc/c-family/c.opt | 20 + gcc/c-family/{cppspec.c => cppspec.cc} | 0 gcc/c-family/{stub-objc.c => stub-objc.cc} | 0 gcc/c/ChangeLog | 55 + gcc/c/Make-lang.in | 10 +- gcc/c/{c-aux-info.c => c-aux-info.cc} | 0 gcc/c/c-convert.c | 207 - gcc/c/c-convert.cc | 207 + gcc/c/c-decl.c | 12469 ----- gcc/c/c-decl.cc | 12469 +++++ gcc/c/{c-errors.c => c-errors.cc} | 0 gcc/c/{c-fold.c => c-fold.cc} | 0 gcc/c/{c-lang.c => c-lang.cc} | 0 gcc/c/{c-objc-common.c => c-objc-common.cc} | 0 gcc/c/c-objc-common.h | 2 +- gcc/c/c-parser.c | 23404 --------- gcc/c/c-parser.cc | 23404 +++++++++ gcc/c/c-parser.h | 2 +- gcc/c/c-tree.h | 22 +- gcc/c/c-typeck.c | 16079 ------ gcc/c/c-typeck.cc | 16079 ++++++ gcc/c/config-lang.in | 2 +- gcc/c/{gccspec.c => gccspec.cc} | 0 gcc/c/{gimple-parser.c => gimple-parser.cc} | 0 gcc/caller-save.c | 1400 - gcc/caller-save.cc | 1400 + gcc/calls.c | 5254 -- gcc/calls.cc | 5254 ++ gcc/{ccmp.c => ccmp.cc} | 0 gcc/cfg-flags.def | 6 +- gcc/{cfg.c => cfg.cc} | 0 gcc/cfganal.c | 1934 - gcc/cfganal.cc | 1934 + gcc/{cfgbuild.c => cfgbuild.cc} | 0 gcc/cfgcleanup.c | 3339 -- gcc/cfgcleanup.cc | 3339 ++ gcc/cfgexpand.c | 7030 --- gcc/cfgexpand.cc | 7030 +++ gcc/cfghooks.c | 1560 - gcc/cfghooks.cc | 1560 + gcc/cfghooks.h | 4 +- gcc/{cfgloop.c => cfgloop.cc} | 0 gcc/{cfgloopanal.c => cfgloopanal.cc} | 0 gcc/{cfgloopmanip.c => cfgloopmanip.cc} | 0 gcc/cfgrtl.c | 5366 -- gcc/cfgrtl.cc | 5366 ++ gcc/cgraph.c | 4273 -- gcc/cgraph.cc | 4273 ++ gcc/cgraph.h | 28 +- gcc/{cgraphbuild.c => cgraphbuild.cc} | 0 gcc/cgraphclones.c | 1160 - gcc/cgraphclones.cc | 1160 + gcc/cgraphunit.c | 2595 - gcc/cgraphunit.cc | 2595 + gcc/{collect-utils.c => collect-utils.cc} | 0 gcc/collect-utils.h | 2 +- gcc/{collect2-aix.c => collect2-aix.cc} | 0 gcc/collect2-aix.h | 2 +- gcc/collect2.c | 3078 -- gcc/collect2.cc | 3078 ++ gcc/combine-stack-adj.c | 854 - gcc/combine-stack-adj.cc | 854 + gcc/combine.c | 14960 ------ gcc/combine.cc | 14960 ++++++ gcc/common.opt | 10 +- .../{common-targhooks.c => common-targhooks.cc} | 0 gcc/common/config/aarch64/aarch64-common.c | 551 - gcc/common/config/aarch64/aarch64-common.cc | 551 + .../alpha/{alpha-common.c => alpha-common.cc} | 0 .../config/arc/{arc-common.c => arc-common.cc} | 0 gcc/common/config/arm/arm-common.c | 1116 - gcc/common/config/arm/arm-common.cc | 1116 + gcc/common/config/avr/avr-common.c | 153 - gcc/common/config/avr/avr-common.cc | 153 + .../config/bfin/{bfin-common.c => bfin-common.cc} | 0 .../config/bpf/{bpf-common.c => 
bpf-common.cc} | 0 .../config/c6x/{c6x-common.c => c6x-common.cc} | 0 .../config/cr16/{cr16-common.c => cr16-common.cc} | 0 .../config/cris/{cris-common.c => cris-common.cc} | 0 .../config/csky/{csky-common.c => csky-common.cc} | 0 .../config/{default-common.c => default-common.cc} | 0 .../{epiphany-common.c => epiphany-common.cc} | 0 .../config/fr30/{fr30-common.c => fr30-common.cc} | 0 .../config/frv/{frv-common.c => frv-common.cc} | 0 .../config/gcn/{gcn-common.c => gcn-common.cc} | 0 .../h8300/{h8300-common.c => h8300-common.cc} | 0 .../config/i386/{i386-common.c => i386-common.cc} | 0 gcc/common/config/i386/i386-isas.h | 4 +- .../config/ia64/{ia64-common.c => ia64-common.cc} | 0 .../iq2000/{iq2000-common.c => iq2000-common.cc} | 0 .../config/lm32/{lm32-common.c => lm32-common.cc} | 0 .../config/m32r/{m32r-common.c => m32r-common.cc} | 0 .../config/m68k/{m68k-common.c => m68k-common.cc} | 0 .../mcore/{mcore-common.c => mcore-common.cc} | 0 .../{microblaze-common.c => microblaze-common.cc} | 0 .../config/mips/{mips-common.c => mips-common.cc} | 0 .../config/mmix/{mmix-common.c => mmix-common.cc} | 0 .../{mn10300-common.c => mn10300-common.cc} | 0 .../msp430/{msp430-common.c => msp430-common.cc} | 0 .../nds32/{nds32-common.c => nds32-common.cc} | 0 .../nios2/{nios2-common.c => nios2-common.cc} | 0 .../nvptx/{nvptx-common.c => nvptx-common.cc} | 0 .../config/or1k/{or1k-common.c => or1k-common.cc} | 0 gcc/common/config/pa/{pa-common.c => pa-common.cc} | 0 .../pdp11/{pdp11-common.c => pdp11-common.cc} | 0 .../config/pru/{pru-common.c => pru-common.cc} | 0 .../riscv/{riscv-common.c => riscv-common.cc} | 0 .../rs6000/{rs6000-common.c => rs6000-common.cc} | 0 gcc/common/config/rx/{rx-common.c => rx-common.cc} | 0 .../config/s390/{s390-common.c => s390-common.cc} | 0 gcc/common/config/sh/{sh-common.c => sh-common.cc} | 0 .../sparc/{sparc-common.c => sparc-common.cc} | 0 .../tilegx/{tilegx-common.c => tilegx-common.cc} | 0 .../{tilepro-common.c => tilepro-common.cc} | 0 .../config/v850/{v850-common.c => v850-common.cc} | 0 .../config/vax/{vax-common.c => vax-common.cc} | 0 .../visium/{visium-common.c => visium-common.cc} | 0 .../{xstormy16-common.c => xstormy16-common.cc} | 0 .../xtensa/{xtensa-common.c => xtensa-common.cc} | 0 gcc/{compare-elim.c => compare-elim.cc} | 0 gcc/conditions.h | 2 +- gcc/config.gcc | 42 +- ...{aarch64-bti-insert.c => aarch64-bti-insert.cc} | 0 gcc/config/aarch64/aarch64-builtins.c | 3214 -- gcc/config/aarch64/aarch64-builtins.cc | 3214 ++ gcc/config/aarch64/{aarch64-c.c => aarch64-c.cc} | 0 gcc/config/aarch64/{aarch64-d.c => aarch64-d.cc} | 0 gcc/config/aarch64/aarch64-protos.h | 4 +- gcc/config/aarch64/aarch64-sve-builtins.cc | 2 +- gcc/config/aarch64/aarch64.c | 26861 ----------- gcc/config/aarch64/aarch64.cc | 26862 +++++++++++ gcc/config/aarch64/aarch64.h | 6 +- gcc/config/aarch64/cortex-a57-fma-steering.c | 1096 - gcc/config/aarch64/cortex-a57-fma-steering.cc | 1096 + gcc/config/aarch64/driver-aarch64.c | 470 - gcc/config/aarch64/driver-aarch64.cc | 470 + ...oidance.c => falkor-tag-collision-avoidance.cc} | 0 ...ost-aarch64-darwin.c => host-aarch64-darwin.cc} | 0 gcc/config/aarch64/t-aarch64 | 26 +- gcc/config/aarch64/x-aarch64 | 2 +- gcc/config/aarch64/x-darwin | 2 +- gcc/config/alpha/alpha-protos.h | 2 +- gcc/config/alpha/alpha.c | 10058 ---- gcc/config/alpha/alpha.cc | 10058 ++++ gcc/config/alpha/alpha.h | 8 +- gcc/config/alpha/alpha.md | 4 +- gcc/config/alpha/driver-alpha.c | 101 - gcc/config/alpha/driver-alpha.cc | 101 + gcc/config/alpha/x-alpha | 2 +- 
gcc/config/arc/{arc-c.c => arc-c.cc} | 0 gcc/config/arc/arc-protos.h | 2 +- gcc/config/arc/arc.c | 11769 ----- gcc/config/arc/arc.cc | 11769 +++++ gcc/config/arc/arc.md | 8 +- gcc/config/arc/builtins.def | 4 +- gcc/config/arc/{driver-arc.c => driver-arc.cc} | 0 gcc/config/arc/t-arc | 6 +- gcc/config/arm/{aarch-common.c => aarch-common.cc} | 0 gcc/config/arm/{arm-builtins.c => arm-builtins.cc} | 0 gcc/config/arm/arm-c.c | 505 - gcc/config/arm/arm-c.cc | 505 + gcc/config/arm/{arm-d.c => arm-d.cc} | 0 gcc/config/arm/arm-protos.h | 8 +- gcc/config/arm/arm.c | 34143 ------------- gcc/config/arm/arm.cc | 34143 +++++++++++++ gcc/config/arm/arm.h | 8 +- gcc/config/arm/arm.md | 2 +- gcc/config/arm/driver-arm.c | 137 - gcc/config/arm/driver-arm.cc | 137 + gcc/config/arm/symbian.h | 2 +- gcc/config/arm/t-arm | 16 +- gcc/config/arm/thumb1.md | 10 +- gcc/config/arm/x-arm | 2 +- gcc/config/avr/avr-c.c | 509 - gcc/config/avr/avr-c.cc | 509 + gcc/config/avr/{avr-devices.c => avr-devices.cc} | 0 gcc/config/avr/avr-fixed.md | 2 +- gcc/config/avr/avr-log.c | 325 - gcc/config/avr/avr-log.cc | 325 + gcc/config/avr/avr-mcus.def | 2 +- gcc/config/avr/avr-modes.def | 2 +- gcc/config/avr/avr-passes.def | 2 +- gcc/config/avr/avr-protos.h | 4 +- gcc/config/avr/avr.c | 14717 ------ gcc/config/avr/avr.cc | 14717 ++++++ gcc/config/avr/avr.h | 4 +- gcc/config/avr/avr.md | 6 +- gcc/config/avr/builtins.def | 4 +- gcc/config/avr/{driver-avr.c => driver-avr.cc} | 0 gcc/config/avr/gen-avr-mmcu-specs.c | 323 - gcc/config/avr/gen-avr-mmcu-specs.cc | 323 + gcc/config/avr/gen-avr-mmcu-texi.c | 202 - gcc/config/avr/gen-avr-mmcu-texi.cc | 202 + gcc/config/avr/t-avr | 18 +- gcc/config/bfin/bfin.c | 5883 --- gcc/config/bfin/bfin.cc | 5883 +++ gcc/config/bpf/bpf-protos.h | 2 +- gcc/config/bpf/{bpf.c => bpf.cc} | 0 gcc/config/bpf/bpf.h | 2 +- gcc/config/bpf/{coreout.c => coreout.cc} | 0 gcc/config/bpf/t-bpf | 2 +- gcc/config/c6x/c6x-protos.h | 4 +- gcc/config/c6x/{c6x.c => c6x.cc} | 0 gcc/config/cr16/cr16-protos.h | 2 +- gcc/config/cr16/{cr16.c => cr16.cc} | 0 gcc/config/cris/cris.c | 3729 -- gcc/config/cris/cris.cc | 3729 ++ gcc/config/cris/cris.h | 8 +- gcc/config/cris/cris.opt | 2 +- gcc/config/cris/sync.md | 2 +- gcc/config/csky/csky.c | 7329 --- gcc/config/csky/csky.cc | 7329 +++ gcc/config/darwin-c.c | 889 - gcc/config/darwin-c.cc | 889 + gcc/config/{darwin-d.c => darwin-d.cc} | 0 gcc/config/{darwin-driver.c => darwin-driver.cc} | 0 gcc/config/darwin-f.c | 60 - gcc/config/darwin-f.cc | 60 + gcc/config/darwin-sections.def | 8 +- gcc/config/darwin.c | 3886 -- gcc/config/darwin.cc | 3886 ++ gcc/config/darwin.h | 4 +- gcc/config/{default-c.c => default-c.cc} | 0 gcc/config/{default-d.c => default-d.cc} | 0 gcc/config/{dragonfly-d.c => dragonfly-d.cc} | 0 gcc/config/elfos.h | 2 +- gcc/config/epiphany/epiphany-sched.md | 2 +- gcc/config/epiphany/epiphany.c | 3047 -- gcc/config/epiphany/epiphany.cc | 3047 ++ gcc/config/epiphany/epiphany.h | 6 +- gcc/config/epiphany/mode-switch-use.c | 107 - gcc/config/epiphany/mode-switch-use.cc | 107 + gcc/config/epiphany/predicates.md | 2 +- .../{resolve-sw-modes.c => resolve-sw-modes.cc} | 0 gcc/config/epiphany/t-epiphany | 4 +- gcc/config/fr30/fr30-protos.h | 2 +- gcc/config/fr30/{fr30.c => fr30.cc} | 0 gcc/config/{freebsd-d.c => freebsd-d.cc} | 0 gcc/config/frv/frv-protos.h | 2 +- gcc/config/frv/frv.c | 9451 ---- gcc/config/frv/frv.cc | 9451 ++++ gcc/config/frv/frv.h | 2 +- gcc/config/ft32/ft32-protos.h | 2 +- gcc/config/ft32/{ft32.c => ft32.cc} | 0 gcc/config/gcn/{driver-gcn.c => driver-gcn.cc} | 
0 gcc/config/gcn/gcn-hsa.h | 2 +- gcc/config/gcn/{gcn-run.c => gcn-run.cc} | 0 gcc/config/gcn/{gcn-tree.c => gcn-tree.cc} | 0 gcc/config/gcn/gcn.c | 6672 --- gcc/config/gcn/gcn.cc | 6672 +++ gcc/config/gcn/mkoffload.c | 1174 - gcc/config/gcn/mkoffload.cc | 1174 + gcc/config/gcn/t-gcn-hsa | 8 +- gcc/config/gcn/t-omp-device | 2 +- gcc/config/{glibc-c.c => glibc-c.cc} | 0 gcc/config/{glibc-d.c => glibc-d.cc} | 0 gcc/config/h8300/h8300-protos.h | 4 +- gcc/config/h8300/h8300.c | 5632 --- gcc/config/h8300/h8300.cc | 5632 +++ gcc/config/h8300/h8300.h | 6 +- gcc/config/{host-darwin.c => host-darwin.cc} | 0 gcc/config/{host-hpux.c => host-hpux.cc} | 0 gcc/config/{host-linux.c => host-linux.cc} | 0 gcc/config/{host-netbsd.c => host-netbsd.cc} | 0 gcc/config/{host-openbsd.c => host-openbsd.cc} | 0 gcc/config/{host-solaris.c => host-solaris.cc} | 0 gcc/config/i386/cygming.h | 4 +- gcc/config/i386/{djgpp.c => djgpp.cc} | 0 gcc/config/i386/djgpp.h | 2 +- gcc/config/i386/dragonfly.h | 2 +- gcc/config/i386/driver-i386.c | 841 - gcc/config/i386/driver-i386.cc | 841 + .../i386/{driver-mingw32.c => driver-mingw32.cc} | 0 gcc/config/i386/freebsd.h | 2 +- .../i386/{gnu-property.c => gnu-property.cc} | 0 gcc/config/i386/{host-cygwin.c => host-cygwin.cc} | 0 .../{host-i386-darwin.c => host-i386-darwin.cc} | 0 .../i386/{host-mingw32.c => host-mingw32.cc} | 0 .../i386/{i386-builtins.c => i386-builtins.cc} | 0 gcc/config/i386/i386-c.c | 817 - gcc/config/i386/i386-c.cc | 817 + gcc/config/i386/{i386-d.c => i386-d.cc} | 0 gcc/config/i386/i386-expand.c | 23247 --------- gcc/config/i386/i386-expand.cc | 23280 +++++++++ .../i386/{i386-features.c => i386-features.cc} | 0 gcc/config/i386/i386-options.c | 3863 -- gcc/config/i386/i386-options.cc | 3863 ++ gcc/config/i386/i386-protos.h | 13 +- gcc/config/i386/i386.c | 24709 ---------- gcc/config/i386/i386.cc | 24709 ++++++++++ gcc/config/i386/i386.h | 18 +- gcc/config/i386/i386.md | 28 +- ...{intelmic-mkoffload.c => intelmic-mkoffload.cc} | 0 gcc/config/i386/lynx.h | 2 +- gcc/config/i386/mmx.md | 8 +- gcc/config/i386/{msformat-c.c => msformat-c.cc} | 0 gcc/config/i386/sse.md | 87 +- gcc/config/i386/subst.md | 7 + gcc/config/i386/t-cygming | 18 +- gcc/config/i386/t-djgpp | 4 +- gcc/config/i386/t-gnu-property | 2 +- gcc/config/i386/t-i386 | 20 +- gcc/config/i386/t-intelmic | 2 +- gcc/config/i386/t-omp-device | 4 +- gcc/config/i386/winnt-cxx.c | 177 - gcc/config/i386/winnt-cxx.cc | 177 + gcc/config/i386/{winnt-d.c => winnt-d.cc} | 0 gcc/config/i386/{winnt-stubs.c => winnt-stubs.cc} | 0 gcc/config/i386/winnt.c | 1377 - gcc/config/i386/winnt.cc | 1377 + gcc/config/i386/x-cygwin | 4 +- gcc/config/i386/x-darwin | 2 +- gcc/config/i386/x-i386 | 2 +- gcc/config/i386/x-mingw32 | 6 +- ...86-tune-sched-atom.c => x86-tune-sched-atom.cc} | 0 .../{x86-tune-sched-bd.c => x86-tune-sched-bd.cc} | 0 gcc/config/i386/x86-tune-sched-core.c | 257 - gcc/config/i386/x86-tune-sched-core.cc | 257 + .../i386/{x86-tune-sched.c => x86-tune-sched.cc} | 0 gcc/config/i386/x86-tune.def | 10 +- gcc/config/i386/xm-djgpp.h | 4 +- gcc/config/ia64/freebsd.h | 4 +- gcc/config/ia64/hpux.h | 2 +- gcc/config/ia64/{ia64-c.c => ia64-c.cc} | 0 gcc/config/ia64/ia64-protos.h | 2 +- gcc/config/ia64/ia64.c | 11927 ----- gcc/config/ia64/ia64.cc | 11927 +++++ gcc/config/ia64/ia64.h | 2 +- gcc/config/ia64/ia64.md | 4 +- gcc/config/ia64/predicates.md | 2 +- gcc/config/ia64/sysv4.h | 4 +- gcc/config/ia64/t-ia64 | 6 +- gcc/config/iq2000/{iq2000.c => iq2000.cc} | 0 gcc/config/iq2000/iq2000.h | 2 +- gcc/config/iq2000/iq2000.md | 4 +- 
gcc/config/{linux.c => linux.cc} | 0 gcc/config/linux.h | 4 +- gcc/config/lm32/{lm32.c => lm32.cc} | 0 gcc/config/m32c/{m32c-pragma.c => m32c-pragma.cc} | 0 gcc/config/m32c/m32c.c | 4506 -- gcc/config/m32c/m32c.cc | 4506 ++ gcc/config/m32c/m32c.h | 2 +- gcc/config/m32c/t-m32c | 2 +- gcc/config/m32r/m32r-protos.h | 2 +- gcc/config/m32r/m32r.c | 2959 -- gcc/config/m32r/m32r.cc | 2959 ++ gcc/config/m32r/m32r.h | 4 +- gcc/config/m32r/m32r.md | 2 +- gcc/config/m68k/m68k-isas.def | 2 +- gcc/config/m68k/m68k-microarchs.def | 2 +- gcc/config/m68k/m68k-protos.h | 6 +- gcc/config/m68k/m68k.c | 7154 --- gcc/config/m68k/m68k.cc | 7154 +++ gcc/config/m68k/m68k.h | 4 +- gcc/config/m68k/m68k.md | 4 +- gcc/config/m68k/m68kemb.h | 2 +- gcc/config/m68k/uclinux.h | 2 +- gcc/config/mcore/mcore-protos.h | 2 +- gcc/config/mcore/mcore.c | 3290 -- gcc/config/mcore/mcore.cc | 3290 ++ gcc/config/mcore/mcore.h | 4 +- gcc/config/mcore/mcore.md | 2 +- .../microblaze/{microblaze-c.c => microblaze-c.cc} | 0 gcc/config/microblaze/microblaze-protos.h | 2 +- gcc/config/microblaze/microblaze.c | 4070 -- gcc/config/microblaze/microblaze.cc | 4070 ++ gcc/config/microblaze/microblaze.h | 2 +- gcc/config/microblaze/microblaze.md | 4 +- gcc/config/microblaze/t-microblaze | 4 +- gcc/config/mips/driver-native.c | 91 - gcc/config/mips/driver-native.cc | 91 + .../{frame-header-opt.c => frame-header-opt.cc} | 0 gcc/config/mips/loongson2ef.md | 2 +- gcc/config/mips/{mips-d.c => mips-d.cc} | 0 gcc/config/mips/mips-protos.h | 2 +- gcc/config/mips/mips.c | 22925 --------- gcc/config/mips/mips.cc | 22925 +++++++++ gcc/config/mips/mips.h | 2 +- gcc/config/mips/mips.md | 2 +- gcc/config/mips/t-mips | 4 +- gcc/config/mips/x-native | 2 +- gcc/config/mmix/mmix-protos.h | 2 +- gcc/config/mmix/mmix.c | 2872 -- gcc/config/mmix/mmix.cc | 2872 ++ gcc/config/mmix/mmix.h | 2 +- gcc/config/mmix/mmix.md | 4 +- gcc/config/mmix/predicates.md | 2 +- gcc/config/mn10300/mn10300.c | 3423 -- gcc/config/mn10300/mn10300.cc | 3423 ++ gcc/config/mn10300/mn10300.h | 6 +- gcc/config/moxie/moxie-protos.h | 2 +- gcc/config/moxie/{moxie.c => moxie.cc} | 0 gcc/config/moxie/uclinux.h | 2 +- .../msp430/{driver-msp430.c => driver-msp430.cc} | 0 gcc/config/msp430/{msp430-c.c => msp430-c.cc} | 0 gcc/config/msp430/msp430-devices.c | 1110 - gcc/config/msp430/msp430-devices.cc | 1110 + gcc/config/msp430/msp430.c | 4521 -- gcc/config/msp430/msp430.cc | 4521 ++ gcc/config/msp430/msp430.h | 4 +- gcc/config/msp430/t-msp430 | 6 +- gcc/config/nds32/nds32-cost.c | 726 - gcc/config/nds32/nds32-cost.cc | 726 + gcc/config/nds32/nds32-doubleword.md | 2 +- .../nds32/{nds32-fp-as-gp.c => nds32-fp-as-gp.cc} | 0 .../{nds32-intrinsic.c => nds32-intrinsic.cc} | 0 gcc/config/nds32/{nds32-isr.c => nds32-isr.cc} | 0 ...{nds32-md-auxiliary.c => nds32-md-auxiliary.cc} | 0 ...manipulation.c => nds32-memory-manipulation.cc} | 0 ...es-auxiliary.c => nds32-pipelines-auxiliary.cc} | 0 .../{nds32-predicates.c => nds32-predicates.cc} | 0 .../{nds32-relax-opt.c => nds32-relax-opt.cc} | 0 gcc/config/nds32/{nds32-utils.c => nds32-utils.cc} | 0 gcc/config/nds32/nds32.c | 5895 --- gcc/config/nds32/nds32.cc | 5895 +++ gcc/config/nds32/nds32.h | 10 +- gcc/config/nds32/predicates.md | 12 +- gcc/config/nds32/t-nds32 | 40 +- gcc/config/{netbsd-d.c => netbsd-d.cc} | 0 gcc/config/{netbsd.c => netbsd.cc} | 0 gcc/config/nios2/nios2.c | 5624 --- gcc/config/nios2/nios2.cc | 5624 +++ gcc/config/nvptx/{mkoffload.c => mkoffload.cc} | 0 gcc/config/nvptx/{nvptx-c.c => nvptx-c.cc} | 0 gcc/config/nvptx/nvptx-protos.h | 2 +- 
gcc/config/nvptx/nvptx.c | 7011 --- gcc/config/nvptx/nvptx.cc | 7011 +++ gcc/config/nvptx/nvptx.h | 2 +- gcc/config/nvptx/t-nvptx | 4 +- gcc/config/nvptx/t-omp-device | 2 +- gcc/config/{openbsd-d.c => openbsd-d.cc} | 0 gcc/config/or1k/{or1k.c => or1k.cc} | 0 gcc/config/pa/elf.h | 2 +- gcc/config/pa/{pa-d.c => pa-d.cc} | 0 gcc/config/pa/pa-linux.h | 2 +- gcc/config/pa/pa-netbsd.h | 2 +- gcc/config/pa/pa-openbsd.h | 2 +- gcc/config/pa/pa-protos.h | 10 +- gcc/config/pa/pa.c | 11080 ----- gcc/config/pa/pa.cc | 11080 +++++ gcc/config/pa/pa.h | 8 +- gcc/config/pa/pa.md | 8 +- gcc/config/pa/som.h | 2 +- gcc/config/pa/t-pa | 2 +- gcc/config/pdp11/pdp11.c | 2417 - gcc/config/pdp11/pdp11.cc | 2417 + gcc/config/pdp11/pdp11.h | 2 +- gcc/config/pdp11/pdp11.md | 2 +- gcc/config/pdp11/t-pdp11 | 2 +- gcc/config/pru/{pru-passes.c => pru-passes.cc} | 0 gcc/config/pru/{pru-pragma.c => pru-pragma.cc} | 0 gcc/config/pru/{pru.c => pru.cc} | 0 gcc/config/pru/pru.md | 2 +- gcc/config/pru/t-pru | 4 +- .../riscv/{riscv-builtins.c => riscv-builtins.cc} | 0 gcc/config/riscv/{riscv-c.c => riscv-c.cc} | 0 gcc/config/riscv/{riscv-d.c => riscv-d.cc} | 0 gcc/config/riscv/riscv-protos.h | 10 +- ...-shorten-memrefs.c => riscv-shorten-memrefs.cc} | 0 gcc/config/riscv/{riscv-sr.c => riscv-sr.cc} | 0 gcc/config/riscv/riscv.c | 5783 --- gcc/config/riscv/riscv.cc | 5783 +++ gcc/config/riscv/riscv.h | 4 +- gcc/config/riscv/t-riscv | 16 +- gcc/config/rl78/{rl78-c.c => rl78-c.cc} | 0 gcc/config/rl78/rl78.c | 4977 -- gcc/config/rl78/rl78.cc | 4977 ++ gcc/config/rl78/t-rl78 | 2 +- gcc/config/rs6000/aix.h | 4 +- gcc/config/rs6000/aix71.h | 2 +- gcc/config/rs6000/aix72.h | 2 +- gcc/config/rs6000/aix73.h | 2 +- gcc/config/rs6000/altivec.md | 25 - gcc/config/rs6000/darwin.h | 6 +- gcc/config/rs6000/driver-rs6000.c | 638 - gcc/config/rs6000/driver-rs6000.cc | 638 + gcc/config/rs6000/freebsd.h | 4 +- gcc/config/rs6000/freebsd64.h | 4 +- .../rs6000/{host-darwin.c => host-darwin.cc} | 0 .../{host-ppc64-darwin.c => host-ppc64-darwin.cc} | 0 gcc/config/rs6000/lynx.h | 2 +- gcc/config/rs6000/rbtree.c | 242 - gcc/config/rs6000/rbtree.cc | 242 + gcc/config/rs6000/rbtree.h | 2 +- gcc/config/rs6000/rs6000-c.c | 2076 - gcc/config/rs6000/rs6000-c.cc | 2076 + gcc/config/rs6000/rs6000-call.c | 6521 --- gcc/config/rs6000/rs6000-call.cc | 6521 +++ gcc/config/rs6000/rs6000-cpus.def | 2 +- gcc/config/rs6000/{rs6000-d.c => rs6000-d.cc} | 0 gcc/config/rs6000/rs6000-gen-builtins.c | 3076 -- gcc/config/rs6000/rs6000-gen-builtins.cc | 3076 ++ gcc/config/rs6000/rs6000-internal.h | 10 +- .../rs6000/{rs6000-linux.c => rs6000-linux.cc} | 0 gcc/config/rs6000/rs6000-logue.c | 5689 --- gcc/config/rs6000/rs6000-logue.cc | 5689 +++ gcc/config/rs6000/rs6000-overload.def | 10 +- gcc/config/rs6000/rs6000-p8swap.c | 2788 -- gcc/config/rs6000/rs6000-p8swap.cc | 2788 ++ .../{rs6000-pcrel-opt.c => rs6000-pcrel-opt.cc} | 0 gcc/config/rs6000/rs6000-protos.h | 6 +- .../rs6000/{rs6000-string.c => rs6000-string.cc} | 0 gcc/config/rs6000/rs6000.c | 28942 ----------- gcc/config/rs6000/rs6000.cc | 28942 +++++++++++ gcc/config/rs6000/rs6000.h | 16 +- gcc/config/rs6000/rs6000.md | 2 +- gcc/config/rs6000/sysv4.h | 2 +- gcc/config/rs6000/t-linux | 2 +- gcc/config/rs6000/t-linux64 | 2 +- gcc/config/rs6000/t-rs6000 | 34 +- gcc/config/rs6000/vsx.md | 32 +- gcc/config/rs6000/x-darwin | 2 +- gcc/config/rs6000/x-darwin64 | 2 +- gcc/config/rs6000/x-rs6000 | 2 +- gcc/config/rs6000/xcoff.h | 4 +- gcc/config/rx/rx.c | 3812 -- gcc/config/rx/rx.cc | 3812 ++ gcc/config/s390/constraints.md | 8 +- 
gcc/config/s390/driver-native.c | 189 - gcc/config/s390/driver-native.cc | 189 + gcc/config/s390/htmxlintrin.h | 2 +- gcc/config/s390/s390-builtins.def | 8 +- gcc/config/s390/s390-builtins.h | 2 +- gcc/config/s390/s390-c.c | 1100 - gcc/config/s390/s390-c.cc | 1100 + gcc/config/s390/{s390-d.c => s390-d.cc} | 0 gcc/config/s390/s390-opts.h | 2 +- gcc/config/s390/s390-protos.h | 10 +- gcc/config/s390/s390.c | 17478 ------- gcc/config/s390/s390.cc | 17478 +++++++ gcc/config/s390/s390.h | 8 +- gcc/config/s390/s390.md | 8 +- gcc/config/s390/t-s390 | 6 +- gcc/config/s390/vx-builtins.md | 2 +- gcc/config/s390/x-native | 2 +- gcc/config/sh/divtab-sh4-300.c | 77 - gcc/config/sh/divtab-sh4-300.cc | 77 + gcc/config/sh/divtab-sh4.c | 85 - gcc/config/sh/divtab-sh4.cc | 85 + gcc/config/sh/divtab.c | 203 - gcc/config/sh/divtab.cc | 203 + gcc/config/sh/elf.h | 2 +- gcc/config/sh/{sh-c.c => sh-c.cc} | 0 gcc/config/sh/sh-protos.h | 2 +- gcc/config/sh/sh.c | 12630 ----- gcc/config/sh/sh.cc | 12630 +++++ gcc/config/sh/sh.h | 8 +- gcc/config/sh/t-sh | 4 +- gcc/config/{sol2-c.c => sol2-c.cc} | 0 gcc/config/{sol2-cxx.c => sol2-cxx.cc} | 0 gcc/config/{sol2-d.c => sol2-d.cc} | 0 gcc/config/sol2-protos.h | 6 +- gcc/config/{sol2-stubs.c => sol2-stubs.cc} | 0 gcc/config/{sol2.c => sol2.cc} | 0 gcc/config/sol2.h | 4 +- gcc/config/sparc/driver-sparc.c | 169 - gcc/config/sparc/driver-sparc.cc | 169 + gcc/config/sparc/freebsd.h | 4 +- gcc/config/sparc/{sparc-c.c => sparc-c.cc} | 0 gcc/config/sparc/{sparc-d.c => sparc-d.cc} | 0 gcc/config/sparc/sparc-protos.h | 2 +- gcc/config/sparc/sparc.c | 13958 ------ gcc/config/sparc/sparc.cc | 13958 ++++++ gcc/config/sparc/sparc.h | 6 +- gcc/config/sparc/sparc.md | 4 +- gcc/config/sparc/t-sparc | 4 +- gcc/config/sparc/x-sparc | 2 +- gcc/config/stormy16/stormy16.c | 2749 -- gcc/config/stormy16/stormy16.cc | 2749 ++ gcc/config/t-darwin | 10 +- gcc/config/t-dragonfly | 2 +- gcc/config/t-freebsd | 2 +- gcc/config/t-glibc | 4 +- gcc/config/t-linux | 2 +- gcc/config/t-netbsd | 4 +- gcc/config/t-openbsd | 2 +- gcc/config/t-pnt16-warn | 2 +- gcc/config/t-sol2 | 10 +- gcc/config/t-vxworks | 4 +- gcc/config/t-winnt | 2 +- gcc/config/tilegx/{mul-tables.c => mul-tables.cc} | 0 gcc/config/tilegx/t-tilegx | 6 +- gcc/config/tilegx/tilegx-c.c | 55 - gcc/config/tilegx/tilegx-c.cc | 55 + gcc/config/tilegx/tilegx-protos.h | 2 +- gcc/config/tilegx/{tilegx.c => tilegx.cc} | 0 gcc/config/tilegx/tilegx.md | 2 +- gcc/config/tilepro/{mul-tables.c => mul-tables.cc} | 0 gcc/config/tilepro/t-tilepro | 6 +- gcc/config/tilepro/tilepro-c.c | 54 - gcc/config/tilepro/tilepro-c.cc | 54 + gcc/config/tilepro/{tilepro.c => tilepro.cc} | 0 gcc/config/v850/t-v850 | 4 +- gcc/config/v850/{v850-c.c => v850-c.cc} | 0 gcc/config/v850/v850-protos.h | 2 +- gcc/config/v850/v850.c | 3343 -- gcc/config/v850/v850.cc | 3343 ++ gcc/config/v850/v850.h | 6 +- gcc/config/vax/vax.c | 2212 - gcc/config/vax/vax.cc | 2212 + gcc/config/vax/vax.h | 2 +- gcc/config/vax/vax.md | 2 +- gcc/config/visium/visium.c | 4332 -- gcc/config/visium/visium.cc | 4332 ++ gcc/config/visium/visium.h | 6 +- gcc/config/vms/t-vms | 6 +- gcc/config/vms/{vms-c.c => vms-c.cc} | 0 gcc/config/vms/vms-crtlmap.map | 2 +- gcc/config/vms/{vms-f.c => vms-f.cc} | 0 gcc/config/vms/vms-protos.h | 4 +- gcc/config/vms/{vms.c => vms.cc} | 0 gcc/config/vx-common.h | 2 +- gcc/config/{vxworks-c.c => vxworks-c.cc} | 0 gcc/config/{vxworks.c => vxworks.cc} | 0 gcc/config/{winnt-c.c => winnt-c.cc} | 0 gcc/config/x-darwin | 2 +- gcc/config/x-hpux | 4 +- gcc/config/x-linux | 2 +- 
gcc/config/x-netbsd | 4 +- gcc/config/x-openbsd | 4 +- gcc/config/x-solaris | 2 +- gcc/config/xtensa/xtensa-protos.h | 2 +- gcc/config/xtensa/xtensa.c | 4489 -- gcc/config/xtensa/xtensa.cc | 4489 ++ gcc/config/xtensa/xtensa.h | 2 +- gcc/configure | 14 +- gcc/configure.ac | 14 +- gcc/context.c | 44 - gcc/context.cc | 44 + gcc/{convert.c => convert.cc} | 0 gcc/convert.h | 2 +- gcc/coretypes.h | 2 +- gcc/coverage.c | 1392 - gcc/coverage.cc | 1392 + gcc/coverage.h | 2 +- gcc/cp/ChangeLog | 177 + gcc/cp/Make-lang.in | 8 +- gcc/cp/{call.c => call.cc} | 0 gcc/cp/{class.c => class.cc} | 0 gcc/cp/config-lang.in | 34 +- gcc/cp/constexpr.c | 9561 ---- gcc/cp/constexpr.cc | 9561 ++++ gcc/cp/coroutines.cc | 2 +- gcc/cp/cp-gimplify.c | 3280 -- gcc/cp/cp-gimplify.cc | 3285 ++ gcc/cp/cp-lang.c | 291 - gcc/cp/cp-lang.cc | 291 + gcc/cp/cp-objcp-common.c | 594 - gcc/cp/cp-objcp-common.cc | 594 + gcc/cp/cp-objcp-common.h | 4 +- gcc/cp/cp-tree.h | 84 +- gcc/cp/{cp-ubsan.c => cp-ubsan.cc} | 0 gcc/cp/cvt.c | 2153 - gcc/cp/cvt.cc | 2153 + gcc/cp/{cxx-pretty-print.c => cxx-pretty-print.cc} | 0 gcc/cp/decl.c | 18355 ------- gcc/cp/decl.cc | 18355 +++++++ gcc/cp/decl2.c | 5992 --- gcc/cp/decl2.cc | 5992 +++ gcc/cp/{dump.c => dump.cc} | 0 gcc/cp/error.c | 4659 -- gcc/cp/error.cc | 4659 ++ gcc/cp/{except.c => except.cc} | 0 gcc/cp/{expr.c => expr.cc} | 0 gcc/cp/{friend.c => friend.cc} | 0 gcc/cp/{g++spec.c => g++spec.cc} | 0 gcc/cp/init.c | 5354 -- gcc/cp/init.cc | 5354 ++ gcc/cp/{lambda.c => lambda.cc} | 0 gcc/cp/lang-specs.h | 2 +- gcc/cp/{lex.c => lex.cc} | 0 gcc/cp/{mangle.c => mangle.cc} | 0 gcc/cp/method.c | 3506 -- gcc/cp/method.cc | 3506 ++ gcc/cp/module.cc | 4 +- gcc/cp/name-lookup.c | 8944 ---- gcc/cp/name-lookup.cc | 8944 ++++ gcc/cp/name-lookup.h | 2 +- gcc/cp/{optimize.c => optimize.cc} | 0 gcc/cp/parser.c | 48319 ------------------- gcc/cp/parser.cc | 48319 +++++++++++++++++++ gcc/cp/parser.h | 2 +- gcc/cp/pt.c | 30596 ------------ gcc/cp/pt.cc | 30596 ++++++++++++ gcc/cp/{ptree.c => ptree.cc} | 0 gcc/cp/{rtti.c => rtti.cc} | 0 gcc/cp/{search.c => search.cc} | 0 gcc/cp/semantics.c | 12340 ----- gcc/cp/semantics.cc | 12340 +++++ gcc/cp/tree.c | 6180 --- gcc/cp/tree.cc | 6180 +++ gcc/cp/typeck.c | 11350 ----- gcc/cp/typeck.cc | 11350 +++++ gcc/cp/{typeck2.c => typeck2.cc} | 0 gcc/cp/vtable-class-hierarchy.c | 1346 - gcc/cp/vtable-class-hierarchy.cc | 1346 + gcc/{cppbuiltin.c => cppbuiltin.cc} | 0 gcc/{cppdefault.c => cppdefault.cc} | 0 gcc/cppdefault.h | 2 +- gcc/cprop.c | 1974 - gcc/cprop.cc | 1974 + gcc/cse.c | 7736 --- gcc/cse.cc | 7736 +++ gcc/{cselib.c => cselib.cc} | 0 gcc/{ctfc.c => ctfc.cc} | 0 gcc/ctfc.h | 4 +- gcc/{ctfout.c => ctfout.cc} | 0 gcc/d/ChangeLog | 6 + gcc/d/d-gimplify.cc | 2 +- gcc/d/d-incpath.cc | 2 +- gcc/d/lang-specs.h | 2 +- gcc/{data-streamer-in.c => data-streamer-in.cc} | 0 gcc/{data-streamer-out.c => data-streamer-out.cc} | 0 gcc/{data-streamer.c => data-streamer.cc} | 0 gcc/data-streamer.h | 6 +- gcc/{dbgcnt.c => dbgcnt.cc} | 0 gcc/dbgcnt.def | 2 +- gcc/dbxout.c | 3936 -- gcc/dbxout.cc | 3936 ++ gcc/dbxout.h | 2 +- gcc/{dce.c => dce.cc} | 0 gcc/{ddg.c => ddg.cc} | 0 gcc/{debug.c => debug.cc} | 0 gcc/debug.h | 8 +- gcc/df-core.c | 2472 - gcc/df-core.cc | 2472 + gcc/{df-problems.c => df-problems.cc} | 0 gcc/df-scan.c | 4252 -- gcc/df-scan.cc | 4252 ++ gcc/df.h | 8 +- gcc/dfp.c | 745 - gcc/dfp.cc | 745 + gcc/diagnostic-color.c | 348 - gcc/diagnostic-color.cc | 348 + gcc/diagnostic-event-id.h | 2 +- gcc/diagnostic-show-locus.c | 5703 --- gcc/diagnostic-show-locus.cc | 5703 +++ 
gcc/diagnostic-spec.c | 196 - gcc/diagnostic-spec.cc | 205 + gcc/diagnostic-spec.h | 6 +- gcc/diagnostic.c | 2456 - gcc/diagnostic.cc | 2488 + gcc/diagnostic.h | 4 + gcc/doc/avr-mmcu.texi | 4 +- gcc/doc/cfg.texi | 2 +- gcc/doc/contrib.texi | 2 +- gcc/doc/cppinternals.texi | 6 +- gcc/doc/extend.texi | 2 +- gcc/doc/generic.texi | 8 +- gcc/doc/gimple.texi | 8 +- gcc/doc/gty.texi | 8 +- gcc/doc/install.texi | 2 +- gcc/doc/invoke.texi | 128 +- gcc/doc/loop.texi | 2 +- gcc/doc/lto.texi | 40 +- gcc/doc/match-and-simplify.texi | 2 +- gcc/doc/md.texi | 15 +- gcc/doc/optinfo.texi | 4 +- gcc/doc/options.texi | 2 +- gcc/doc/passes.texi | 288 +- gcc/doc/plugins.texi | 4 +- gcc/doc/rtl.texi | 10 +- gcc/doc/sourcebuild.texi | 6 +- gcc/doc/tm.texi | 46 +- gcc/doc/tm.texi.in | 28 +- gcc/doc/tree-ssa.texi | 6 +- gcc/dojump.c | 1300 - gcc/dojump.cc | 1300 + gcc/dojump.h | 2 +- gcc/{dominance.c => dominance.cc} | 0 gcc/{domwalk.c => domwalk.cc} | 0 gcc/{double-int.c => double-int.cc} | 0 gcc/{dse.c => dse.cc} | 0 gcc/dumpfile.c | 2778 -- gcc/dumpfile.cc | 2778 ++ gcc/dumpfile.h | 18 +- gcc/dwarf2asm.c | 1162 - gcc/dwarf2asm.cc | 1162 + gcc/{dwarf2cfi.c => dwarf2cfi.cc} | 0 gcc/{dwarf2ctf.c => dwarf2ctf.cc} | 0 gcc/dwarf2ctf.h | 4 +- gcc/dwarf2out.c | 33147 ------------- gcc/dwarf2out.cc | 33147 +++++++++++++ gcc/dwarf2out.h | 8 +- gcc/{early-remat.c => early-remat.cc} | 0 gcc/{edit-context.c => edit-context.cc} | 0 gcc/emit-rtl.c | 6596 --- gcc/emit-rtl.cc | 6596 +++ gcc/emit-rtl.h | 6 +- gcc/errors.c | 134 - gcc/errors.cc | 134 + gcc/et-forest.c | 884 - gcc/et-forest.cc | 884 + gcc/except.c | 3522 -- gcc/except.cc | 3522 ++ gcc/explow.c | 2321 - gcc/explow.cc | 2321 + gcc/explow.h | 2 +- gcc/{expmed.c => expmed.cc} | 0 gcc/expmed.h | 2 +- gcc/expr.c | 13145 ----- gcc/expr.cc | 13145 +++++ gcc/{fibonacci_heap.c => fibonacci_heap.cc} | 0 gcc/{file-find.c => file-find.cc} | 0 gcc/file-prefix-map.c | 149 - gcc/file-prefix-map.cc | 149 + gcc/final.c | 4662 -- gcc/final.cc | 4662 ++ gcc/{fixed-value.c => fixed-value.cc} | 0 gcc/fixed-value.h | 2 +- gcc/flag-types.h | 20 +- gcc/{fold-const-call.c => fold-const-call.cc} | 0 gcc/fold-const.c | 16787 ------- gcc/fold-const.cc | 16787 +++++++ gcc/fortran/ChangeLog | 254 + gcc/fortran/{arith.c => arith.cc} | 0 gcc/fortran/{array.c => array.cc} | 0 gcc/fortran/{bbt.c => bbt.cc} | 0 gcc/fortran/check.c | 7523 --- gcc/fortran/check.cc | 7523 +++ gcc/fortran/class.c | 3073 -- gcc/fortran/class.cc | 3073 ++ gcc/fortran/config-lang.in | 2 +- gcc/fortran/{constructor.c => constructor.cc} | 0 gcc/fortran/{convert.c => convert.cc} | 0 gcc/fortran/cpp.c | 1203 - gcc/fortran/cpp.cc | 1203 + gcc/fortran/data.c | 848 - gcc/fortran/data.cc | 848 + gcc/fortran/decl.c | 11910 ----- gcc/fortran/decl.cc | 11910 +++++ gcc/fortran/dependency.c | 2336 - gcc/fortran/dependency.cc | 2336 + .../{dump-parse-tree.c => dump-parse-tree.cc} | 0 gcc/fortran/error.c | 1656 - gcc/fortran/error.cc | 1656 + gcc/fortran/expr.c | 6507 --- gcc/fortran/expr.cc | 6507 +++ gcc/fortran/f95-lang.c | 1306 - gcc/fortran/f95-lang.cc | 1306 + .../{frontend-passes.c => frontend-passes.cc} | 0 gcc/fortran/gfc-internals.texi | 12 +- gcc/fortran/gfortran.h | 86 +- gcc/fortran/gfortranspec.c | 450 - gcc/fortran/gfortranspec.cc | 450 + gcc/fortran/interface.c | 5589 --- gcc/fortran/interface.cc | 5589 +++ gcc/fortran/intrinsic.c | 5503 --- gcc/fortran/intrinsic.cc | 5503 +++ gcc/fortran/{io.c => io.cc} | 0 gcc/fortran/iresolve.c | 4050 -- gcc/fortran/iresolve.cc | 4050 ++ gcc/fortran/iso-c-binding.def | 2 +- 
gcc/fortran/lang-specs.h | 4 +- gcc/fortran/libgfortran.h | 2 +- gcc/fortran/match.c | 7264 --- gcc/fortran/match.cc | 7264 +++ gcc/fortran/match.h | 28 +- gcc/fortran/matchexp.c | 903 - gcc/fortran/matchexp.cc | 903 + gcc/fortran/mathbuiltins.def | 2 +- gcc/fortran/{misc.c => misc.cc} | 0 gcc/fortran/module.c | 7581 --- gcc/fortran/module.cc | 7581 +++ gcc/fortran/openmp.c | 9410 ---- gcc/fortran/openmp.cc | 9411 ++++ gcc/fortran/{options.c => options.cc} | 0 gcc/fortran/parse.c | 6987 --- gcc/fortran/parse.cc | 6987 +++ gcc/fortran/{primary.c => primary.cc} | 0 gcc/fortran/resolve.c | 17582 ------- gcc/fortran/resolve.cc | 17582 +++++++ gcc/fortran/{scanner.c => scanner.cc} | 0 gcc/fortran/simplify.c | 8966 ---- gcc/fortran/simplify.cc | 8966 ++++ gcc/fortran/{st.c => st.cc} | 0 gcc/fortran/symbol.c | 5251 -- gcc/fortran/symbol.cc | 5251 ++ gcc/fortran/target-memory.c | 806 - gcc/fortran/target-memory.cc | 806 + gcc/fortran/target-memory.h | 2 +- gcc/fortran/trans-array.c | 11714 ----- gcc/fortran/trans-array.cc | 11714 +++++ gcc/fortran/{trans-common.c => trans-common.cc} | 0 gcc/fortran/trans-const.c | 430 - gcc/fortran/trans-const.cc | 430 + gcc/fortran/trans-decl.c | 7956 --- gcc/fortran/trans-decl.cc | 7956 +++ gcc/fortran/trans-expr.c | 12125 ----- gcc/fortran/trans-expr.cc | 12125 +++++ gcc/fortran/trans-intrinsic.c | 12446 ----- gcc/fortran/trans-intrinsic.cc | 12457 +++++ gcc/fortran/trans-io.c | 2686 -- gcc/fortran/trans-io.cc | 2686 ++ gcc/fortran/trans-openmp.c | 7701 --- gcc/fortran/trans-openmp.cc | 7701 +++ gcc/fortran/trans-stmt.c | 7468 --- gcc/fortran/trans-stmt.cc | 7468 +++ gcc/fortran/trans-stmt.h | 8 +- gcc/fortran/trans-types.c | 3838 -- gcc/fortran/trans-types.cc | 3838 ++ gcc/fortran/trans-types.h | 2 +- gcc/fortran/trans.c | 2452 - gcc/fortran/trans.cc | 2452 + gcc/fortran/trans.h | 14 +- gcc/fp-test.c | 251 - gcc/fp-test.cc | 251 + gcc/{function-tests.c => function-tests.cc} | 0 gcc/function.c | 6964 --- gcc/function.cc | 6964 +++ gcc/function.h | 16 +- gcc/fwprop.c | 1079 - gcc/fwprop.cc | 1079 + gcc/{gcc-ar.c => gcc-ar.cc} | 0 gcc/gcc-main.c | 48 - gcc/gcc-main.cc | 48 + gcc/{gcc-rich-location.c => gcc-rich-location.cc} | 0 gcc/gcc-rich-location.h | 2 +- gcc/gcc-symtab.h | 2 +- gcc/gcc.c | 11276 ----- gcc/gcc.cc | 11276 +++++ gcc/gcc.h | 6 +- gcc/gcov-dump.c | 479 - gcc/gcov-dump.cc | 479 + gcc/{gcov-io.c => gcov-io.cc} | 0 gcc/{gcov-tool.c => gcov-tool.cc} | 0 gcc/gcov.c | 3262 -- gcc/gcov.cc | 3262 ++ gcc/gcse-common.c | 222 - gcc/gcse-common.cc | 222 + gcc/gcse.c | 4136 -- gcc/gcse.cc | 4136 ++ gcc/genattr-common.c | 112 - gcc/genattr-common.cc | 112 + gcc/{genattr.c => genattr.cc} | 0 gcc/genattrtab.c | 5417 --- gcc/genattrtab.cc | 5417 +++ gcc/genautomata.c | 9685 ---- gcc/genautomata.cc | 9685 ++++ gcc/{gencfn-macros.c => gencfn-macros.cc} | 0 gcc/{gencheck.c => gencheck.cc} | 0 gcc/{genchecksum.c => genchecksum.cc} | 0 gcc/{gencodes.c => gencodes.cc} | 0 gcc/genconditions.c | 252 - gcc/genconditions.cc | 252 + gcc/{genconfig.c => genconfig.cc} | 0 gcc/genconstants.c | 105 - gcc/genconstants.cc | 105 + gcc/genemit.c | 952 - gcc/genemit.cc | 952 + gcc/{genenums.c => genenums.cc} | 0 gcc/generic-match-head.c | 101 - gcc/generic-match-head.cc | 101 + gcc/genextract.c | 507 - gcc/genextract.cc | 507 + gcc/{genflags.c => genflags.cc} | 0 gcc/gengenrtl.c | 358 - gcc/gengenrtl.cc | 358 + gcc/gengtype-parse.c | 1179 - gcc/gengtype-parse.cc | 1179 + gcc/gengtype-state.c | 2661 - gcc/gengtype-state.cc | 2661 + gcc/gengtype.c | 5405 --- gcc/gengtype.cc | 5399 +++ 
gcc/gengtype.h | 8 +- gcc/{genhooks.c => genhooks.cc} | 0 gcc/genmatch.c | 5257 -- gcc/genmatch.cc | 5257 ++ gcc/genmddeps.c | 71 - gcc/genmddeps.cc | 71 + gcc/{genmddump.c => genmddump.cc} | 0 gcc/genmodes.c | 2068 - gcc/genmodes.cc | 2068 + gcc/{genopinit.c => genopinit.cc} | 0 gcc/{genoutput.c => genoutput.cc} | 0 gcc/genpeep.c | 418 - gcc/genpeep.cc | 418 + gcc/genpreds.c | 1682 - gcc/genpreds.cc | 1682 + gcc/genrecog.c | 5447 --- gcc/genrecog.cc | 5447 +++ gcc/gensupport.c | 3316 -- gcc/gensupport.cc | 3316 ++ gcc/gensupport.h | 12 +- gcc/{gentarget-def.c => gentarget-def.cc} | 0 gcc/{genversion.c => genversion.cc} | 0 gcc/{ggc-common.c => ggc-common.cc} | 0 gcc/ggc-internal.h | 2 +- gcc/{ggc-none.c => ggc-none.cc} | 0 gcc/{ggc-page.c => ggc-page.cc} | 0 gcc/{ggc-tests.c => ggc-tests.cc} | 0 gcc/{gimple-builder.c => gimple-builder.cc} | 0 gcc/{gimple-expr.c => gimple-expr.cc} | 0 gcc/gimple-fold.c | 9123 ---- gcc/gimple-fold.cc | 9123 ++++ gcc/gimple-fold.h | 2 +- gcc/{gimple-iterator.c => gimple-iterator.cc} | 0 gcc/{gimple-laddress.c => gimple-laddress.cc} | 0 gcc/{gimple-loop-jam.c => gimple-loop-jam.cc} | 0 gcc/gimple-low.c | 947 - gcc/gimple-low.cc | 947 + gcc/gimple-match-head.c | 1394 - gcc/gimple-match-head.cc | 1394 + ...imple-pretty-print.c => gimple-pretty-print.cc} | 0 gcc/gimple-pretty-print.h | 2 +- ...imple-ssa-backprop.c => gimple-ssa-backprop.cc} | 0 ...a-evrp-analyze.c => gimple-ssa-evrp-analyze.cc} | 0 gcc/{gimple-ssa-evrp.c => gimple-ssa-evrp.cc} | 0 ...isolate-paths.c => gimple-ssa-isolate-paths.cc} | 0 ...ull-compare.c => gimple-ssa-nonnull-compare.cc} | 0 ...ssa-split-paths.c => gimple-ssa-split-paths.cc} | 0 gcc/gimple-ssa-sprintf.c | 4728 -- gcc/gimple-ssa-sprintf.cc | 4728 ++ ...store-merging.c => gimple-ssa-store-merging.cc} | 0 ...eduction.c => gimple-ssa-strength-reduction.cc} | 0 gcc/gimple-ssa-warn-access.cc | 992 +- ...ssa-warn-alloca.c => gimple-ssa-warn-alloca.cc} | 0 ...warn-restrict.c => gimple-ssa-warn-restrict.cc} | 0 ...{gimple-streamer-in.c => gimple-streamer-in.cc} | 0 ...imple-streamer-out.c => gimple-streamer-out.cc} | 0 gcc/gimple-streamer.h | 4 +- gcc/{gimple-walk.c => gimple-walk.cc} | 0 ...e-warn-recursion.c => gimple-warn-recursion.cc} | 0 gcc/{gimple.c => gimple.cc} | 0 gcc/gimple.h | 4 +- gcc/{gimplify-me.c => gimplify-me.cc} | 0 gcc/gimplify.c | 16582 ------- gcc/gimplify.cc | 16582 +++++++ gcc/go/ChangeLog | 17 + gcc/go/config-lang.in | 2 +- gcc/go/go-backend.c | 194 - gcc/go/go-backend.cc | 194 + gcc/go/go-lang.c | 638 - gcc/go/go-lang.cc | 638 + gcc/go/gospec.c | 466 - gcc/go/gospec.cc | 466 + gcc/go/lang-specs.h | 2 +- gcc/{godump.c => godump.cc} | 0 gcc/{graph.c => graph.cc} | 0 gcc/{graphds.c => graphds.cc} | 0 ...phite-dependences.c => graphite-dependences.cc} | 0 gcc/graphite-isl-ast-to-gimple.c | 1556 - gcc/graphite-isl-ast-to-gimple.cc | 1556 + ...ite-optimize-isl.c => graphite-optimize-isl.cc} | 0 gcc/{graphite-poly.c => graphite-poly.cc} | 0 ...scop-detection.c => graphite-scop-detection.cc} | 0 ...ite-sese-to-poly.c => graphite-sese-to-poly.cc} | 0 gcc/{graphite.c => graphite.cc} | 0 gcc/haifa-sched.c | 9263 ---- gcc/haifa-sched.cc | 9263 ++++ gcc/{hash-map-tests.c => hash-map-tests.cc} | 0 gcc/{hash-set-tests.c => hash-set-tests.cc} | 0 gcc/{hash-table.c => hash-table.cc} | 0 gcc/{hooks.c => hooks.cc} | 0 gcc/{host-default.c => host-default.cc} | 0 gcc/{hw-doloop.c => hw-doloop.cc} | 0 gcc/{hwint.c => hwint.cc} | 0 gcc/ifcvt.c | 5694 --- gcc/ifcvt.cc | 5694 +++ gcc/{inchash.c => inchash.cc} | 0 gcc/inchash.h | 2 +- 
gcc/incpath.c | 536 - gcc/incpath.cc | 536 + gcc/{init-regs.c => init-regs.cc} | 0 gcc/input.c | 3932 -- gcc/input.cc | 3932 ++ gcc/input.h | 4 +- gcc/internal-fn.c | 4429 -- gcc/internal-fn.cc | 4451 ++ gcc/internal-fn.def | 3 + gcc/internal-fn.h | 1 + gcc/{intl.c => intl.cc} | 0 gcc/{ipa-comdats.c => ipa-comdats.cc} | 0 gcc/ipa-cp.c | 6639 --- gcc/ipa-cp.cc | 6639 +++ gcc/{ipa-devirt.c => ipa-devirt.cc} | 0 gcc/ipa-fnsummary.c | 4972 -- gcc/ipa-fnsummary.cc | 4972 ++ gcc/ipa-fnsummary.h | 6 +- gcc/ipa-free-lang-data.cc | 4 +- gcc/{ipa-icf-gimple.c => ipa-icf-gimple.cc} | 0 gcc/{ipa-icf.c => ipa-icf.cc} | 0 ...pa-inline-analysis.c => ipa-inline-analysis.cc} | 0 ...-inline-transform.c => ipa-inline-transform.cc} | 0 gcc/ipa-inline.c | 3158 -- gcc/ipa-inline.cc | 3158 ++ gcc/ipa-inline.h | 6 +- gcc/{ipa-modref-tree.c => ipa-modref-tree.cc} | 0 gcc/ipa-modref.c | 5509 --- gcc/ipa-modref.cc | 5509 +++ gcc/ipa-param-manipulation.c | 2401 - gcc/ipa-param-manipulation.cc | 2401 + ...-polymorphic-call.c => ipa-polymorphic-call.cc} | 0 gcc/{ipa-predicate.c => ipa-predicate.cc} | 0 gcc/{ipa-profile.c => ipa-profile.cc} | 0 gcc/ipa-prop.c | 6088 --- gcc/ipa-prop.cc | 6088 +++ gcc/ipa-prop.h | 8 +- gcc/{ipa-pure-const.c => ipa-pure-const.cc} | 0 gcc/{ipa-ref.c => ipa-ref.cc} | 0 gcc/ipa-reference.c | 1341 - gcc/ipa-reference.cc | 1341 + gcc/ipa-reference.h | 2 +- gcc/ipa-split.c | 1982 - gcc/ipa-split.cc | 2000 + gcc/ipa-sra.c | 4148 -- gcc/ipa-sra.cc | 4148 ++ gcc/{ipa-utils.c => ipa-utils.cc} | 0 gcc/ipa-utils.h | 8 +- gcc/{ipa-visibility.c => ipa-visibility.cc} | 0 gcc/{ipa.c => ipa.cc} | 0 gcc/ira-build.c | 3568 -- gcc/ira-build.cc | 3568 ++ gcc/ira-color.c | 5274 -- gcc/ira-color.cc | 5274 ++ gcc/ira-conflicts.c | 895 - gcc/ira-conflicts.cc | 895 + gcc/{ira-costs.c => ira-costs.cc} | 0 gcc/{ira-emit.c => ira-emit.cc} | 0 gcc/ira-int.h | 20 +- gcc/ira-lives.c | 1765 - gcc/ira-lives.cc | 1765 + gcc/ira.c | 6132 --- gcc/ira.cc | 6132 +++ gcc/ira.h | 4 +- gcc/jit/ChangeLog | 156 + gcc/jit/config-lang.in | 2 +- gcc/jit/docs/_build/texinfo/libgccjit.texi | 18 +- gcc/jit/docs/internals/index.rst | 12 +- gcc/jit/{dummy-frontend.c => dummy-frontend.cc} | 0 gcc/jit/jit-builtins.c | 707 - gcc/jit/jit-builtins.cc | 707 + gcc/jit/{jit-logging.c => jit-logging.cc} | 0 gcc/jit/jit-playback.c | 3618 -- gcc/jit/jit-playback.cc | 3618 ++ gcc/jit/jit-recording.c | 7537 --- gcc/jit/jit-recording.cc | 7537 +++ gcc/jit/jit-recording.h | 2 +- gcc/jit/{jit-result.c => jit-result.cc} | 0 gcc/jit/{jit-spec.c => jit-spec.cc} | 0 gcc/jit/{jit-tempdir.c => jit-tempdir.cc} | 0 gcc/jit/{jit-w32.c => jit-w32.cc} | 0 gcc/jit/libgccjit.c | 4172 -- gcc/jit/libgccjit.cc | 4172 ++ gcc/jit/notes.txt | 6 +- gcc/{jump.c => jump.cc} | 0 gcc/langhooks.c | 952 - gcc/langhooks.cc | 952 + gcc/langhooks.h | 12 +- gcc/{lcm.c => lcm.cc} | 0 gcc/libfuncs.h | 2 +- gcc/{lists.c => lists.cc} | 0 gcc/loop-doloop.c | 807 - gcc/loop-doloop.cc | 807 + gcc/loop-init.c | 653 - gcc/loop-init.cc | 653 + gcc/loop-invariant.c | 2322 - gcc/loop-invariant.cc | 2322 + gcc/{loop-iv.c => loop-iv.cc} | 0 gcc/{loop-unroll.c => loop-unroll.cc} | 0 gcc/{lower-subreg.c => lower-subreg.cc} | 0 gcc/lower-subreg.h | 2 +- gcc/{lra-assigns.c => lra-assigns.cc} | 0 gcc/{lra-coalesce.c => lra-coalesce.cc} | 0 gcc/lra-constraints.c | 7380 --- gcc/lra-constraints.cc | 7380 +++ gcc/{lra-eliminations.c => lra-eliminations.cc} | 0 gcc/lra-int.h | 24 +- gcc/{lra-lives.c => lra-lives.cc} | 0 gcc/{lra-remat.c => lra-remat.cc} | 0 gcc/lra-spills.c | 880 - gcc/lra-spills.cc | 880 + 
gcc/lra.c | 2523 - gcc/lra.cc | 2523 + gcc/{lto-cgraph.c => lto-cgraph.cc} | 0 gcc/{lto-compress.c => lto-compress.cc} | 0 gcc/lto-compress.h | 2 +- gcc/{lto-opts.c => lto-opts.cc} | 0 gcc/{lto-section-in.c => lto-section-in.cc} | 0 gcc/{lto-section-out.c => lto-section-out.cc} | 0 gcc/{lto-streamer-in.c => lto-streamer-in.cc} | 0 gcc/lto-streamer-out.c | 3363 -- gcc/lto-streamer-out.cc | 3363 ++ gcc/{lto-streamer.c => lto-streamer.cc} | 0 gcc/lto-streamer.h | 22 +- gcc/lto-wrapper.c | 2146 - gcc/lto-wrapper.cc | 2146 + gcc/lto/ChangeLog | 32 + gcc/lto/{common.c => common.cc} | 0 gcc/lto/config-lang.in | 2 +- gcc/lto/lang-specs.h | 2 +- gcc/lto/lto-common.c | 3106 -- gcc/lto/lto-common.cc | 3106 ++ gcc/lto/lto-common.h | 2 +- gcc/lto/lto-dump.c | 379 - gcc/lto/lto-dump.cc | 379 + gcc/lto/lto-lang.c | 1490 - gcc/lto/lto-lang.cc | 1490 + gcc/lto/{lto-object.c => lto-object.cc} | 0 gcc/lto/{lto-partition.c => lto-partition.cc} | 0 gcc/lto/{lto-symtab.c => lto-symtab.cc} | 0 gcc/lto/lto.c | 670 - gcc/lto/lto.cc | 670 + gcc/lto/lto.h | 4 +- gcc/machmode.def | 2 +- gcc/machmode.h | 4 +- gcc/main.c | 45 - gcc/main.cc | 45 + gcc/match.pd | 16 +- gcc/{mcf.c => mcf.cc} | 0 gcc/{mode-switching.c => mode-switching.cc} | 0 gcc/{modulo-sched.c => modulo-sched.cc} | 0 gcc/{multiple_target.c => multiple_target.cc} | 0 gcc/objc/ChangeLog | 36 + gcc/objc/Make-lang.in | 2 +- gcc/objc/config-lang.in | 2 +- gcc/objc/lang-specs.h | 2 +- gcc/objc/objc-act.c | 10365 ---- gcc/objc/objc-act.cc | 10365 ++++ gcc/objc/objc-act.h | 4 +- gcc/objc/{objc-encoding.c => objc-encoding.cc} | 0 gcc/objc/objc-gnu-runtime-abi-01.c | 2262 - gcc/objc/objc-gnu-runtime-abi-01.cc | 2262 + gcc/objc/objc-lang.c | 56 - gcc/objc/objc-lang.cc | 56 + gcc/objc/objc-map.c | 160 - gcc/objc/objc-map.cc | 160 + gcc/objc/objc-next-runtime-abi-01.c | 2957 -- gcc/objc/objc-next-runtime-abi-01.cc | 2957 ++ ...untime-abi-02.c => objc-next-runtime-abi-02.cc} | 0 gcc/objc/objc-runtime-shared-support.c | 718 - gcc/objc/objc-runtime-shared-support.cc | 718 + gcc/objc/objc-runtime-shared-support.h | 2 +- gcc/objcp/ChangeLog | 15 + gcc/objcp/Make-lang.in | 16 +- gcc/objcp/config-lang.in | 16 +- gcc/objcp/lang-specs.h | 2 +- gcc/objcp/objcp-decl.c | 115 - gcc/objcp/objcp-decl.cc | 115 + gcc/objcp/objcp-lang.c | 92 - gcc/objcp/objcp-lang.cc | 92 + gcc/omp-builtins.def | 4 +- gcc/omp-expand.c | 10808 ----- gcc/omp-expand.cc | 10808 +++++ gcc/{omp-general.c => omp-general.cc} | 0 gcc/omp-low.c | 14777 ------ gcc/omp-low.cc | 14777 ++++++ gcc/omp-oacc-neuter-broadcast.cc | 12 +- gcc/omp-offload.c | 2823 -- gcc/omp-offload.cc | 2823 ++ gcc/{omp-simd-clone.c => omp-simd-clone.cc} | 0 gcc/{opt-suggestions.c => opt-suggestions.cc} | 0 gcc/{optabs-libfuncs.c => optabs-libfuncs.cc} | 0 gcc/optabs-query.c | 765 - gcc/optabs-query.cc | 765 + gcc/{optabs-tree.c => optabs-tree.cc} | 0 gcc/{optabs.c => optabs.cc} | 0 gcc/optabs.def | 1 + gcc/optc-gen.awk | 2 +- gcc/optc-save-gen.awk | 2 +- gcc/optinfo-emit-json.cc | 4 +- gcc/opts-common.c | 1857 - gcc/opts-common.cc | 1857 + gcc/{opts-global.c => opts-global.cc} | 0 gcc/{opts.c => opts.cc} | 0 gcc/output.h | 16 +- gcc/pass_manager.h | 2 +- gcc/{passes.c => passes.cc} | 0 gcc/passes.def | 5 +- gcc/plugin.c | 1011 - gcc/plugin.cc | 1011 + gcc/plugin.def | 2 +- gcc/plugin.h | 2 +- gcc/po/ChangeLog | 4 + gcc/po/EXCLUDES | 44 +- gcc/pointer-query.cc | 2 +- gcc/postreload-gcse.c | 1466 - gcc/postreload-gcse.cc | 1466 + gcc/{postreload.c => postreload.cc} | 0 gcc/{predict.c => predict.cc} | 0 gcc/predict.h | 4 +- gcc/{prefix.c 
=> prefix.cc} | 0 gcc/prefix.h | 2 +- gcc/{pretty-print.c => pretty-print.cc} | 0 ...{print-rtl-function.c => print-rtl-function.cc} | 0 gcc/{print-rtl.c => print-rtl.cc} | 0 gcc/{print-tree.c => print-tree.cc} | 0 gcc/{profile-count.c => profile-count.cc} | 0 gcc/{profile.c => profile.cc} | 0 gcc/profile.h | 2 +- gcc/read-md.c | 1363 - gcc/read-md.cc | 1363 + gcc/read-md.h | 10 +- gcc/read-rtl-function.c | 2230 - gcc/read-rtl-function.cc | 2230 + gcc/read-rtl.c | 2092 - gcc/read-rtl.cc | 2092 + gcc/real.c | 5560 --- gcc/real.cc | 5560 +++ gcc/real.h | 8 +- gcc/{realmpfr.c => realmpfr.cc} | 0 gcc/recog.c | 4625 -- gcc/recog.cc | 4625 ++ gcc/recog.h | 8 +- gcc/{ree.c => ree.cc} | 0 gcc/reg-notes.def | 2 +- gcc/reg-stack.c | 3484 -- gcc/reg-stack.cc | 3484 ++ gcc/{regcprop.c => regcprop.cc} | 0 gcc/{reginfo.c => reginfo.cc} | 0 gcc/{regrename.c => regrename.cc} | 0 gcc/regs.h | 2 +- gcc/regset.h | 2 +- gcc/{regstat.c => regstat.cc} | 0 gcc/reload.c | 7364 --- gcc/reload.cc | 7364 +++ gcc/reload.h | 8 +- gcc/reload1.c | 9069 ---- gcc/reload1.cc | 9069 ++++ gcc/reorg.c | 3939 -- gcc/reorg.cc | 3939 ++ gcc/{resource.c => resource.cc} | 0 gcc/{rtl-error.c => rtl-error.cc} | 0 gcc/{rtl-tests.c => rtl-tests.cc} | 0 gcc/{rtl.c => rtl.cc} | 0 gcc/rtl.def | 12 +- gcc/rtl.h | 146 +- gcc/rtlanal.c | 6992 --- gcc/rtlanal.cc | 6992 +++ gcc/rtlanal.h | 4 +- gcc/{rtlhash.c => rtlhash.cc} | 0 gcc/{rtlhooks.c => rtlhooks.cc} | 0 ...{rtx-vector-builder.c => rtx-vector-builder.cc} | 0 gcc/run-rtl-passes.c | 79 - gcc/run-rtl-passes.cc | 79 + gcc/{sancov.c => sancov.cc} | 0 gcc/sanitizer.def | 6 +- gcc/{sanopt.c => sanopt.cc} | 0 gcc/{sbitmap.c => sbitmap.cc} | 0 gcc/sched-deps.c | 4954 -- gcc/sched-deps.cc | 4954 ++ gcc/sched-ebb.c | 734 - gcc/sched-ebb.cc | 734 + gcc/sched-int.h | 28 +- gcc/sched-rgn.c | 3956 -- gcc/sched-rgn.cc | 3956 ++ gcc/sel-sched-dump.c | 1027 - gcc/sel-sched-dump.cc | 1027 + gcc/sel-sched-dump.h | 2 +- gcc/sel-sched-ir.c | 6461 --- gcc/sel-sched-ir.cc | 6461 +++ gcc/sel-sched-ir.h | 8 +- gcc/sel-sched.c | 7725 --- gcc/sel-sched.cc | 7725 +++ ...elftest-diagnostic.c => selftest-diagnostic.cc} | 0 gcc/{selftest-rtl.c => selftest-rtl.cc} | 0 gcc/selftest-run-tests.c | 135 - gcc/selftest-run-tests.cc | 135 + gcc/{selftest.c => selftest.cc} | 0 gcc/selftest.h | 4 +- gcc/{sese.c => sese.cc} | 0 gcc/shrink-wrap.c | 1890 - gcc/shrink-wrap.cc | 1890 + gcc/shrink-wrap.h | 2 +- gcc/simplify-rtx.c | 8471 ---- gcc/simplify-rtx.cc | 8471 ++++ gcc/{sparseset.c => sparseset.cc} | 0 gcc/{spellcheck-tree.c => spellcheck-tree.cc} | 0 gcc/spellcheck-tree.h | 2 +- gcc/{spellcheck.c => spellcheck.cc} | 0 gcc/spellcheck.h | 2 +- gcc/{sreal.c => sreal.cc} | 0 gcc/{stack-ptr-mod.c => stack-ptr-mod.cc} | 0 gcc/{statistics.c => statistics.cc} | 0 gcc/statistics.h | 2 +- gcc/stmt.c | 1119 - gcc/stmt.cc | 1119 + gcc/stmt.h | 2 +- gcc/{stor-layout.c => stor-layout.cc} | 0 gcc/stor-layout.h | 2 +- gcc/{store-motion.c => store-motion.cc} | 0 gcc/{streamer-hooks.c => streamer-hooks.cc} | 0 gcc/streamer-hooks.h | 2 +- gcc/{stringpool.c => stringpool.cc} | 0 gcc/stringpool.h | 2 +- ...ubstring-locations.c => substring-locations.cc} | 0 gcc/symtab.c | 2586 - gcc/symtab.cc | 2586 + gcc/{target-globals.c => target-globals.cc} | 0 gcc/target.def | 24 +- gcc/target.h | 6 +- gcc/targhooks.c | 2593 - gcc/targhooks.cc | 2593 + gcc/testsuite/ChangeLog | 325 + gcc/testsuite/c-c++-common/Wdangling-pointer-2.c | 437 + gcc/testsuite/c-c++-common/Wdangling-pointer-3.c | 64 + gcc/testsuite/c-c++-common/Wdangling-pointer-4.c | 73 + 
gcc/testsuite/c-c++-common/Wdangling-pointer-5.c | 90 + gcc/testsuite/c-c++-common/Wdangling-pointer-6.c | 32 + gcc/testsuite/c-c++-common/Wdangling-pointer.c | 434 + gcc/testsuite/c-c++-common/Wuse-after-free-2.c | 169 + gcc/testsuite/c-c++-common/Wuse-after-free-3.c | 83 + gcc/testsuite/c-c++-common/Wuse-after-free-4.c | 102 + gcc/testsuite/c-c++-common/Wuse-after-free-5.c | 103 + gcc/testsuite/c-c++-common/Wuse-after-free-6.c | 105 + gcc/testsuite/c-c++-common/Wuse-after-free-7.c | 103 + gcc/testsuite/c-c++-common/Wuse-after-free.c | 167 + gcc/testsuite/c-c++-common/cpp/line-2.c | 2 +- gcc/testsuite/c-c++-common/cpp/line-3.c | 2 +- gcc/testsuite/g++.dg/cpp1y/pr104031.C | 23 + gcc/testsuite/g++.dg/torture/pr57993-2.C | 22 +- ...nit-pred-loop-1_a.cc => uninit-pred-loop-1_a.C} | 0 gcc/testsuite/g++.dg/uninit-pred-loop-1_b.C | 21 + gcc/testsuite/g++.dg/uninit-pred-loop-1_b.cc | 21 - gcc/testsuite/g++.dg/uninit-pred-loop-1_c.C | 23 + gcc/testsuite/g++.dg/uninit-pred-loop-1_c.cc | 23 - ...{uninit-pred-loop_1.cc => uninit-pred-loop_1.C} | 0 gcc/testsuite/g++.dg/warn/Wdangling-pointer-2.C | 23 + gcc/testsuite/g++.dg/warn/Wdangling-pointer.C | 74 + gcc/testsuite/g++.dg/warn/Wfree-nonheap-object-6.C | 4 +- gcc/testsuite/g++.dg/warn/Wmismatched-dealloc-3.C | 70 + gcc/testsuite/g++.dg/warn/Wuse-after-free.C | 158 + gcc/testsuite/g++.dg/warn/ref-temp1.C | 3 + gcc/testsuite/g++.old-deja/g++.robertl/eb43.C | 1 + gcc/testsuite/g++.target/i386/pr103973-1.C | 71 + gcc/testsuite/g++.target/i386/pr103973-10.C | 7 + gcc/testsuite/g++.target/i386/pr103973-11.C | 8 + gcc/testsuite/g++.target/i386/pr103973-12.C | 8 + gcc/testsuite/g++.target/i386/pr103973-13.C | 62 + gcc/testsuite/g++.target/i386/pr103973-14.C | 7 + gcc/testsuite/g++.target/i386/pr103973-15.C | 8 + gcc/testsuite/g++.target/i386/pr103973-16.C | 8 + gcc/testsuite/g++.target/i386/pr103973-17.C | 8 + gcc/testsuite/g++.target/i386/pr103973-18.C | 8 + gcc/testsuite/g++.target/i386/pr103973-19.C | 8 + gcc/testsuite/g++.target/i386/pr103973-2.C | 7 + gcc/testsuite/g++.target/i386/pr103973-20.C | 8 + gcc/testsuite/g++.target/i386/pr103973-3.C | 8 + gcc/testsuite/g++.target/i386/pr103973-4.C | 8 + gcc/testsuite/g++.target/i386/pr103973-5.C | 66 + gcc/testsuite/g++.target/i386/pr103973-6.C | 7 + gcc/testsuite/g++.target/i386/pr103973-7.C | 8 + gcc/testsuite/g++.target/i386/pr103973-8.C | 8 + gcc/testsuite/g++.target/i386/pr103973-9.C | 67 + gcc/testsuite/gcc.c-torture/compile/pr101941-1.c | 50 + gcc/testsuite/gcc.dg/Wdangling-pointer-2.c | 82 + gcc/testsuite/gcc.dg/Wdangling-pointer.c | 75 + gcc/testsuite/gcc.dg/Wmismatched-dealloc-2.c | 13 +- gcc/testsuite/gcc.dg/Wmismatched-dealloc-3.c | 5 + gcc/testsuite/gcc.dg/analyzer/file-1.c | 3 + gcc/testsuite/gcc.dg/analyzer/file-2.c | 3 + gcc/testsuite/gcc.dg/attr-alloc_size-6.c | 2 +- gcc/testsuite/gcc.dg/attr-alloc_size-7.c | 2 +- gcc/testsuite/gcc.dg/auto-init-uninit-16.c | 4 +- gcc/testsuite/gcc.dg/auto-init-uninit-34.c | 8 +- gcc/testsuite/gcc.dg/auto-init-uninit-37.c | 44 +- gcc/testsuite/gcc.dg/auto-init-uninit-B.c | 4 +- gcc/testsuite/gcc.dg/tree-ssa/pr101941-1.c | 53 + gcc/testsuite/gcc.dg/uninit-pr50476.c | 2 +- gcc/testsuite/gcc.misc-tests/options.exp | 2 +- gcc/testsuite/gcc.src/maintainers.exp | 24 +- gcc/testsuite/gcc.target/aarch64/pr104005.c | 17 + .../gcc.target/i386/avx2-dest-false-dep-for-glc.c | 24 + .../i386/avx512dq-dest-false-dep-for-glc.c | 73 + .../i386/avx512f-dest-false-dep-for-glc.c | 103 + .../i386/avx512fp16-dest-false-dep-for-glc.c | 45 + 
.../i386/avx512fp16vl-dest-false-dep-for-glc.c | 24 + .../i386/avx512vl-dest-false-dep-for-glc.c | 76 + gcc/testsuite/gcc.target/i386/pr103973-1.c | 98 + gcc/testsuite/gcc.target/i386/pr103973-10.c | 7 + gcc/testsuite/gcc.target/i386/pr103973-11.c | 8 + gcc/testsuite/gcc.target/i386/pr103973-12.c | 8 + gcc/testsuite/gcc.target/i386/pr103973-13.c | 76 + gcc/testsuite/gcc.target/i386/pr103973-14.c | 7 + gcc/testsuite/gcc.target/i386/pr103973-15.c | 8 + gcc/testsuite/gcc.target/i386/pr103973-16.c | 8 + gcc/testsuite/gcc.target/i386/pr103973-17.c | 8 + gcc/testsuite/gcc.target/i386/pr103973-18.c | 8 + gcc/testsuite/gcc.target/i386/pr103973-19.c | 8 + gcc/testsuite/gcc.target/i386/pr103973-2.c | 7 + gcc/testsuite/gcc.target/i386/pr103973-20.c | 8 + gcc/testsuite/gcc.target/i386/pr103973-3.c | 8 + gcc/testsuite/gcc.target/i386/pr103973-4.c | 8 + gcc/testsuite/gcc.target/i386/pr103973-5.c | 85 + gcc/testsuite/gcc.target/i386/pr103973-6.c | 7 + gcc/testsuite/gcc.target/i386/pr103973-7.c | 8 + gcc/testsuite/gcc.target/i386/pr103973-8.c | 8 + gcc/testsuite/gcc.target/i386/pr103973-9.c | 89 + gcc/testsuite/gcc.target/powerpc/pr103124.c | 12 + gcc/testsuite/gfortran.dg/gomp/allocate-2.f90 | 2 +- gcc/testsuite/gfortran.dg/gomp/requires-8.f90 | 2 +- gcc/testsuite/gfortran.dg/ieee/ieee_10.f90 | 12 +- gcc/testsuite/gfortran.dg/ieee/signaling_1.f90 | 6 +- gcc/testsuite/gfortran.dg/ieee/signaling_2.f90 | 72 + gcc/testsuite/gfortran.dg/ieee/signaling_2_c.c | 8 + gcc/testsuite/gfortran.dg/index_5.f90 | 2 + gcc/testsuite/gfortran.dg/len_trim.f90 | 6 + gcc/testsuite/gfortran.dg/maskl_1.f90 | 3 +- gcc/testsuite/gfortran.dg/maskr_1.f90 | 3 +- gcc/testsuite/gfortran.dg/scan_3.f90 | 5 +- gcc/testsuite/gfortran.dg/transfer_char_kind4.f90 | 115 + gcc/testsuite/gfortran.dg/verify_3.f90 | 5 +- gcc/testsuite/lib/gcov.exp | 9 +- gcc/testsuite/lib/target-supports.exp | 36 + gcc/{timevar.c => timevar.cc} | 0 gcc/timevar.def | 2 +- gcc/timevar.h | 2 +- gcc/{toplev.c => toplev.cc} | 0 gcc/toplev.h | 10 +- gcc/{tracer.c => tracer.cc} | 0 gcc/trans-mem.c | 5683 --- gcc/trans-mem.cc | 5683 +++ gcc/{tree-affine.c => tree-affine.cc} | 0 gcc/tree-call-cdce.c | 1240 - gcc/tree-call-cdce.cc | 1240 + gcc/tree-cfg.c | 10214 ---- gcc/tree-cfg.cc | 10239 ++++ gcc/tree-cfg.h | 1 + gcc/{tree-cfgcleanup.c => tree-cfgcleanup.cc} | 0 gcc/tree-cfgcleanup.h | 2 +- gcc/{tree-chrec.c => tree-chrec.cc} | 0 gcc/tree-complex.c | 1956 - gcc/tree-complex.cc | 1956 + gcc/tree-core.h | 6 +- gcc/{tree-data-ref.c => tree-data-ref.cc} | 0 gcc/{tree-dfa.c => tree-dfa.cc} | 0 gcc/{tree-diagnostic.c => tree-diagnostic.cc} | 0 gcc/{tree-dump.c => tree-dump.cc} | 0 gcc/tree-eh.c | 5052 -- gcc/tree-eh.cc | 5052 ++ gcc/{tree-emutls.c => tree-emutls.cc} | 0 gcc/tree-if-conv.c | 3510 -- gcc/tree-if-conv.cc | 3510 ++ gcc/tree-inline.c | 6634 --- gcc/tree-inline.cc | 6634 +++ gcc/tree-inline.h | 2 +- gcc/{tree-into-ssa.c => tree-into-ssa.cc} | 0 gcc/{tree-iterator.c => tree-iterator.cc} | 0 ...op-distribution.c => tree-loop-distribution.cc} | 0 gcc/tree-nested.c | 3755 -- gcc/tree-nested.cc | 3755 ++ gcc/{tree-nrv.c => tree-nrv.cc} | 0 gcc/{tree-object-size.c => tree-object-size.cc} | 0 gcc/tree-object-size.h | 2 +- gcc/tree-outof-ssa.c | 1329 - gcc/tree-outof-ssa.cc | 1329 + gcc/tree-parloops.c | 4241 -- gcc/tree-parloops.cc | 4241 ++ gcc/{tree-phinodes.c => tree-phinodes.cc} | 0 gcc/{tree-predcom.c => tree-predcom.cc} | 0 gcc/tree-pretty-print.c | 4623 -- gcc/tree-pretty-print.cc | 4623 ++ gcc/tree-profile.c | 911 - gcc/tree-profile.cc | 911 + 
...scalar-evolution.c => tree-scalar-evolution.cc} | 0 gcc/tree-sra.c | 4794 -- gcc/tree-sra.cc | 4794 ++ gcc/tree-ssa-address.c | 1213 - gcc/tree-ssa-address.cc | 1213 + gcc/tree-ssa-alias.c | 4336 -- gcc/tree-ssa-alias.cc | 4336 ++ gcc/tree-ssa-alias.h | 4 +- gcc/tree-ssa-ccp.c | 4640 -- gcc/tree-ssa-ccp.cc | 4640 ++ gcc/{tree-ssa-coalesce.c => tree-ssa-coalesce.cc} | 0 gcc/tree-ssa-coalesce.h | 2 +- gcc/{tree-ssa-copy.c => tree-ssa-copy.cc} | 0 gcc/{tree-ssa-dce.c => tree-ssa-dce.cc} | 0 gcc/{tree-ssa-dom.c => tree-ssa-dom.cc} | 0 gcc/{tree-ssa-dse.c => tree-ssa-dse.cc} | 0 gcc/{tree-ssa-forwprop.c => tree-ssa-forwprop.cc} | 0 ...{tree-ssa-ifcombine.c => tree-ssa-ifcombine.cc} | 0 gcc/tree-ssa-live.c | 1633 - gcc/tree-ssa-live.cc | 1633 + gcc/{tree-ssa-loop-ch.c => tree-ssa-loop-ch.cc} | 0 gcc/{tree-ssa-loop-im.c => tree-ssa-loop-im.cc} | 0 ...ssa-loop-ivcanon.c => tree-ssa-loop-ivcanon.cc} | 0 ...e-ssa-loop-ivopts.c => tree-ssa-loop-ivopts.cc} | 0 gcc/tree-ssa-loop-manip.c | 1677 - gcc/tree-ssa-loop-manip.cc | 1677 + ...ree-ssa-loop-niter.c => tree-ssa-loop-niter.cc} | 0 ...a-loop-prefetch.c => tree-ssa-loop-prefetch.cc} | 0 ...ree-ssa-loop-split.c => tree-ssa-loop-split.cc} | 0 gcc/tree-ssa-loop-unswitch.c | 1042 - gcc/tree-ssa-loop-unswitch.cc | 1042 + gcc/{tree-ssa-loop.c => tree-ssa-loop.cc} | 0 gcc/tree-ssa-math-opts.c | 4847 -- gcc/tree-ssa-math-opts.cc | 5070 ++ gcc/tree-ssa-operands.c | 1415 - gcc/tree-ssa-operands.cc | 1415 + gcc/tree-ssa-phiopt.c | 3879 -- gcc/tree-ssa-phiopt.cc | 3854 ++ gcc/{tree-ssa-phiprop.c => tree-ssa-phiprop.cc} | 0 gcc/tree-ssa-pre.c | 4481 -- gcc/tree-ssa-pre.cc | 4481 ++ ...{tree-ssa-propagate.c => tree-ssa-propagate.cc} | 0 gcc/tree-ssa-reassoc.c | 7052 --- gcc/tree-ssa-reassoc.cc | 7052 +++ gcc/tree-ssa-sccvn.c | 8250 ---- gcc/tree-ssa-sccvn.cc | 8250 ++++ gcc/tree-ssa-sccvn.h | 2 +- gcc/tree-ssa-scopedtables.c | 1201 - gcc/tree-ssa-scopedtables.cc | 1201 + gcc/{tree-ssa-sink.c => tree-ssa-sink.cc} | 0 gcc/tree-ssa-strlen.c | 5970 --- gcc/tree-ssa-strlen.cc | 5970 +++ gcc/tree-ssa-strlen.h | 2 +- ...e-ssa-structalias.c => tree-ssa-structalias.cc} | 0 gcc/tree-ssa-tail-merge.c | 1818 - gcc/tree-ssa-tail-merge.cc | 1818 + gcc/{tree-ssa-ter.c => tree-ssa-ter.cc} | 0 gcc/tree-ssa-ter.h | 2 +- ...threadbackward.c => tree-ssa-threadbackward.cc} | 0 ...ree-ssa-threadedge.c => tree-ssa-threadedge.cc} | 0 ...ssa-threadupdate.c => tree-ssa-threadupdate.cc} | 0 gcc/tree-ssa-threadupdate.h | 2 +- gcc/{tree-ssa-uncprop.c => tree-ssa-uncprop.cc} | 0 gcc/tree-ssa-uninit.c | 1358 - gcc/tree-ssa-uninit.cc | 1447 + gcc/{tree-ssa.c => tree-ssa.cc} | 0 gcc/{tree-ssanames.c => tree-ssanames.cc} | 0 gcc/{tree-stdarg.c => tree-stdarg.cc} | 0 gcc/tree-streamer-in.c | 1130 - gcc/tree-streamer-in.cc | 1130 + gcc/tree-streamer-out.c | 1045 - gcc/tree-streamer-out.cc | 1045 + gcc/{tree-streamer.c => tree-streamer.cc} | 0 gcc/tree-streamer.h | 6 +- ...itch-conversion.c => tree-switch-conversion.cc} | 0 gcc/{tree-tailcall.c => tree-tailcall.cc} | 0 gcc/tree-vect-data-refs.c | 6814 --- gcc/tree-vect-data-refs.cc | 6817 +++ gcc/{tree-vect-generic.c => tree-vect-generic.cc} | 0 ...e-vect-loop-manip.c => tree-vect-loop-manip.cc} | 0 gcc/{tree-vect-loop.c => tree-vect-loop.cc} | 0 gcc/tree-vect-patterns.c | 5856 --- gcc/tree-vect-patterns.cc | 5856 +++ ...ct-slp-patterns.c => tree-vect-slp-patterns.cc} | 0 gcc/{tree-vect-slp.c => tree-vect-slp.cc} | 0 gcc/tree-vect-stmts.c | 12484 ----- gcc/tree-vect-stmts.cc | 12484 +++++ ...ree-vector-builder.c => tree-vector-builder.cc} | 0 
gcc/tree-vectorizer.c | 2026 - gcc/tree-vectorizer.cc | 2026 + gcc/tree-vectorizer.h | 20 +- gcc/{tree-vrp.c => tree-vrp.cc} | 0 gcc/tree.c | 15221 ------ gcc/tree.cc | 15221 ++++++ gcc/tree.def | 2 +- gcc/tree.h | 10 +- gcc/{tsan.c => tsan.cc} | 0 gcc/{typed-splay-tree.c => typed-splay-tree.cc} | 0 gcc/{ubsan.c => ubsan.cc} | 0 gcc/{valtrack.c => valtrack.cc} | 0 gcc/value-prof.c | 1966 - gcc/value-prof.cc | 1966 + gcc/value-prof.h | 4 +- gcc/value-range.cc | 2 +- gcc/value-range.h | 2 +- gcc/var-tracking.c | 10613 ---- gcc/var-tracking.cc | 10613 ++++ gcc/varasm.c | 8509 ---- gcc/varasm.cc | 8509 ++++ gcc/{varpool.c => varpool.cc} | 0 gcc/{vec-perm-indices.c => vec-perm-indices.cc} | 0 gcc/{vec.c => vec.cc} | 0 gcc/vec.h | 2 +- gcc/vmsdbgout.c | 1851 - gcc/vmsdbgout.cc | 1851 + gcc/{vr-values.c => vr-values.cc} | 0 gcc/vtable-verify.c | 850 - gcc/vtable-verify.cc | 850 + gcc/vtable-verify.h | 2 +- gcc/warning-control.cc | 3 +- gcc/{web.c => web.cc} | 0 gcc/xcoffout.c | 494 - gcc/xcoffout.cc | 494 + gcc/xcoffout.h | 8 +- libcpp/ChangeLog | 80 + libcpp/Makefile.in | 10 +- libcpp/charset.c | 2510 - libcpp/charset.cc | 2510 + libcpp/directives.c | 2799 -- libcpp/directives.cc | 2799 ++ libcpp/{errors.c => errors.cc} | 0 libcpp/{expr.c => expr.cc} | 0 libcpp/files.c | 2175 - libcpp/files.cc | 2174 + libcpp/{identifiers.c => identifiers.cc} | 0 libcpp/include/cpplib.h | 22 +- libcpp/include/line-map.h | 6 +- libcpp/include/mkdeps.h | 2 +- libcpp/init.c | 914 - libcpp/init.cc | 914 + libcpp/internal.h | 32 +- libcpp/{lex.c => lex.cc} | 0 libcpp/line-map.c | 2555 - libcpp/line-map.cc | 2555 + libcpp/{macro.c => macro.cc} | 0 libcpp/{makeucnid.c => makeucnid.cc} | 0 libcpp/{mkdeps.c => mkdeps.cc} | 0 libcpp/{pch.c => pch.cc} | 0 libcpp/{symtab.c => symtab.cc} | 0 libcpp/traditional.c | 1321 - libcpp/traditional.cc | 1321 + libgcc/ChangeLog | 16 + libgcc/config/i386/gthr-win32.c | 2 - libgcc/config/i386/gthr-win32.h | 23 +- libgcc/libgcov-driver.c | 2 +- libgfortran/ChangeLog | 30 + libgfortran/ieee/ieee_arithmetic.F90 | 284 +- libgfortran/ieee/ieee_helper.c | 74 + libgfortran/mk-kinds-h.sh | 7 + libgfortran/runtime/environ.c | 111 +- libgomp/ChangeLog | 21 + libgomp/plugin/plugin-gcn.c | 82 +- .../testsuite/libgomp.oacc-c++/privatized-ref-2.C | 178 + .../testsuite/libgomp.oacc-c++/privatized-ref-3.C | 212 + .../libgomp.oacc-fortran/privatized-ref-1.f95 | 163 + libiberty/ChangeLog | 4 + libiberty/regex.c | 4 + libstdc++-v3/ChangeLog | 397 + libstdc++-v3/Makefile.in | 7 + libstdc++-v3/acinclude.m4 | 155 +- libstdc++-v3/config.h.in | 10 + libstdc++-v3/configure | 321 +- libstdc++-v3/configure.ac | 3 + libstdc++-v3/doc/Makefile.in | 7 + libstdc++-v3/doc/html/manual/status.html | 36 +- libstdc++-v3/doc/xml/manual/status_cxx2017.xml | 8 +- libstdc++-v3/doc/xml/manual/status_cxx2020.xml | 24 +- libstdc++-v3/include/Makefile.am | 3 + libstdc++-v3/include/Makefile.in | 10 + libstdc++-v3/include/bits/shared_ptr_atomic.h | 455 + libstdc++-v3/include/bits/shared_ptr_base.h | 17 + libstdc++-v3/include/experimental/bits/simd.h | 389 +- .../include/experimental/bits/simd_builtin.h | 41 +- .../include/experimental/bits/simd_detail.h | 40 + .../include/experimental/bits/simd_fixed_size.h | 39 +- libstdc++-v3/include/experimental/bits/simd_math.h | 45 +- libstdc++-v3/include/experimental/bits/simd_neon.h | 4 +- libstdc++-v3/include/experimental/bits/simd_ppc.h | 4 +- .../include/experimental/bits/simd_scalar.h | 71 +- libstdc++-v3/include/experimental/bits/simd_x86.h | 4 +- libstdc++-v3/include/std/stacktrace | 
672 + libstdc++-v3/include/std/version | 4 + libstdc++-v3/libsupc++/Makefile.in | 7 + libstdc++-v3/po/Makefile.in | 7 + libstdc++-v3/python/Makefile.in | 7 + libstdc++-v3/src/Makefile.am | 11 +- libstdc++-v3/src/Makefile.in | 14 +- libstdc++-v3/src/c++11/Makefile.in | 7 + libstdc++-v3/src/c++17/Makefile.in | 7 + libstdc++-v3/src/c++17/fast_float/LOCAL_PATCHES | 4 + libstdc++-v3/src/c++17/fast_float/MERGE | 4 + libstdc++-v3/src/c++17/fast_float/README.md | 218 + libstdc++-v3/src/c++17/fast_float/fast_float.h | 2905 ++ libstdc++-v3/src/c++17/floating_from_chars.cc | 397 + libstdc++-v3/src/c++20/Makefile.in | 7 + libstdc++-v3/src/c++98/Makefile.in | 7 + libstdc++-v3/src/filesystem/Makefile.in | 7 + libstdc++-v3/src/libbacktrace/Makefile.am | 101 + libstdc++-v3/src/libbacktrace/Makefile.in | 860 + libstdc++-v3/src/libbacktrace/backtrace-rename.h | 41 + .../src/libbacktrace/backtrace-supported.h.in | 61 + libstdc++-v3/src/libbacktrace/config.h.in | 184 + .../testsuite/17_intro/headers/c++1998/103650.cc | 13 + libstdc++-v3/testsuite/20_util/from_chars/7.cc | 152 + .../20_util/shared_ptr/atomic/atomic_shared_ptr.cc | 150 + libstdc++-v3/testsuite/20_util/stacktrace/entry.cc | 53 + .../testsuite/20_util/stacktrace/synopsis.cc | 46 + .../testsuite/20_util/stacktrace/version.cc | 11 + .../testsuite/20_util/weak_ptr/atomic_weak_ptr.cc | 95 + libstdc++-v3/testsuite/Makefile.in | 7 + libstdc++-v3/testsuite/lib/libstdc++.exp | 8 + 1867 files changed, 1663961 insertions(+), 1645562 deletions(-) diff --cc gcc/config/ia64/ia64.cc index 00000000000,f9fb681a36c..c68b2ff69bd mode 000000,100644..100644 --- a/gcc/config/ia64/ia64.cc +++ b/gcc/config/ia64/ia64.cc @@@ -1,0 -1,11923 +1,11927 @@@ + /* Definitions of target machine for GNU compiler. + Copyright (C) 1999-2022 Free Software Foundation, Inc. + Contributed by James E. Wilson and + David Mosberger . + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + + #define IN_TARGET_CODE 1 + + #include "config.h" + #include "system.h" + #include "coretypes.h" + #include "backend.h" + #include "target.h" + #include "rtl.h" + #include "tree.h" + #include "memmodel.h" + #include "cfghooks.h" + #include "df.h" + #include "tm_p.h" + #include "stringpool.h" + #include "attribs.h" + #include "optabs.h" + #include "regs.h" + #include "emit-rtl.h" + #include "recog.h" + #include "diagnostic-core.h" + #include "alias.h" + #include "fold-const.h" + #include "stor-layout.h" + #include "calls.h" + #include "varasm.h" + #include "output.h" + #include "insn-attr.h" + #include "flags.h" + #include "explow.h" + #include "expr.h" + #include "cfgrtl.h" + #include "libfuncs.h" + #include "sched-int.h" + #include "common/common-target.h" + #include "langhooks.h" + #include "gimplify.h" + #include "intl.h" + #include "debug.h" + #include "dbgcnt.h" + #include "tm-constrs.h" + #include "sel-sched.h" + #include "reload.h" + #include "opts.h" + #include "dumpfile.h" + #include "builtins.h" + + /* This file should be included last. 
*/ + #include "target-def.h" + + /* This is used for communication between ASM_OUTPUT_LABEL and + ASM_OUTPUT_LABELREF. */ + int ia64_asm_output_label = 0; + + /* Register names for ia64_expand_prologue. */ + static const char * const ia64_reg_numbers[96] = + { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", + "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47", + "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55", + "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63", + "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71", + "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79", + "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87", + "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95", + "r96", "r97", "r98", "r99", "r100","r101","r102","r103", + "r104","r105","r106","r107","r108","r109","r110","r111", + "r112","r113","r114","r115","r116","r117","r118","r119", + "r120","r121","r122","r123","r124","r125","r126","r127"}; + + /* ??? These strings could be shared with REGISTER_NAMES. */ + static const char * const ia64_input_reg_names[8] = + { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" }; + + /* ??? These strings could be shared with REGISTER_NAMES. */ + static const char * const ia64_local_reg_names[80] = + { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7", + "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15", + "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23", + "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31", + "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39", + "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47", + "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55", + "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63", + "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71", + "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" }; + + /* ??? These strings could be shared with REGISTER_NAMES. */ + static const char * const ia64_output_reg_names[8] = + { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" }; + + /* Variables which are this size or smaller are put in the sdata/sbss + sections. */ + + unsigned int ia64_section_threshold; + + /* The following variable is used by the DFA insn scheduler. The value is + TRUE if we do insn bundling instead of insn scheduling. */ + int bundling_p = 0; + + enum ia64_frame_regs + { + reg_fp, + reg_save_b0, + reg_save_pr, + reg_save_ar_pfs, + reg_save_ar_unat, + reg_save_ar_lc, + reg_save_gp, + number_of_ia64_frame_regs + }; + + /* Structure to be filled in by ia64_compute_frame_size with register + save masks and offsets for the current function. */ + + struct ia64_frame_info + { + HOST_WIDE_INT total_size; /* size of the stack frame, not including + the caller's scratch area. */ + HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */ + HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */ + HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */ + HARD_REG_SET mask; /* mask of saved registers. */ + unsigned int gr_used_mask; /* mask of registers in use as gr spill + registers or long-term scratches. */ + int n_spilled; /* number of spilled registers. */ + int r[number_of_ia64_frame_regs]; /* Frame related registers. */ + int n_input_regs; /* number of input registers used. */ + int n_local_regs; /* number of local registers used. */ + int n_output_regs; /* number of output registers used. 
*/ + int n_rotate_regs; /* number of rotating registers used. */ + + char need_regstk; /* true if a .regstk directive needed. */ + char initialized; /* true if the data is finalized. */ + }; + + /* Current frame information calculated by ia64_compute_frame_size. */ + static struct ia64_frame_info current_frame_info; + /* The actual registers that are emitted. */ + static int emitted_frame_related_regs[number_of_ia64_frame_regs]; + + static int ia64_first_cycle_multipass_dfa_lookahead (void); + static void ia64_dependencies_evaluation_hook (rtx_insn *, rtx_insn *); + static void ia64_init_dfa_pre_cycle_insn (void); + static rtx ia64_dfa_pre_cycle_insn (void); + static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int); + static int ia64_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *); + static void ia64_h_i_d_extended (void); + static void * ia64_alloc_sched_context (void); + static void ia64_init_sched_context (void *, bool); + static void ia64_set_sched_context (void *); + static void ia64_clear_sched_context (void *); + static void ia64_free_sched_context (void *); + static int ia64_mode_to_int (machine_mode); + static void ia64_set_sched_flags (spec_info_t); + static ds_t ia64_get_insn_spec_ds (rtx_insn *); + static ds_t ia64_get_insn_checked_ds (rtx_insn *); + static bool ia64_skip_rtx_p (const_rtx); + static int ia64_speculate_insn (rtx_insn *, ds_t, rtx *); + static bool ia64_needs_block_p (ds_t); + static rtx ia64_gen_spec_check (rtx_insn *, rtx_insn *, ds_t); + static int ia64_spec_check_p (rtx); + static int ia64_spec_check_src_p (rtx); + static rtx gen_tls_get_addr (void); + static rtx gen_thread_pointer (void); + static int find_gr_spill (enum ia64_frame_regs, int); + static int next_scratch_gr_reg (void); + static void mark_reg_gr_used_mask (rtx, void *); + static void ia64_compute_frame_size (HOST_WIDE_INT); + static void setup_spill_pointers (int, rtx, HOST_WIDE_INT); + static void finish_spill_pointers (void); + static rtx spill_restore_mem (rtx, HOST_WIDE_INT); + static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx); + static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT); + static rtx gen_movdi_x (rtx, rtx, rtx); + static rtx gen_fr_spill_x (rtx, rtx, rtx); + static rtx gen_fr_restore_x (rtx, rtx, rtx); + + static void ia64_option_override (void); + static bool ia64_can_eliminate (const int, const int); + static machine_mode hfa_element_mode (const_tree, bool); + static void ia64_setup_incoming_varargs (cumulative_args_t, + const function_arg_info &, + int *, int); + static int ia64_arg_partial_bytes (cumulative_args_t, + const function_arg_info &); + static rtx ia64_function_arg (cumulative_args_t, const function_arg_info &); + static rtx ia64_function_incoming_arg (cumulative_args_t, + const function_arg_info &); + static void ia64_function_arg_advance (cumulative_args_t, + const function_arg_info &); + static pad_direction ia64_function_arg_padding (machine_mode, const_tree); + static unsigned int ia64_function_arg_boundary (machine_mode, + const_tree); + static bool ia64_function_ok_for_sibcall (tree, tree); + static bool ia64_return_in_memory (const_tree, const_tree); + static rtx ia64_function_value (const_tree, const_tree, bool); + static rtx ia64_libcall_value (machine_mode, const_rtx); + static bool ia64_function_value_regno_p (const unsigned int); + static int ia64_register_move_cost (machine_mode, reg_class_t, + reg_class_t); + static int ia64_memory_move_cost (machine_mode mode, reg_class_t, + bool); + 
static bool ia64_rtx_costs (rtx, machine_mode, int, int, int *, bool); + static int ia64_unspec_may_trap_p (const_rtx, unsigned); + static void fix_range (const char *); + static struct machine_function * ia64_init_machine_status (void); + static void emit_insn_group_barriers (FILE *); + static void emit_all_insn_group_barriers (FILE *); + static void final_emit_insn_group_barriers (FILE *); + static void emit_predicate_relation_info (void); + static void ia64_reorg (void); + static bool ia64_in_small_data_p (const_tree); + static void process_epilogue (FILE *, rtx, bool, bool); + + static bool ia64_assemble_integer (rtx, unsigned int, int); + static void ia64_output_function_prologue (FILE *); + static void ia64_output_function_epilogue (FILE *); + static void ia64_output_function_end_prologue (FILE *); + + static void ia64_print_operand (FILE *, rtx, int); + static void ia64_print_operand_address (FILE *, machine_mode, rtx); + static bool ia64_print_operand_punct_valid_p (unsigned char code); + + static int ia64_issue_rate (void); + static int ia64_adjust_cost (rtx_insn *, int, rtx_insn *, int, dw_t); + static void ia64_sched_init (FILE *, int, int); + static void ia64_sched_init_global (FILE *, int, int); + static void ia64_sched_finish_global (FILE *, int); + static void ia64_sched_finish (FILE *, int); + static int ia64_dfa_sched_reorder (FILE *, int, rtx_insn **, int *, int, int); + static int ia64_sched_reorder (FILE *, int, rtx_insn **, int *, int); + static int ia64_sched_reorder2 (FILE *, int, rtx_insn **, int *, int); + static int ia64_variable_issue (FILE *, int, rtx_insn *, int); + + static void ia64_asm_unwind_emit (FILE *, rtx_insn *); + static void ia64_asm_emit_except_personality (rtx); + static void ia64_asm_init_sections (void); + + static enum unwind_info_type ia64_debug_unwind_info (void); + + static struct bundle_state *get_free_bundle_state (void); + static void free_bundle_state (struct bundle_state *); + static void initiate_bundle_states (void); + static void finish_bundle_states (void); + static int insert_bundle_state (struct bundle_state *); + static void initiate_bundle_state_table (void); + static void finish_bundle_state_table (void); + static int try_issue_nops (struct bundle_state *, int); + static int try_issue_insn (struct bundle_state *, rtx); + static void issue_nops_and_insn (struct bundle_state *, int, rtx_insn *, + int, int); + static int get_max_pos (state_t); + static int get_template (state_t, int); + + static rtx_insn *get_next_important_insn (rtx_insn *, rtx_insn *); + static bool important_for_bundling_p (rtx_insn *); + static bool unknown_for_bundling_p (rtx_insn *); + static void bundling (FILE *, int, rtx_insn *, rtx_insn *); + + static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, + HOST_WIDE_INT, tree); + static void ia64_file_start (void); + static void ia64_globalize_decl_name (FILE *, tree); + + static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED; + static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED; + static section *ia64_select_rtx_section (machine_mode, rtx, + unsigned HOST_WIDE_INT); + static void ia64_output_dwarf_dtprel (FILE *, int, rtx) + ATTRIBUTE_UNUSED; + static unsigned int ia64_section_type_flags (tree, const char *, int); ++static void ia64_linux_file_end (void) ++ ATTRIBUTE_UNUSED; + static void ia64_init_libfuncs (void) + ATTRIBUTE_UNUSED; + static void ia64_hpux_init_libfuncs (void) + ATTRIBUTE_UNUSED; + static void ia64_sysv4_init_libfuncs (void) + ATTRIBUTE_UNUSED; + static void 
ia64_vms_init_libfuncs (void) + ATTRIBUTE_UNUSED; + static void ia64_soft_fp_init_libfuncs (void) + ATTRIBUTE_UNUSED; + static bool ia64_vms_valid_pointer_mode (scalar_int_mode mode) + ATTRIBUTE_UNUSED; + static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *) + ATTRIBUTE_UNUSED; + + static bool ia64_attribute_takes_identifier_p (const_tree); + static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *); + static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *); + static void ia64_encode_section_info (tree, rtx, int); + static rtx ia64_struct_value_rtx (tree, int); + static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *); + static bool ia64_scalar_mode_supported_p (scalar_mode mode); + static bool ia64_vector_mode_supported_p (machine_mode mode); + static bool ia64_legitimate_constant_p (machine_mode, rtx); + static bool ia64_legitimate_address_p (machine_mode, rtx, bool); + static bool ia64_cannot_force_const_mem (machine_mode, rtx); + static const char *ia64_mangle_type (const_tree); + static const char *ia64_invalid_conversion (const_tree, const_tree); + static const char *ia64_invalid_unary_op (int, const_tree); + static const char *ia64_invalid_binary_op (int, const_tree, const_tree); + static machine_mode ia64_c_mode_for_suffix (char); + static void ia64_trampoline_init (rtx, tree, rtx); + static void ia64_override_options_after_change (void); + static bool ia64_member_type_forces_blk (const_tree, machine_mode); + + static tree ia64_fold_builtin (tree, int, tree *, bool); + static tree ia64_builtin_decl (unsigned, bool); + + static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t); + static fixed_size_mode ia64_get_reg_raw_mode (int regno); + static section * ia64_hpux_function_section (tree, enum node_frequency, + bool, bool); + + static bool ia64_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx, + const vec_perm_indices &); + + static unsigned int ia64_hard_regno_nregs (unsigned int, machine_mode); + static bool ia64_hard_regno_mode_ok (unsigned int, machine_mode); + static bool ia64_modes_tieable_p (machine_mode, machine_mode); + static bool ia64_can_change_mode_class (machine_mode, machine_mode, + reg_class_t); + + #define MAX_VECT_LEN 8 + + struct expand_vec_perm_d + { + rtx target, op0, op1; + unsigned char perm[MAX_VECT_LEN]; + machine_mode vmode; + unsigned char nelt; + bool one_operand_p; + bool testing_p; + }; + + static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d); + + + /* Table of valid machine attributes. */ + static const struct attribute_spec ia64_attribute_table[] = + { + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, + affects_type_identity, handler, exclude } */ + { "syscall_linkage", 0, 0, false, true, true, false, NULL, NULL }, + { "model", 1, 1, true, false, false, false, + ia64_handle_model_attribute, NULL }, + #if TARGET_ABI_OPEN_VMS + { "common_object", 1, 1, true, false, false, false, + ia64_vms_common_object_attribute, NULL }, + #endif + { "version_id", 1, 1, true, false, false, false, + ia64_handle_version_id_attribute, NULL }, + { NULL, 0, 0, false, false, false, false, NULL, NULL } + }; + + /* Initialize the GCC target structure. 
*/ + #undef TARGET_ATTRIBUTE_TABLE + #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table + + #undef TARGET_INIT_BUILTINS + #define TARGET_INIT_BUILTINS ia64_init_builtins + + #undef TARGET_FOLD_BUILTIN + #define TARGET_FOLD_BUILTIN ia64_fold_builtin + + #undef TARGET_EXPAND_BUILTIN + #define TARGET_EXPAND_BUILTIN ia64_expand_builtin + + #undef TARGET_BUILTIN_DECL + #define TARGET_BUILTIN_DECL ia64_builtin_decl + + #undef TARGET_ASM_BYTE_OP + #define TARGET_ASM_BYTE_OP "\tdata1\t" + #undef TARGET_ASM_ALIGNED_HI_OP + #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t" + #undef TARGET_ASM_ALIGNED_SI_OP + #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t" + #undef TARGET_ASM_ALIGNED_DI_OP + #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t" + #undef TARGET_ASM_UNALIGNED_HI_OP + #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t" + #undef TARGET_ASM_UNALIGNED_SI_OP + #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t" + #undef TARGET_ASM_UNALIGNED_DI_OP + #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t" + #undef TARGET_ASM_INTEGER + #define TARGET_ASM_INTEGER ia64_assemble_integer + + #undef TARGET_OPTION_OVERRIDE + #define TARGET_OPTION_OVERRIDE ia64_option_override + + #undef TARGET_ASM_FUNCTION_PROLOGUE + #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue + #undef TARGET_ASM_FUNCTION_END_PROLOGUE + #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue + #undef TARGET_ASM_FUNCTION_EPILOGUE + #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue + + #undef TARGET_PRINT_OPERAND + #define TARGET_PRINT_OPERAND ia64_print_operand + #undef TARGET_PRINT_OPERAND_ADDRESS + #define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address + #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P + #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p + + #undef TARGET_IN_SMALL_DATA_P + #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p + + #undef TARGET_SCHED_ADJUST_COST + #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost + #undef TARGET_SCHED_ISSUE_RATE + #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate + #undef TARGET_SCHED_VARIABLE_ISSUE + #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue + #undef TARGET_SCHED_INIT + #define TARGET_SCHED_INIT ia64_sched_init + #undef TARGET_SCHED_FINISH + #define TARGET_SCHED_FINISH ia64_sched_finish + #undef TARGET_SCHED_INIT_GLOBAL + #define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global + #undef TARGET_SCHED_FINISH_GLOBAL + #define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global + #undef TARGET_SCHED_REORDER + #define TARGET_SCHED_REORDER ia64_sched_reorder + #undef TARGET_SCHED_REORDER2 + #define TARGET_SCHED_REORDER2 ia64_sched_reorder2 + + #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK + #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook + + #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD + #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead + + #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN + #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn + #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN + #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn + + #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD + #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\ + ia64_first_cycle_multipass_dfa_lookahead_guard + + #undef TARGET_SCHED_DFA_NEW_CYCLE + #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle + + #undef TARGET_SCHED_H_I_D_EXTENDED + #define TARGET_SCHED_H_I_D_EXTENDED 
ia64_h_i_d_extended + + #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT + #define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context + + #undef TARGET_SCHED_INIT_SCHED_CONTEXT + #define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context + + #undef TARGET_SCHED_SET_SCHED_CONTEXT + #define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context + + #undef TARGET_SCHED_CLEAR_SCHED_CONTEXT + #define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context + + #undef TARGET_SCHED_FREE_SCHED_CONTEXT + #define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context + + #undef TARGET_SCHED_SET_SCHED_FLAGS + #define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags + + #undef TARGET_SCHED_GET_INSN_SPEC_DS + #define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds + + #undef TARGET_SCHED_GET_INSN_CHECKED_DS + #define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds + + #undef TARGET_SCHED_SPECULATE_INSN + #define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn + + #undef TARGET_SCHED_NEEDS_BLOCK_P + #define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p + + #undef TARGET_SCHED_GEN_SPEC_CHECK + #define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check + + #undef TARGET_SCHED_SKIP_RTX_P + #define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p + + #undef TARGET_FUNCTION_OK_FOR_SIBCALL + #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall + #undef TARGET_ARG_PARTIAL_BYTES + #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes + #undef TARGET_FUNCTION_ARG + #define TARGET_FUNCTION_ARG ia64_function_arg + #undef TARGET_FUNCTION_INCOMING_ARG + #define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg + #undef TARGET_FUNCTION_ARG_ADVANCE + #define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance + #undef TARGET_FUNCTION_ARG_PADDING + #define TARGET_FUNCTION_ARG_PADDING ia64_function_arg_padding + #undef TARGET_FUNCTION_ARG_BOUNDARY + #define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary + + #undef TARGET_ASM_OUTPUT_MI_THUNK + #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk + #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK + #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true + + #undef TARGET_ASM_FILE_START + #define TARGET_ASM_FILE_START ia64_file_start + + #undef TARGET_ASM_GLOBALIZE_DECL_NAME + #define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name + + #undef TARGET_REGISTER_MOVE_COST + #define TARGET_REGISTER_MOVE_COST ia64_register_move_cost + #undef TARGET_MEMORY_MOVE_COST + #define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost + #undef TARGET_RTX_COSTS + #define TARGET_RTX_COSTS ia64_rtx_costs + #undef TARGET_ADDRESS_COST + #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0 + + #undef TARGET_UNSPEC_MAY_TRAP_P + #define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p + + #undef TARGET_MACHINE_DEPENDENT_REORG + #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg + + #undef TARGET_ENCODE_SECTION_INFO + #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info + + #undef TARGET_SECTION_TYPE_FLAGS + #define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags + + #ifdef HAVE_AS_TLS + #undef TARGET_ASM_OUTPUT_DWARF_DTPREL + #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel + #endif + + /* ??? Investigate. 
*/ + #if 0 + #undef TARGET_PROMOTE_PROTOTYPES + #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true + #endif + + #undef TARGET_FUNCTION_VALUE + #define TARGET_FUNCTION_VALUE ia64_function_value + #undef TARGET_LIBCALL_VALUE + #define TARGET_LIBCALL_VALUE ia64_libcall_value + #undef TARGET_FUNCTION_VALUE_REGNO_P + #define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p + + #undef TARGET_STRUCT_VALUE_RTX + #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx + #undef TARGET_RETURN_IN_MEMORY + #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory + #undef TARGET_SETUP_INCOMING_VARARGS + #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs + #undef TARGET_STRICT_ARGUMENT_NAMING + #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true + #undef TARGET_MUST_PASS_IN_STACK + #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size + #undef TARGET_GET_RAW_RESULT_MODE + #define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode + #undef TARGET_GET_RAW_ARG_MODE + #define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode + + #undef TARGET_MEMBER_TYPE_FORCES_BLK + #define TARGET_MEMBER_TYPE_FORCES_BLK ia64_member_type_forces_blk + + #undef TARGET_GIMPLIFY_VA_ARG_EXPR + #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg + + #undef TARGET_ASM_UNWIND_EMIT + #define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit + #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY + #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality + #undef TARGET_ASM_INIT_SECTIONS + #define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections + + #undef TARGET_DEBUG_UNWIND_INFO + #define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info + + #undef TARGET_SCALAR_MODE_SUPPORTED_P + #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p + #undef TARGET_VECTOR_MODE_SUPPORTED_P + #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p + + #undef TARGET_LEGITIMATE_CONSTANT_P + #define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p + #undef TARGET_LEGITIMATE_ADDRESS_P + #define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p + + #undef TARGET_LRA_P + #define TARGET_LRA_P hook_bool_void_false + + #undef TARGET_CANNOT_FORCE_CONST_MEM + #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem + + #undef TARGET_MANGLE_TYPE + #define TARGET_MANGLE_TYPE ia64_mangle_type + + #undef TARGET_INVALID_CONVERSION + #define TARGET_INVALID_CONVERSION ia64_invalid_conversion + #undef TARGET_INVALID_UNARY_OP + #define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op + #undef TARGET_INVALID_BINARY_OP + #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op + + #undef TARGET_C_MODE_FOR_SUFFIX + #define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix + + #undef TARGET_CAN_ELIMINATE + #define TARGET_CAN_ELIMINATE ia64_can_eliminate + + #undef TARGET_TRAMPOLINE_INIT + #define TARGET_TRAMPOLINE_INIT ia64_trampoline_init + + #undef TARGET_CAN_USE_DOLOOP_P + #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost + #undef TARGET_INVALID_WITHIN_DOLOOP + #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null + + #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE + #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change + + #undef TARGET_PREFERRED_RELOAD_CLASS + #define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class + + #undef TARGET_DELAY_SCHED2 + #define TARGET_DELAY_SCHED2 true + + /* Variable tracking should be run after all optimizations which + change order of insns. It also needs a valid CFG. 
*/ + #undef TARGET_DELAY_VARTRACK + #define TARGET_DELAY_VARTRACK true + + #undef TARGET_VECTORIZE_VEC_PERM_CONST + #define TARGET_VECTORIZE_VEC_PERM_CONST ia64_vectorize_vec_perm_const + + #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P + #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p + + #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS + #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 0 + + #undef TARGET_HARD_REGNO_NREGS + #define TARGET_HARD_REGNO_NREGS ia64_hard_regno_nregs + #undef TARGET_HARD_REGNO_MODE_OK + #define TARGET_HARD_REGNO_MODE_OK ia64_hard_regno_mode_ok + + #undef TARGET_MODES_TIEABLE_P + #define TARGET_MODES_TIEABLE_P ia64_modes_tieable_p + + #undef TARGET_CAN_CHANGE_MODE_CLASS + #define TARGET_CAN_CHANGE_MODE_CLASS ia64_can_change_mode_class + + #undef TARGET_CONSTANT_ALIGNMENT + #define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings + + struct gcc_target targetm = TARGET_INITIALIZER; + + /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain + identifier as an argument, so the front end shouldn't look it up. */ + + static bool + ia64_attribute_takes_identifier_p (const_tree attr_id) + { + if (is_attribute_p ("model", attr_id)) + return true; + #if TARGET_ABI_OPEN_VMS + if (is_attribute_p ("common_object", attr_id)) + return true; + #endif + return false; + } + + typedef enum + { + ADDR_AREA_NORMAL, /* normal address area */ + ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */ + } + ia64_addr_area; + + static GTY(()) tree small_ident1; + static GTY(()) tree small_ident2; + + static void + init_idents (void) + { + if (small_ident1 == 0) + { + small_ident1 = get_identifier ("small"); + small_ident2 = get_identifier ("__small__"); + } + } + + /* Retrieve the address area that has been chosen for the given decl. 
*/ + + static ia64_addr_area + ia64_get_addr_area (tree decl) + { + tree model_attr; + + model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl)); + if (model_attr) + { + tree id; + + init_idents (); + id = TREE_VALUE (TREE_VALUE (model_attr)); + if (id == small_ident1 || id == small_ident2) + return ADDR_AREA_SMALL; + } + return ADDR_AREA_NORMAL; + } + + static tree + ia64_handle_model_attribute (tree *node, tree name, tree args, + int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) + { + ia64_addr_area addr_area = ADDR_AREA_NORMAL; + ia64_addr_area area; + tree arg, decl = *node; + + init_idents (); + arg = TREE_VALUE (args); + if (arg == small_ident1 || arg == small_ident2) + { + addr_area = ADDR_AREA_SMALL; + } + else + { + warning (OPT_Wattributes, "invalid argument of %qE attribute", + name); + *no_add_attrs = true; + } + + switch (TREE_CODE (decl)) + { + case VAR_DECL: + if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl)) + == FUNCTION_DECL) + && !TREE_STATIC (decl)) + { + error_at (DECL_SOURCE_LOCATION (decl), + "an address area attribute cannot be specified for " + "local variables"); + *no_add_attrs = true; + } + area = ia64_get_addr_area (decl); + if (area != ADDR_AREA_NORMAL && addr_area != area) + { + error ("address area of %q+D conflicts with previous " + "declaration", decl); + *no_add_attrs = true; + } + break; + + case FUNCTION_DECL: + error_at (DECL_SOURCE_LOCATION (decl), + "address area attribute cannot be specified for " + "functions"); + *no_add_attrs = true; + break; + + default: + warning (OPT_Wattributes, "%qE attribute ignored", + name); + *no_add_attrs = true; + break; + } + + return NULL_TREE; + } + + /* Part of the low level implementation of DEC Ada pragma Common_Object which + enables the shared use of variables stored in overlaid linker areas + corresponding to the use of Fortran COMMON. */ + + static tree + ia64_vms_common_object_attribute (tree *node, tree name, tree args, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) + { + tree decl = *node; + tree id; + + gcc_assert (DECL_P (decl)); + + DECL_COMMON (decl) = 1; + id = TREE_VALUE (args); + if (TREE_CODE (id) != IDENTIFIER_NODE && TREE_CODE (id) != STRING_CST) + { + error ("%qE attribute requires a string constant argument", name); + *no_add_attrs = true; + return NULL_TREE; + } + return NULL_TREE; + } + + /* Part of the low level implementation of DEC Ada pragma Common_Object. */ + + void + ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name, + unsigned HOST_WIDE_INT size, + unsigned int align) + { + tree attr = DECL_ATTRIBUTES (decl); + + if (attr) + attr = lookup_attribute ("common_object", attr); + if (attr) + { + tree id = TREE_VALUE (TREE_VALUE (attr)); + const char *name; + + if (TREE_CODE (id) == IDENTIFIER_NODE) + name = IDENTIFIER_POINTER (id); + else if (TREE_CODE (id) == STRING_CST) + name = TREE_STRING_POINTER (id); + else + abort (); + + fprintf (file, "\t.vms_common\t\"%s\",", name); + } + else + fprintf (file, "%s", COMMON_ASM_OP); + + /* Code from elfos.h. 
*/ + assemble_name (file, name); + fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED",%u", + size, align / BITS_PER_UNIT); + + fputc ('\n', file); + } + + static void + ia64_encode_addr_area (tree decl, rtx symbol) + { + int flags; + + flags = SYMBOL_REF_FLAGS (symbol); + switch (ia64_get_addr_area (decl)) + { + case ADDR_AREA_NORMAL: break; + case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break; + default: gcc_unreachable (); + } + SYMBOL_REF_FLAGS (symbol) = flags; + } + + static void + ia64_encode_section_info (tree decl, rtx rtl, int first) + { + default_encode_section_info (decl, rtl, first); + + /* Careful not to prod global register variables. */ + if (TREE_CODE (decl) == VAR_DECL + && GET_CODE (DECL_RTL (decl)) == MEM + && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF + && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))) + ia64_encode_addr_area (decl, XEXP (rtl, 0)); + } + + /* Return 1 if the operands of a move are ok. */ + + int + ia64_move_ok (rtx dst, rtx src) + { + /* If we're under init_recog_no_volatile, we'll not be able to use + memory_operand. So check the code directly and don't worry about + the validity of the underlying address, which should have been + checked elsewhere anyway. */ + if (GET_CODE (dst) != MEM) + return 1; + if (GET_CODE (src) == MEM) + return 0; + if (register_operand (src, VOIDmode)) + return 1; + + /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0. */ + if (INTEGRAL_MODE_P (GET_MODE (dst))) + return src == const0_rtx; + else + return satisfies_constraint_G (src); + } + + /* Return 1 if the operands are ok for a floating point load pair. */ + + int + ia64_load_pair_ok (rtx dst, rtx src) + { + /* ??? There is a thinko in the implementation of the "x" constraint and the + FP_REGS class. The constraint will also reject (reg f30:TI) so we must + also return false for it. */ + if (GET_CODE (dst) != REG + || !(FP_REGNO_P (REGNO (dst)) && FP_REGNO_P (REGNO (dst) + 1))) + return 0; + if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src)) + return 0; + switch (GET_CODE (XEXP (src, 0))) + { + case REG: + case POST_INC: + break; + case POST_DEC: + return 0; + case POST_MODIFY: + { + rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1); + + if (GET_CODE (adjust) != CONST_INT + || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src))) + return 0; + } + break; + default: + abort (); + } + return 1; + } + + int + addp4_optimize_ok (rtx op1, rtx op2) + { + return (basereg_operand (op1, GET_MODE(op1)) != + basereg_operand (op2, GET_MODE(op2))); + } + + /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction. + Return the length of the field, or <= 0 on failure. */ + + int + ia64_depz_field_mask (rtx rop, rtx rshift) + { + unsigned HOST_WIDE_INT op = INTVAL (rop); + unsigned HOST_WIDE_INT shift = INTVAL (rshift); + + /* Get rid of the zero bits we're shifting in. */ + op >>= shift; + + /* We must now have a solid block of 1's at bit 0. */ + return exact_log2 (op + 1); + } + + /* Return the TLS model to use for ADDR. */ + + static enum tls_model + tls_symbolic_operand_type (rtx addr) + { + enum tls_model tls_kind = TLS_MODEL_NONE; + + if (GET_CODE (addr) == CONST) + { + if (GET_CODE (XEXP (addr, 0)) == PLUS + && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF) + tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0)); + } + else if (GET_CODE (addr) == SYMBOL_REF) + tls_kind = SYMBOL_REF_TLS_MODEL (addr); + + return tls_kind; + } + + /* Returns true if REG (assumed to be a `reg' RTX) is valid for use + as a base register. 
*/ + + static inline bool + ia64_reg_ok_for_base_p (const_rtx reg, bool strict) + { + if (strict + && REGNO_OK_FOR_BASE_P (REGNO (reg))) + return true; + else if (!strict + && (GENERAL_REGNO_P (REGNO (reg)) + || !HARD_REGISTER_P (reg))) + return true; + else + return false; + } + + static bool + ia64_legitimate_address_reg (const_rtx reg, bool strict) + { + if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict)) + || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0)) + && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict))) + return true; + + return false; + } + + static bool + ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict) + { + if (GET_CODE (disp) == PLUS + && rtx_equal_p (reg, XEXP (disp, 0)) + && (ia64_legitimate_address_reg (XEXP (disp, 1), strict) + || (CONST_INT_P (XEXP (disp, 1)) + && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255)))) + return true; + + return false; + } + + /* Implement TARGET_LEGITIMATE_ADDRESS_P. */ + + static bool + ia64_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED, + rtx x, bool strict) + { + if (ia64_legitimate_address_reg (x, strict)) + return true; + else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC) + && ia64_legitimate_address_reg (XEXP (x, 0), strict) + && XEXP (x, 0) != arg_pointer_rtx) + return true; + else if (GET_CODE (x) == POST_MODIFY + && ia64_legitimate_address_reg (XEXP (x, 0), strict) + && XEXP (x, 0) != arg_pointer_rtx + && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict)) + return true; + else + return false; + } + + /* Return true if X is a constant that is valid for some immediate + field in an instruction. */ + + static bool + ia64_legitimate_constant_p (machine_mode mode, rtx x) + { + switch (GET_CODE (x)) + { + case CONST_INT: + case LABEL_REF: + return true; + + case CONST_DOUBLE: + if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode) + return true; + return satisfies_constraint_G (x); + + case CONST: + case SYMBOL_REF: + /* ??? Short term workaround for PR 28490. We must make the code here + match the code in ia64_expand_move and move_operand, even though they + are both technically wrong. */ + if (tls_symbolic_operand_type (x) == 0) + { + HOST_WIDE_INT addend = 0; + rtx op = x; + + if (GET_CODE (op) == CONST + && GET_CODE (XEXP (op, 0)) == PLUS + && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT) + { + addend = INTVAL (XEXP (XEXP (op, 0), 1)); + op = XEXP (XEXP (op, 0), 0); + } + + if (any_offset_symbol_operand (op, mode) + || function_operand (op, mode)) + return true; + if (aligned_offset_symbol_operand (op, mode)) + return (addend & 0x3fff) == 0; + return false; + } + return false; + + case CONST_VECTOR: + if (mode == V2SFmode) + return satisfies_constraint_Y (x); + + return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT + && GET_MODE_SIZE (mode) <= 8); + + default: + return false; + } + } + + /* Don't allow TLS addresses to get spilled to memory. */ + + static bool + ia64_cannot_force_const_mem (machine_mode mode, rtx x) + { + if (mode == RFmode) + return true; + return tls_symbolic_operand_type (x) != 0; + } + + /* Expand a symbolic constant load. */ + + bool + ia64_expand_load_address (rtx dest, rtx src) + { + gcc_assert (GET_CODE (dest) == REG); + + /* ILP32 mode still loads 64-bits of data from the GOT. This avoids + having to pointer-extend the value afterward. Other forms of address + computation below are also more natural to compute as 64-bit quantities. + If we've been given an SImode destination register, change it. 
*/ + if (GET_MODE (dest) != Pmode) + dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest), + byte_lowpart_offset (Pmode, GET_MODE (dest))); + + if (TARGET_NO_PIC) + return false; + if (small_addr_symbolic_operand (src, VOIDmode)) + return false; + + if (TARGET_AUTO_PIC) + emit_insn (gen_load_gprel64 (dest, src)); + else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src)) + emit_insn (gen_load_fptr (dest, src)); + else if (sdata_symbolic_operand (src, VOIDmode)) + emit_insn (gen_load_gprel (dest, src)); + else if (local_symbolic_operand64 (src, VOIDmode)) + { + /* We want to use @gprel rather than @ltoff relocations for local + symbols: + - @gprel does not require dynamic linker + - and does not use .sdata section + https://gcc.gnu.org/bugzilla/60465 */ + emit_insn (gen_load_gprel64 (dest, src)); + } + else + { + HOST_WIDE_INT addend = 0; + rtx tmp; + + /* We did split constant offsets in ia64_expand_move, and we did try + to keep them split in move_operand, but we also allowed reload to + rematerialize arbitrary constants rather than spill the value to + the stack and reload it. So we have to be prepared here to split + them apart again. */ + if (GET_CODE (src) == CONST) + { + HOST_WIDE_INT hi, lo; + + hi = INTVAL (XEXP (XEXP (src, 0), 1)); + lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000; + hi = hi - lo; + + if (lo != 0) + { + addend = lo; + src = plus_constant (Pmode, XEXP (XEXP (src, 0), 0), hi); + } + } + + tmp = gen_rtx_HIGH (Pmode, src); + tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx); + emit_insn (gen_rtx_SET (dest, tmp)); + + tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src); + emit_insn (gen_rtx_SET (dest, tmp)); + + if (addend) + { + tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend)); + emit_insn (gen_rtx_SET (dest, tmp)); + } + } + + return true; + } + + static GTY(()) rtx gen_tls_tga; + static rtx + gen_tls_get_addr (void) + { + if (!gen_tls_tga) + gen_tls_tga = init_one_libfunc ("__tls_get_addr"); + return gen_tls_tga; + } + + static GTY(()) rtx thread_pointer_rtx; + static rtx + gen_thread_pointer (void) + { + if (!thread_pointer_rtx) + thread_pointer_rtx = gen_rtx_REG (Pmode, 13); + return thread_pointer_rtx; + } + + static rtx + ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1, + rtx orig_op1, HOST_WIDE_INT addend) + { + rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp; + rtx_insn *insns; + rtx orig_op0 = op0; + HOST_WIDE_INT addend_lo, addend_hi; + + switch (tls_kind) + { + case TLS_MODEL_GLOBAL_DYNAMIC: + start_sequence (); + + tga_op1 = gen_reg_rtx (Pmode); + emit_insn (gen_load_dtpmod (tga_op1, op1)); + + tga_op2 = gen_reg_rtx (Pmode); + emit_insn (gen_load_dtprel (tga_op2, op1)); + + tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX, + LCT_CONST, Pmode, + tga_op1, Pmode, tga_op2, Pmode); + + insns = get_insns (); + end_sequence (); + + if (GET_MODE (op0) != Pmode) + op0 = tga_ret; + emit_libcall_block (insns, op0, tga_ret, op1); + break; + + case TLS_MODEL_LOCAL_DYNAMIC: + /* ??? This isn't the completely proper way to do local-dynamic + If the call to __tls_get_addr is used only by a single symbol, + then we should (somehow) move the dtprel to the second arg + to avoid the extra add. 
*/ + start_sequence (); + + tga_op1 = gen_reg_rtx (Pmode); + emit_insn (gen_load_dtpmod (tga_op1, op1)); + + tga_op2 = const0_rtx; + + tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX, + LCT_CONST, Pmode, + tga_op1, Pmode, tga_op2, Pmode); + + insns = get_insns (); + end_sequence (); + + tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), + UNSPEC_LD_BASE); + tmp = gen_reg_rtx (Pmode); + emit_libcall_block (insns, tmp, tga_ret, tga_eqv); + + if (!register_operand (op0, Pmode)) + op0 = gen_reg_rtx (Pmode); + if (TARGET_TLS64) + { + emit_insn (gen_load_dtprel (op0, op1)); + emit_insn (gen_adddi3 (op0, tmp, op0)); + } + else + emit_insn (gen_add_dtprel (op0, op1, tmp)); + break; + + case TLS_MODEL_INITIAL_EXEC: + addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000; + addend_hi = addend - addend_lo; + + op1 = plus_constant (Pmode, op1, addend_hi); + addend = addend_lo; + + tmp = gen_reg_rtx (Pmode); + emit_insn (gen_load_tprel (tmp, op1)); + + if (!register_operand (op0, Pmode)) + op0 = gen_reg_rtx (Pmode); + emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ())); + break; + + case TLS_MODEL_LOCAL_EXEC: + if (!register_operand (op0, Pmode)) + op0 = gen_reg_rtx (Pmode); + + op1 = orig_op1; + addend = 0; + if (TARGET_TLS64) + { + emit_insn (gen_load_tprel (op0, op1)); + emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ())); + } + else + emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ())); + break; + + default: + gcc_unreachable (); + } + + if (addend) + op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend), + orig_op0, 1, OPTAB_DIRECT); + if (orig_op0 == op0) + return NULL_RTX; + if (GET_MODE (orig_op0) == Pmode) + return op0; + return gen_lowpart (GET_MODE (orig_op0), op0); + } + + rtx + ia64_expand_move (rtx op0, rtx op1) + { + machine_mode mode = GET_MODE (op0); + + if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1)) + op1 = force_reg (mode, op1); + + if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode)) + { + HOST_WIDE_INT addend = 0; + enum tls_model tls_kind; + rtx sym = op1; + + if (GET_CODE (op1) == CONST + && GET_CODE (XEXP (op1, 0)) == PLUS + && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT) + { + addend = INTVAL (XEXP (XEXP (op1, 0), 1)); + sym = XEXP (XEXP (op1, 0), 0); + } + + tls_kind = tls_symbolic_operand_type (sym); + if (tls_kind) + return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend); + + if (any_offset_symbol_operand (sym, mode)) + addend = 0; + else if (aligned_offset_symbol_operand (sym, mode)) + { + HOST_WIDE_INT addend_lo, addend_hi; + + addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000; + addend_hi = addend - addend_lo; + + if (addend_lo != 0) + { + op1 = plus_constant (mode, sym, addend_hi); + addend = addend_lo; + } + else + addend = 0; + } + else + op1 = sym; + + if (reload_completed) + { + /* We really should have taken care of this offset earlier. */ + gcc_assert (addend == 0); + if (ia64_expand_load_address (op0, op1)) + return NULL_RTX; + } + + if (addend) + { + rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode); + + emit_insn (gen_rtx_SET (subtarget, op1)); + + op1 = expand_simple_binop (mode, PLUS, subtarget, + GEN_INT (addend), op0, 1, OPTAB_DIRECT); + if (op0 == op1) + return NULL_RTX; + } + } + + return op1; + } + + /* Split a move from OP1 to OP0 conditional on COND. 
*/ + + void + ia64_emit_cond_move (rtx op0, rtx op1, rtx cond) + { + rtx_insn *insn, *first = get_last_insn (); + + emit_move_insn (op0, op1); + + for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn)) + if (INSN_P (insn)) + PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), + PATTERN (insn)); + } + + /* Split a post-reload TImode or TFmode reference into two DImode + components. This is made extra difficult by the fact that we do + not get any scratch registers to work with, because reload cannot + be prevented from giving us a scratch that overlaps the register + pair involved. So instead, when addressing memory, we tweak the + pointer register up and back down with POST_INCs. Or up and not + back down when we can get away with it. + + REVERSED is true when the loads must be done in reversed order + (high word first) for correctness. DEAD is true when the pointer + dies with the second insn we generate and therefore the second + address must not carry a postmodify. + + May return an insn which is to be emitted after the moves. */ + + static rtx + ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead) + { + rtx fixup = 0; + + switch (GET_CODE (in)) + { + case REG: + out[reversed] = gen_rtx_REG (DImode, REGNO (in)); + out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1); + break; + + case CONST_INT: + case CONST_DOUBLE: + /* Cannot occur reversed. */ + gcc_assert (!reversed); + + if (GET_MODE (in) != TFmode) + split_double (in, &out[0], &out[1]); + else + /* split_double does not understand how to split a TFmode + quantity into a pair of DImode constants. */ + { + unsigned HOST_WIDE_INT p[2]; + long l[4]; /* TFmode is 128 bits */ + + real_to_target (l, CONST_DOUBLE_REAL_VALUE (in), TFmode); + + if (FLOAT_WORDS_BIG_ENDIAN) + { + p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1]; + p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3]; + } + else + { + p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0]; + p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2]; + } + out[0] = GEN_INT (p[0]); + out[1] = GEN_INT (p[1]); + } + break; + + case MEM: + { + rtx base = XEXP (in, 0); + rtx offset; + + switch (GET_CODE (base)) + { + case REG: + if (!reversed) + { + out[0] = adjust_automodify_address + (in, DImode, gen_rtx_POST_INC (Pmode, base), 0); + out[1] = adjust_automodify_address + (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8); + } + else + { + /* Reversal requires a pre-increment, which can only + be done as a separate insn. */ + emit_insn (gen_adddi3 (base, base, GEN_INT (8))); + out[0] = adjust_automodify_address + (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8); + out[1] = adjust_address (in, DImode, 0); + } + break; + + case POST_INC: + gcc_assert (!reversed && !dead); + + /* Just do the increment in two steps. */ + out[0] = adjust_automodify_address (in, DImode, 0, 0); + out[1] = adjust_automodify_address (in, DImode, 0, 8); + break; + + case POST_DEC: + gcc_assert (!reversed && !dead); + + /* Add 8, subtract 24. */ + base = XEXP (base, 0); + out[0] = adjust_automodify_address + (in, DImode, gen_rtx_POST_INC (Pmode, base), 0); + out[1] = adjust_automodify_address + (in, DImode, + gen_rtx_POST_MODIFY (Pmode, base, + plus_constant (Pmode, base, -24)), + 8); + break; + + case POST_MODIFY: + gcc_assert (!reversed && !dead); + + /* Extract and adjust the modification. This case is + trickier than the others, because we might have an + index register, or we might have a combined offset that + doesn't fit a signed 9-bit displacement field. 
We can + assume the incoming expression is already legitimate. */ + offset = XEXP (base, 1); + base = XEXP (base, 0); + + out[0] = adjust_automodify_address + (in, DImode, gen_rtx_POST_INC (Pmode, base), 0); + + if (GET_CODE (XEXP (offset, 1)) == REG) + { + /* Can't adjust the postmodify to match. Emit the + original, then a separate addition insn. */ + out[1] = adjust_automodify_address (in, DImode, 0, 8); + fixup = gen_adddi3 (base, base, GEN_INT (-8)); + } + else + { + gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT); + if (INTVAL (XEXP (offset, 1)) < -256 + 8) + { + /* Again the postmodify cannot be made to match, + but in this case it's more efficient to get rid + of the postmodify entirely and fix up with an + add insn. */ + out[1] = adjust_automodify_address (in, DImode, base, 8); + fixup = gen_adddi3 + (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8)); + } + else + { + /* Combined offset still fits in the displacement field. + (We cannot overflow it at the high end.) */ + out[1] = adjust_automodify_address + (in, DImode, gen_rtx_POST_MODIFY + (Pmode, base, gen_rtx_PLUS + (Pmode, base, + GEN_INT (INTVAL (XEXP (offset, 1)) - 8))), + 8); + } + } + break; + + default: + gcc_unreachable (); + } + break; + } + + default: + gcc_unreachable (); + } + + return fixup; + } + + /* Split a TImode or TFmode move instruction after reload. + This is used by *movtf_internal and *movti_internal. */ + void + ia64_split_tmode_move (rtx operands[]) + { + rtx in[2], out[2], insn; + rtx fixup[2]; + bool dead = false; + bool reversed = false; + + /* It is possible for reload to decide to overwrite a pointer with + the value it points to. In that case we have to do the loads in + the appropriate order so that the pointer is not destroyed too + early. Also we must not generate a postmodify for that second + load, or rws_access_regno will die. And we must not generate a + postmodify for the second load if the destination register + overlaps with the base register. */ + if (GET_CODE (operands[1]) == MEM + && reg_overlap_mentioned_p (operands[0], operands[1])) + { + rtx base = XEXP (operands[1], 0); + while (GET_CODE (base) != REG) + base = XEXP (base, 0); + + if (REGNO (base) == REGNO (operands[0])) + reversed = true; + + if (refers_to_regno_p (REGNO (operands[0]), + REGNO (operands[0])+2, + base, 0)) + dead = true; + } + /* Another reason to do the moves in reversed order is if the first + element of the target register pair is also the second element of + the source register pair. */ + if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG + && REGNO (operands[0]) == REGNO (operands[1]) + 1) + reversed = true; + + fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead); + fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead); + + #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \ + if (GET_CODE (EXP) == MEM \ + && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \ + || GET_CODE (XEXP (EXP, 0)) == POST_INC \ + || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \ + add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0)) + + insn = emit_insn (gen_rtx_SET (out[0], in[0])); + MAYBE_ADD_REG_INC_NOTE (insn, in[0]); + MAYBE_ADD_REG_INC_NOTE (insn, out[0]); + + insn = emit_insn (gen_rtx_SET (out[1], in[1])); + MAYBE_ADD_REG_INC_NOTE (insn, in[1]); + MAYBE_ADD_REG_INC_NOTE (insn, out[1]); + + if (fixup[0]) + emit_insn (fixup[0]); + if (fixup[1]) + emit_insn (fixup[1]); + + #undef MAYBE_ADD_REG_INC_NOTE + } + + /* ??? Fixing GR->FR XFmode moves during reload is hard. 
You need to go + through memory plus an extra GR scratch register. Except that you can + either get the first from TARGET_SECONDARY_MEMORY_NEEDED or the second + from SECONDARY_RELOAD_CLASS, but not both. + + We got into problems in the first place by allowing a construct like + (subreg:XF (reg:TI)), which we got from a union containing a long double. + This solution attempts to prevent this situation from occurring. When + we see something like the above, we spill the inner register to memory. */ + + static rtx + spill_xfmode_rfmode_operand (rtx in, int force, machine_mode mode) + { + if (GET_CODE (in) == SUBREG + && GET_MODE (SUBREG_REG (in)) == TImode + && GET_CODE (SUBREG_REG (in)) == REG) + { + rtx memt = assign_stack_temp (TImode, 16); + emit_move_insn (memt, SUBREG_REG (in)); + return adjust_address (memt, mode, 0); + } + else if (force && GET_CODE (in) == REG) + { + rtx memx = assign_stack_temp (mode, 16); + emit_move_insn (memx, in); + return memx; + } + else + return in; + } + + /* Expand the movxf or movrf pattern (MODE says which) with the given + OPERANDS, returning true if the pattern should then invoke + DONE. */ + + bool + ia64_expand_movxf_movrf (machine_mode mode, rtx operands[]) + { + rtx op0 = operands[0]; + + if (GET_CODE (op0) == SUBREG) + op0 = SUBREG_REG (op0); + + /* We must support XFmode loads into general registers for stdarg/vararg, + unprototyped calls, and a rare case where a long double is passed as + an argument after a float HFA fills the FP registers. We split them into + DImode loads for convenience. We also need to support XFmode stores + for the last case. This case does not happen for stdarg/vararg routines, + because we do a block store to memory of unnamed arguments. */ + + if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0))) + { + rtx out[2]; + + /* We're hoping to transform everything that deals with XFmode + quantities and GR registers early in the compiler. */ + gcc_assert (can_create_pseudo_p ()); + + /* Struct to register can just use TImode instead. */ + if ((GET_CODE (operands[1]) == SUBREG + && GET_MODE (SUBREG_REG (operands[1])) == TImode) + || (GET_CODE (operands[1]) == REG + && GR_REGNO_P (REGNO (operands[1])))) + { + rtx op1 = operands[1]; + + if (GET_CODE (op1) == SUBREG) + op1 = SUBREG_REG (op1); + else + op1 = gen_rtx_REG (TImode, REGNO (op1)); + + emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1); + return true; + } + + if (GET_CODE (operands[1]) == CONST_DOUBLE) + { + /* Don't word-swap when reading in the constant. */ + emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)), + operand_subword (operands[1], WORDS_BIG_ENDIAN, + 0, mode)); + emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1), + operand_subword (operands[1], !WORDS_BIG_ENDIAN, + 0, mode)); + return true; + } + + /* If the quantity is in a register not known to be GR, spill it. */ + if (register_operand (operands[1], mode)) + operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode); + + gcc_assert (GET_CODE (operands[1]) == MEM); + + /* Don't word-swap when reading in the value. */ + out[0] = gen_rtx_REG (DImode, REGNO (op0)); + out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1); + + emit_move_insn (out[0], adjust_address (operands[1], DImode, 0)); + emit_move_insn (out[1], adjust_address (operands[1], DImode, 8)); + return true; + } + + if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1]))) + { + /* We're hoping to transform everything that deals with XFmode + quantities and GR registers early in the compiler. 
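   In other words, by the time pseudos can no longer be created (reload
   and later) no XFmode value should still be living in a general
   register; the assertion below documents that assumption.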
*/ + gcc_assert (can_create_pseudo_p ()); + + /* Op0 can't be a GR_REG here, as that case is handled above. + If op0 is a register, then we spill op1, so that we now have a + MEM operand. This requires creating an XFmode subreg of a TImode reg + to force the spill. */ + if (register_operand (operands[0], mode)) + { + rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1])); + op1 = gen_rtx_SUBREG (mode, op1, 0); + operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode); + } + + else + { + rtx in[2]; + + gcc_assert (GET_CODE (operands[0]) == MEM); + + /* Don't word-swap when writing out the value. */ + in[0] = gen_rtx_REG (DImode, REGNO (operands[1])); + in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1); + + emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]); + emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]); + return true; + } + } + + if (!reload_in_progress && !reload_completed) + { + operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode); + + if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG) + { + rtx memt, memx, in = operands[1]; + if (CONSTANT_P (in)) + in = validize_mem (force_const_mem (mode, in)); + if (GET_CODE (in) == MEM) + memt = adjust_address (in, TImode, 0); + else + { + memt = assign_stack_temp (TImode, 16); + memx = adjust_address (memt, mode, 0); + emit_move_insn (memx, in); + } + emit_move_insn (op0, memt); + return true; + } + + if (!ia64_move_ok (operands[0], operands[1])) + operands[1] = force_reg (mode, operands[1]); + } + + return false; + } + + /* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1 + with the expression that holds the compare result (in VOIDmode). */ + + static GTY(()) rtx cmptf_libfunc; + + void + ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1) + { + enum rtx_code code = GET_CODE (*expr); + rtx cmp; + + /* If we have a BImode input, then we already have a compare result, and + do not need to emit another comparison. */ + if (GET_MODE (*op0) == BImode) + { + gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx); + cmp = *op0; + } + /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a + magic number as its third argument, that indicates what to do. + The return value is an integer to be compared against zero. */ + else if (TARGET_HPUX && GET_MODE (*op0) == TFmode) + { + enum qfcmp_magic { + QCMP_INV = 1, /* Raise FP_INVALID on NaNs as a side effect. */ + QCMP_UNORD = 2, + QCMP_EQ = 4, + QCMP_LT = 8, + QCMP_GT = 16 + }; + int magic; + enum rtx_code ncode; + rtx ret; + + gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode); + switch (code) + { + /* 1 = equal, 0 = not equal. Equality operators do + not raise FP_INVALID when given a NaN operand. */ + case EQ: magic = QCMP_EQ; ncode = NE; break; + case NE: magic = QCMP_EQ; ncode = EQ; break; + /* isunordered() from C99. */ + case UNORDERED: magic = QCMP_UNORD; ncode = NE; break; + case ORDERED: magic = QCMP_UNORD; ncode = EQ; break; + /* Relational operators raise FP_INVALID when given + a NaN operand. */ + case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break; + case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break; + case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break; + case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break; + /* Unordered relational operators do not raise FP_INVALID + when given a NaN operand. 
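   As a worked example, UNLE maps to QCMP_LT | QCMP_EQ | QCMP_UNORD
   = 8 + 4 + 2 = 14, so the sequence below boils down to

	ret = _U_Qfcmp (*op0, *op1, 14);
	cmp = (ret != 0);

   and the caller then branches on cmp compared against zero.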
*/ + case UNLT: magic = QCMP_LT |QCMP_UNORD; ncode = NE; break; + case UNLE: magic = QCMP_LT|QCMP_EQ|QCMP_UNORD; ncode = NE; break; + case UNGT: magic = QCMP_GT |QCMP_UNORD; ncode = NE; break; + case UNGE: magic = QCMP_GT|QCMP_EQ|QCMP_UNORD; ncode = NE; break; + /* Not supported. */ + case UNEQ: + case LTGT: + default: gcc_unreachable (); + } + + start_sequence (); + + ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, + *op0, TFmode, *op1, TFmode, + GEN_INT (magic), DImode); + cmp = gen_reg_rtx (BImode); + emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (ncode, BImode, + ret, const0_rtx))); + + rtx_insn *insns = get_insns (); + end_sequence (); + + emit_libcall_block (insns, cmp, cmp, + gen_rtx_fmt_ee (code, BImode, *op0, *op1)); + code = NE; + } + else + { + cmp = gen_reg_rtx (BImode); + emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code, BImode, *op0, *op1))); + code = NE; + } + + *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx); + *op0 = cmp; + *op1 = const0_rtx; + } + + /* Generate an integral vector comparison. Return true if the condition has + been reversed, and so the sense of the comparison should be inverted. */ + + static bool + ia64_expand_vecint_compare (enum rtx_code code, machine_mode mode, + rtx dest, rtx op0, rtx op1) + { + bool negate = false; + rtx x; + + /* Canonicalize the comparison to EQ, GT, GTU. */ + switch (code) + { + case EQ: + case GT: + case GTU: + break; + + case NE: + case LE: + case LEU: + code = reverse_condition (code); + negate = true; + break; + + case GE: + case GEU: + code = reverse_condition (code); + negate = true; + /* FALLTHRU */ + + case LT: + case LTU: + code = swap_condition (code); + x = op0, op0 = op1, op1 = x; + break; + + default: + gcc_unreachable (); + } + + /* Unsigned parallel compare is not supported by the hardware. Play some + tricks to turn this into a signed comparison against 0. */ + if (code == GTU) + { + switch (mode) + { + case E_V2SImode: + { + rtx t1, t2, mask; + + /* Subtract (-(INT MAX) - 1) from both operands to make + them signed. */ + mask = gen_int_mode (0x80000000, SImode); + mask = gen_const_vec_duplicate (V2SImode, mask); + mask = force_reg (mode, mask); + t1 = gen_reg_rtx (mode); + emit_insn (gen_subv2si3 (t1, op0, mask)); + t2 = gen_reg_rtx (mode); + emit_insn (gen_subv2si3 (t2, op1, mask)); + op0 = t1; + op1 = t2; + code = GT; + } + break; + + case E_V8QImode: + case E_V4HImode: + /* Perform a parallel unsigned saturating subtraction. */ + x = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, op0, op1))); + + code = EQ; + op0 = x; + op1 = CONST0_RTX (mode); + negate = !negate; + break; + + default: + gcc_unreachable (); + } + } + + x = gen_rtx_fmt_ee (code, mode, op0, op1); + emit_insn (gen_rtx_SET (dest, x)); + + return negate; + } + + /* Emit an integral vector conditional move. 
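   The vector compare leaves an all-ones or all-zeros mask in each
   element, so the general case below is simply

	op0 = (cmp & on_true) | (~cmp & on_false)

   with shortcuts when either arm is the zero vector.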
*/ + + void + ia64_expand_vecint_cmov (rtx operands[]) + { + machine_mode mode = GET_MODE (operands[0]); + enum rtx_code code = GET_CODE (operands[3]); + bool negate; + rtx cmp, x, ot, of; + + cmp = gen_reg_rtx (mode); + negate = ia64_expand_vecint_compare (code, mode, cmp, + operands[4], operands[5]); + + ot = operands[1+negate]; + of = operands[2-negate]; + + if (ot == CONST0_RTX (mode)) + { + if (of == CONST0_RTX (mode)) + { + emit_move_insn (operands[0], ot); + return; + } + + x = gen_rtx_NOT (mode, cmp); + x = gen_rtx_AND (mode, x, of); + emit_insn (gen_rtx_SET (operands[0], x)); + } + else if (of == CONST0_RTX (mode)) + { + x = gen_rtx_AND (mode, cmp, ot); + emit_insn (gen_rtx_SET (operands[0], x)); + } + else + { + rtx t, f; + + t = gen_reg_rtx (mode); + x = gen_rtx_AND (mode, cmp, operands[1+negate]); + emit_insn (gen_rtx_SET (t, x)); + + f = gen_reg_rtx (mode); + x = gen_rtx_NOT (mode, cmp); + x = gen_rtx_AND (mode, x, operands[2-negate]); + emit_insn (gen_rtx_SET (f, x)); + + x = gen_rtx_IOR (mode, t, f); + emit_insn (gen_rtx_SET (operands[0], x)); + } + } + + /* Emit an integral vector min or max operation. Return true if all done. */ + + bool + ia64_expand_vecint_minmax (enum rtx_code code, machine_mode mode, + rtx operands[]) + { + rtx xops[6]; + + /* These four combinations are supported directly. */ + if (mode == V8QImode && (code == UMIN || code == UMAX)) + return false; + if (mode == V4HImode && (code == SMIN || code == SMAX)) + return false; + + /* This combination can be implemented with only saturating subtraction. */ + if (mode == V4HImode && code == UMAX) + { + rtx x, tmp = gen_reg_rtx (mode); + + x = gen_rtx_US_MINUS (mode, operands[1], operands[2]); + emit_insn (gen_rtx_SET (tmp, x)); + + emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2])); + return true; + } + + /* Everything else implemented via vector comparisons. */ + xops[0] = operands[0]; + xops[4] = xops[1] = operands[1]; + xops[5] = xops[2] = operands[2]; + + switch (code) + { + case UMIN: + code = LTU; + break; + case UMAX: + code = GTU; + break; + case SMIN: + code = LT; + break; + case SMAX: + code = GT; + break; + default: + gcc_unreachable (); + } + xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]); + + ia64_expand_vecint_cmov (xops); + return true; + } + + /* The vectors LO and HI each contain N halves of a double-wide vector. + Reassemble either the first N/2 or the second N/2 elements. */ + + void + ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp) + { + machine_mode vmode = GET_MODE (lo); + unsigned int i, high, nelt = GET_MODE_NUNITS (vmode); + struct expand_vec_perm_d d; + bool ok; + + d.target = gen_lowpart (vmode, out); + d.op0 = (TARGET_BIG_ENDIAN ? hi : lo); + d.op1 = (TARGET_BIG_ENDIAN ? lo : hi); + d.vmode = vmode; + d.nelt = nelt; + d.one_operand_p = false; + d.testing_p = false; + + high = (highp ? nelt / 2 : 0); + for (i = 0; i < nelt / 2; ++i) + { + d.perm[i * 2] = i + high; + d.perm[i * 2 + 1] = i + high + nelt; + } + + ok = ia64_expand_vec_perm_const_1 (&d); + gcc_assert (ok); + } + + /* Return a vector of the sign-extension of VEC. */ + + static rtx + ia64_unpack_sign (rtx vec, bool unsignedp) + { + machine_mode mode = GET_MODE (vec); + rtx zero = CONST0_RTX (mode); + + if (unsignedp) + return zero; + else + { + rtx sign = gen_reg_rtx (mode); + bool neg; + + neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero); + gcc_assert (!neg); + + return sign; + } + } + + /* Emit an integral vector unpack operation. 
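   For the unsigned case the interleave partner is the zero vector; for
   the signed case it is the elementwise mask (x < 0 ? ~0 : 0) computed
   by ia64_unpack_sign, so pairing each narrow element with its partner
   is exactly zero- or sign-extension into the wider element.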
*/ + + void + ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp) + { + rtx sign = ia64_unpack_sign (operands[1], unsignedp); + ia64_unpack_assemble (operands[0], operands[1], sign, highp); + } + + /* Emit an integral vector widening sum operations. */ + + void + ia64_expand_widen_sum (rtx operands[3], bool unsignedp) + { + machine_mode wmode; + rtx l, h, t, sign; + + sign = ia64_unpack_sign (operands[1], unsignedp); + + wmode = GET_MODE (operands[0]); + l = gen_reg_rtx (wmode); + h = gen_reg_rtx (wmode); + + ia64_unpack_assemble (l, operands[1], sign, false); + ia64_unpack_assemble (h, operands[1], sign, true); + + t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT); + t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT); + if (t != operands[0]) + emit_move_insn (operands[0], t); + } + + /* Emit the appropriate sequence for a call. */ + + void + ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED, + int sibcall_p) + { + rtx insn, b0; + + addr = XEXP (addr, 0); + addr = convert_memory_address (DImode, addr); + b0 = gen_rtx_REG (DImode, R_BR (0)); + + /* ??? Should do this for functions known to bind local too. */ + if (TARGET_NO_PIC || TARGET_AUTO_PIC) + { + if (sibcall_p) + insn = gen_sibcall_nogp (addr); + else if (! retval) + insn = gen_call_nogp (addr, b0); + else + insn = gen_call_value_nogp (retval, addr, b0); + insn = emit_call_insn (insn); + } + else + { + if (sibcall_p) + insn = gen_sibcall_gp (addr); + else if (! retval) + insn = gen_call_gp (addr, b0); + else + insn = gen_call_value_gp (retval, addr, b0); + insn = emit_call_insn (insn); + + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx); + } + + if (sibcall_p) + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0); + + if (TARGET_ABI_OPEN_VMS) + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), + gen_rtx_REG (DImode, GR_REG (25))); + } + + static void + reg_emitted (enum ia64_frame_regs r) + { + if (emitted_frame_related_regs[r] == 0) + emitted_frame_related_regs[r] = current_frame_info.r[r]; + else + gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]); + } + + static int + get_reg (enum ia64_frame_regs r) + { + reg_emitted (r); + return current_frame_info.r[r]; + } + + static bool + is_emitted (int regno) + { + unsigned int r; + + for (r = reg_fp; r < number_of_ia64_frame_regs; r++) + if (emitted_frame_related_regs[r] == regno) + return true; + return false; + } + + void + ia64_reload_gp (void) + { + rtx tmp; + + if (current_frame_info.r[reg_save_gp]) + { + tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp)); + } + else + { + HOST_WIDE_INT offset; + rtx offset_r; + + offset = (current_frame_info.spill_cfa_off + + current_frame_info.spill_size); + if (frame_pointer_needed) + { + tmp = hard_frame_pointer_rtx; + offset = -offset; + } + else + { + tmp = stack_pointer_rtx; + offset = current_frame_info.total_size - offset; + } + + offset_r = GEN_INT (offset); + if (satisfies_constraint_I (offset_r)) + emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r)); + else + { + emit_move_insn (pic_offset_table_rtx, offset_r); + emit_insn (gen_adddi3 (pic_offset_table_rtx, + pic_offset_table_rtx, tmp)); + } + + tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx); + } + + emit_move_insn (pic_offset_table_rtx, tmp); + } + + void + ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r, + rtx scratch_b, int noreturn_p, int sibcall_p) + { + rtx insn; + bool is_desc = false; + + /* If we find we're calling through a register, then we're 
actually + calling through a descriptor, so load up the values. */ + if (REG_P (addr) && GR_REGNO_P (REGNO (addr))) + { + rtx tmp; + bool addr_dead_p; + + /* ??? We are currently constrained to *not* use peep2, because + we can legitimately change the global lifetime of the GP + (in the form of killing where previously live). This is + because a call through a descriptor doesn't use the previous + value of the GP, while a direct call does, and we do not + commit to either form until the split here. + + That said, this means that we lack precise life info for + whether ADDR is dead after this call. This is not terribly + important, since we can fix things up essentially for free + with the POST_DEC below, but it's nice to not use it when we + can immediately tell it's not necessary. */ + addr_dead_p = ((noreturn_p || sibcall_p + || TEST_HARD_REG_BIT (regs_invalidated_by_call, + REGNO (addr))) + && !FUNCTION_ARG_REGNO_P (REGNO (addr))); + + /* Load the code address into scratch_b. */ + tmp = gen_rtx_POST_INC (Pmode, addr); + tmp = gen_rtx_MEM (Pmode, tmp); + emit_move_insn (scratch_r, tmp); + emit_move_insn (scratch_b, scratch_r); + + /* Load the GP address. If ADDR is not dead here, then we must + revert the change made above via the POST_INCREMENT. */ + if (!addr_dead_p) + tmp = gen_rtx_POST_DEC (Pmode, addr); + else + tmp = addr; + tmp = gen_rtx_MEM (Pmode, tmp); + emit_move_insn (pic_offset_table_rtx, tmp); + + is_desc = true; + addr = scratch_b; + } + + if (sibcall_p) + insn = gen_sibcall_nogp (addr); + else if (retval) + insn = gen_call_value_nogp (retval, addr, retaddr); + else + insn = gen_call_nogp (addr, retaddr); + emit_call_insn (insn); + + if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p) + ia64_reload_gp (); + } + + /* Expand an atomic operation. We want to perform MEM = VAL atomically. + + This differs from the generic code in that we know about the zero-extending + properties of cmpxchg, and the zero-extending requirements of ar.ccv. We + also know that ld.acq+cmpxchg.rel equals a full barrier. + + The loop we want to generate looks like + + cmp_reg = mem; + label: + old_reg = cmp_reg; + new_reg = cmp_reg op val; + cmp_reg = compare-and-swap(mem, old_reg, new_reg) + if (cmp_reg != old_reg) + goto label; + + Note that we only do the plain load from memory once. Subsequent + iterations use the value loaded by the compare-and-swap pattern. */ + + void + ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val, + rtx old_dst, rtx new_dst, enum memmodel model) + { + machine_mode mode = GET_MODE (mem); + rtx old_reg, new_reg, cmp_reg, ar_ccv, label; + enum insn_code icode; + + /* Special case for using fetchadd. 
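   This path is taken only when VAL is one of the handful of immediates
   fetchadd accepts (+-1, +-4, +-8, +-16, per fetchadd_operand), and the
   generated code is roughly

	mf				;; only for acq-rel / seq-cst models
	fetchadd4.acq old = [mem], imm	;; or fetchadd8 / .rel variants
	add new = old, imm		;; only if NEW_DST is wanted

   with the exact barrier and completer chosen from MODEL in the switch
   below.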
*/ + if ((mode == SImode || mode == DImode) + && (code == PLUS || code == MINUS) + && fetchadd_operand (val, mode)) + { + if (code == MINUS) + val = GEN_INT (-INTVAL (val)); + + if (!old_dst) + old_dst = gen_reg_rtx (mode); + + switch (model) + { + case MEMMODEL_ACQ_REL: + case MEMMODEL_SEQ_CST: + case MEMMODEL_SYNC_SEQ_CST: + emit_insn (gen_memory_barrier ()); + /* FALLTHRU */ + case MEMMODEL_RELAXED: + case MEMMODEL_ACQUIRE: + case MEMMODEL_SYNC_ACQUIRE: + case MEMMODEL_CONSUME: + if (mode == SImode) + icode = CODE_FOR_fetchadd_acq_si; + else + icode = CODE_FOR_fetchadd_acq_di; + break; + case MEMMODEL_RELEASE: + case MEMMODEL_SYNC_RELEASE: + if (mode == SImode) + icode = CODE_FOR_fetchadd_rel_si; + else + icode = CODE_FOR_fetchadd_rel_di; + break; + + default: + gcc_unreachable (); + } + + emit_insn (GEN_FCN (icode) (old_dst, mem, val)); + + if (new_dst) + { + new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst, + true, OPTAB_WIDEN); + if (new_reg != new_dst) + emit_move_insn (new_dst, new_reg); + } + return; + } + + /* Because of the volatile mem read, we get an ld.acq, which is the + front half of the full barrier. The end half is the cmpxchg.rel. + For relaxed and release memory models, we don't need this. But we + also don't bother trying to prevent it either. */ + gcc_assert (is_mm_relaxed (model) || is_mm_release (model) + || MEM_VOLATILE_P (mem)); + + old_reg = gen_reg_rtx (DImode); + cmp_reg = gen_reg_rtx (DImode); + label = gen_label_rtx (); + + if (mode != DImode) + { + val = simplify_gen_subreg (DImode, val, mode, 0); + emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1)); + } + else + emit_move_insn (cmp_reg, mem); + + emit_label (label); + + ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM); + emit_move_insn (old_reg, cmp_reg); + emit_move_insn (ar_ccv, cmp_reg); + + if (old_dst) + emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg)); + + new_reg = cmp_reg; + if (code == NOT) + { + new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX, + true, OPTAB_DIRECT); + new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true); + } + else + new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX, + true, OPTAB_DIRECT); + + if (mode != DImode) + new_reg = gen_lowpart (mode, new_reg); + if (new_dst) + emit_move_insn (new_dst, new_reg); + + switch (model) + { + case MEMMODEL_RELAXED: + case MEMMODEL_ACQUIRE: + case MEMMODEL_SYNC_ACQUIRE: + case MEMMODEL_CONSUME: + switch (mode) + { + case E_QImode: icode = CODE_FOR_cmpxchg_acq_qi; break; + case E_HImode: icode = CODE_FOR_cmpxchg_acq_hi; break; + case E_SImode: icode = CODE_FOR_cmpxchg_acq_si; break; + case E_DImode: icode = CODE_FOR_cmpxchg_acq_di; break; + default: + gcc_unreachable (); + } + break; + + case MEMMODEL_RELEASE: + case MEMMODEL_SYNC_RELEASE: + case MEMMODEL_ACQ_REL: + case MEMMODEL_SEQ_CST: + case MEMMODEL_SYNC_SEQ_CST: + switch (mode) + { + case E_QImode: icode = CODE_FOR_cmpxchg_rel_qi; break; + case E_HImode: icode = CODE_FOR_cmpxchg_rel_hi; break; + case E_SImode: icode = CODE_FOR_cmpxchg_rel_si; break; + case E_DImode: icode = CODE_FOR_cmpxchg_rel_di; break; + default: + gcc_unreachable (); + } + break; + + default: + gcc_unreachable (); + } + + emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg)); + + emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label); + } + + /* Begin the assembly file. 
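   Besides default_file_start, the only target-specific output here is
   emit_safe_across_calls, which tells the assembler which predicate
   registers are preserved across calls; with the usual IA-64 convention
   (p1-p5 and p16-p63 preserved) the directive comes out as

	.pred.safe_across_calls p1-p5,p16-p63

   though the exact ranges depend on the call-used register set.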
*/ + + static void + ia64_file_start (void) + { + default_file_start (); + emit_safe_across_calls (); + } + + void + emit_safe_across_calls (void) + { + unsigned int rs, re; + int out_state; + + rs = 1; + out_state = 0; + while (1) + { + while (rs < 64 && call_used_or_fixed_reg_p (PR_REG (rs))) + rs++; + if (rs >= 64) + break; + for (re = rs + 1; + re < 64 && ! call_used_or_fixed_reg_p (PR_REG (re)); re++) + continue; + if (out_state == 0) + { + fputs ("\t.pred.safe_across_calls ", asm_out_file); + out_state = 1; + } + else + fputc (',', asm_out_file); + if (re == rs + 1) + fprintf (asm_out_file, "p%u", rs); + else + fprintf (asm_out_file, "p%u-p%u", rs, re - 1); + rs = re + 1; + } + if (out_state) + fputc ('\n', asm_out_file); + } + + /* Globalize a declaration. */ + + static void + ia64_globalize_decl_name (FILE * stream, tree decl) + { + const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0); + tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl)); + if (version_attr) + { + tree v = TREE_VALUE (TREE_VALUE (version_attr)); + const char *p = TREE_STRING_POINTER (v); + fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p); + } + targetm.asm_out.globalize_label (stream, name); + if (TREE_CODE (decl) == FUNCTION_DECL) + ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function"); + } + + /* Helper function for ia64_compute_frame_size: find an appropriate general + register to spill some special register to. SPECIAL_SPILL_MASK contains + bits in GR0 to GR31 that have already been allocated by this routine. + TRY_LOCALS is true if we should attempt to locate a local regnum. */ + + static int + find_gr_spill (enum ia64_frame_regs r, int try_locals) + { + int regno; + + if (emitted_frame_related_regs[r] != 0) + { + regno = emitted_frame_related_regs[r]; + if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed) + && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1) + current_frame_info.n_local_regs = regno - LOC_REG (0) + 1; + else if (crtl->is_leaf + && regno >= GR_REG (1) && regno <= GR_REG (31)) + current_frame_info.gr_used_mask |= 1 << regno; + + return regno; + } + + /* If this is a leaf function, first try an otherwise unused + call-clobbered register. */ + if (crtl->is_leaf) + { + for (regno = GR_REG (1); regno <= GR_REG (31); regno++) + if (! df_regs_ever_live_p (regno) + && call_used_or_fixed_reg_p (regno) + && ! fixed_regs[regno] + && ! global_regs[regno] + && ((current_frame_info.gr_used_mask >> regno) & 1) == 0 + && ! is_emitted (regno)) + { + current_frame_info.gr_used_mask |= 1 << regno; + return regno; + } + } + + if (try_locals) + { + regno = current_frame_info.n_local_regs; + /* If there is a frame pointer, then we can't use loc79, because + that is HARD_FRAME_POINTER_REGNUM. In particular, see the + reg_name switching code in ia64_expand_prologue. */ + while (regno < (80 - frame_pointer_needed)) + if (! is_emitted (LOC_REG (regno++))) + { + current_frame_info.n_local_regs = regno; + return LOC_REG (regno - 1); + } + } + + /* Failed to find a general register to spill to. Must use stack. */ + return 0; + } + + /* In order to make for nice schedules, we try to allocate every temporary + to a different register. We must of course stay away from call-saved, + fixed, and global registers. We must also stay away from registers + allocated in current_frame_info.gr_used_mask, since those include regs + used all through the prologue. + + Any register allocated here must be used immediately. 
The idea is to + aid scheduling, not to solve data flow problems. */ + + static int last_scratch_gr_reg; + + static int + next_scratch_gr_reg (void) + { + int i, regno; + + for (i = 0; i < 32; ++i) + { + regno = (last_scratch_gr_reg + i + 1) & 31; + if (call_used_or_fixed_reg_p (regno) + && ! fixed_regs[regno] + && ! global_regs[regno] + && ((current_frame_info.gr_used_mask >> regno) & 1) == 0) + { + last_scratch_gr_reg = regno; + return regno; + } + } + + /* There must be _something_ available. */ + gcc_unreachable (); + } + + /* Helper function for ia64_compute_frame_size, called through + diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */ + + static void + mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED) + { + unsigned int regno = REGNO (reg); + if (regno < 32) + { + unsigned int i, n = REG_NREGS (reg); + for (i = 0; i < n; ++i) + current_frame_info.gr_used_mask |= 1 << (regno + i); + } + } + + + /* Returns the number of bytes offset between the frame pointer and the stack + pointer for the current function. SIZE is the number of bytes of space + needed for local variables. */ + + static void + ia64_compute_frame_size (HOST_WIDE_INT size) + { + HOST_WIDE_INT total_size; + HOST_WIDE_INT spill_size = 0; + HOST_WIDE_INT extra_spill_size = 0; + HOST_WIDE_INT pretend_args_size; + HARD_REG_SET mask; + int n_spilled = 0; + int spilled_gr_p = 0; + int spilled_fr_p = 0; + unsigned int regno; + int min_regno; + int max_regno; + int i; + + if (current_frame_info.initialized) + return; + + memset (¤t_frame_info, 0, sizeof current_frame_info); + CLEAR_HARD_REG_SET (mask); + + /* Don't allocate scratches to the return register. */ + diddle_return_value (mark_reg_gr_used_mask, NULL); + + /* Don't allocate scratches to the EH scratch registers. */ + if (cfun->machine->ia64_eh_epilogue_sp) + mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL); + if (cfun->machine->ia64_eh_epilogue_bsp) + mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL); + + /* Static stack checking uses r2 and r3. */ + if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK + || flag_stack_clash_protection) + current_frame_info.gr_used_mask |= 0xc; + + /* Find the size of the register stack frame. We have only 80 local + registers, because we reserve 8 for the inputs and 8 for the + outputs. */ + + /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed, + since we'll be adjusting that down later. */ + regno = LOC_REG (78) + ! frame_pointer_needed; + for (; regno >= LOC_REG (0); regno--) + if (df_regs_ever_live_p (regno) && !is_emitted (regno)) + break; + current_frame_info.n_local_regs = regno - LOC_REG (0) + 1; + + /* For functions marked with the syscall_linkage attribute, we must mark + all eight input registers as in use, so that locals aren't visible to + the caller. */ + + if (cfun->machine->n_varargs > 0 + || lookup_attribute ("syscall_linkage", + TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) + current_frame_info.n_input_regs = 8; + else + { + for (regno = IN_REG (7); regno >= IN_REG (0); regno--) + if (df_regs_ever_live_p (regno)) + break; + current_frame_info.n_input_regs = regno - IN_REG (0) + 1; + } + + for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--) + if (df_regs_ever_live_p (regno)) + break; + i = regno - OUT_REG (0) + 1; + + #ifndef PROFILE_HOOK + /* When -p profiling, we need one output register for the mcount argument. + Likewise for -a profiling for the bb_init_func argument. 
For -ax + profiling, we need two output registers for the two bb_init_trace_func + arguments. */ + if (crtl->profile) + i = MAX (i, 1); + #endif + current_frame_info.n_output_regs = i; + + /* ??? No rotating register support yet. */ + current_frame_info.n_rotate_regs = 0; + + /* Discover which registers need spilling, and how much room that + will take. Begin with floating point and general registers, + which will always wind up on the stack. */ + + for (regno = FR_REG (2); regno <= FR_REG (127); regno++) + if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno)) + { + SET_HARD_REG_BIT (mask, regno); + spill_size += 16; + n_spilled += 1; + spilled_fr_p = 1; + } + + for (regno = GR_REG (1); regno <= GR_REG (31); regno++) + if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno)) + { + SET_HARD_REG_BIT (mask, regno); + spill_size += 8; + n_spilled += 1; + spilled_gr_p = 1; + } + + for (regno = BR_REG (1); regno <= BR_REG (7); regno++) + if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno)) + { + SET_HARD_REG_BIT (mask, regno); + spill_size += 8; + n_spilled += 1; + } + + /* Now come all special registers that might get saved in other + general registers. */ + + if (frame_pointer_needed) + { + current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1); + /* If we did not get a register, then we take LOC79. This is guaranteed + to be free, even if regs_ever_live is already set, because this is + HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs, + as we don't count loc79 above. */ + if (current_frame_info.r[reg_fp] == 0) + { + current_frame_info.r[reg_fp] = LOC_REG (79); + current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1; + } + } + + if (! crtl->is_leaf) + { + /* Emit a save of BR0 if we call other functions. Do this even + if this function doesn't return, as EH depends on this to be + able to unwind the stack. */ + SET_HARD_REG_BIT (mask, BR_REG (0)); + + current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1); + if (current_frame_info.r[reg_save_b0] == 0) + { + extra_spill_size += 8; + n_spilled += 1; + } + + /* Similarly for ar.pfs. */ + SET_HARD_REG_BIT (mask, AR_PFS_REGNUM); + current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1); + if (current_frame_info.r[reg_save_ar_pfs] == 0) + { + extra_spill_size += 8; + n_spilled += 1; + } + + /* Similarly for gp. Note that if we're calling setjmp, the stacked + registers are clobbered, so we fall back to the stack. */ + current_frame_info.r[reg_save_gp] + = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1)); + if (current_frame_info.r[reg_save_gp] == 0) + { + SET_HARD_REG_BIT (mask, GR_REG (1)); + spill_size += 8; + n_spilled += 1; + } + } + else + { + if (df_regs_ever_live_p (BR_REG (0)) + && ! call_used_or_fixed_reg_p (BR_REG (0))) + { + SET_HARD_REG_BIT (mask, BR_REG (0)); + extra_spill_size += 8; + n_spilled += 1; + } + + if (df_regs_ever_live_p (AR_PFS_REGNUM)) + { + SET_HARD_REG_BIT (mask, AR_PFS_REGNUM); + current_frame_info.r[reg_save_ar_pfs] + = find_gr_spill (reg_save_ar_pfs, 1); + if (current_frame_info.r[reg_save_ar_pfs] == 0) + { + extra_spill_size += 8; + n_spilled += 1; + } + } + } + + /* Unwind descriptor hackery: things are most efficient if we allocate + consecutive GR save registers for RP, PFS, FP in that order. However, + it is absolutely critical that FP get the only hard register that's + guaranteed to be free, so we allocated it first. 
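   Concretely, the preferred layout is e.g. b0 saved in loc3, ar.pfs in
   loc4 and the frame pointer in loc5; an allocation that came out as
   fp=loc3, b0=loc4, pfs=loc5 is consecutive but mis-ordered and gets
   rotated into place below.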
If all three did + happen to be allocated hard regs, and are consecutive, rearrange them + into the preferred order now. + + If we have already emitted code for any of those registers, + then it's already too late to change. */ + min_regno = MIN (current_frame_info.r[reg_fp], + MIN (current_frame_info.r[reg_save_b0], + current_frame_info.r[reg_save_ar_pfs])); + max_regno = MAX (current_frame_info.r[reg_fp], + MAX (current_frame_info.r[reg_save_b0], + current_frame_info.r[reg_save_ar_pfs])); + if (min_regno > 0 + && min_regno + 2 == max_regno + && (current_frame_info.r[reg_fp] == min_regno + 1 + || current_frame_info.r[reg_save_b0] == min_regno + 1 + || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1) + && (emitted_frame_related_regs[reg_save_b0] == 0 + || emitted_frame_related_regs[reg_save_b0] == min_regno) + && (emitted_frame_related_regs[reg_save_ar_pfs] == 0 + || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1) + && (emitted_frame_related_regs[reg_fp] == 0 + || emitted_frame_related_regs[reg_fp] == min_regno + 2)) + { + current_frame_info.r[reg_save_b0] = min_regno; + current_frame_info.r[reg_save_ar_pfs] = min_regno + 1; + current_frame_info.r[reg_fp] = min_regno + 2; + } + + /* See if we need to store the predicate register block. */ + for (regno = PR_REG (0); regno <= PR_REG (63); regno++) + if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno)) + break; + if (regno <= PR_REG (63)) + { + SET_HARD_REG_BIT (mask, PR_REG (0)); + current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1); + if (current_frame_info.r[reg_save_pr] == 0) + { + extra_spill_size += 8; + n_spilled += 1; + } + + /* ??? Mark them all as used so that register renaming and such + are free to use them. */ + for (regno = PR_REG (0); regno <= PR_REG (63); regno++) + df_set_regs_ever_live (regno, true); + } + + /* If we're forced to use st8.spill, we're forced to save and restore + ar.unat as well. The check for existing liveness allows inline asm + to touch ar.unat. */ + if (spilled_gr_p || cfun->machine->n_varargs + || df_regs_ever_live_p (AR_UNAT_REGNUM)) + { + df_set_regs_ever_live (AR_UNAT_REGNUM, true); + SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM); + current_frame_info.r[reg_save_ar_unat] + = find_gr_spill (reg_save_ar_unat, spill_size == 0); + if (current_frame_info.r[reg_save_ar_unat] == 0) + { + extra_spill_size += 8; + n_spilled += 1; + } + } + + if (df_regs_ever_live_p (AR_LC_REGNUM)) + { + SET_HARD_REG_BIT (mask, AR_LC_REGNUM); + current_frame_info.r[reg_save_ar_lc] + = find_gr_spill (reg_save_ar_lc, spill_size == 0); + if (current_frame_info.r[reg_save_ar_lc] == 0) + { + extra_spill_size += 8; + n_spilled += 1; + } + } + + /* If we have an odd number of words of pretend arguments written to + the stack, then the FR save area will be unaligned. We round the + size of this area up to keep things 16 byte aligned. */ + if (spilled_fr_p) + pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size); + else + pretend_args_size = crtl->args.pretend_args_size; + ++ if (FRAME_GROWS_DOWNWARD) ++ size = IA64_STACK_ALIGN (size); ++ + total_size = (spill_size + extra_spill_size + size + pretend_args_size + + crtl->outgoing_args_size); + total_size = IA64_STACK_ALIGN (total_size); + + /* We always use the 16-byte scratch area provided by the caller, but + if we are a leaf function, there's no one to which we need to provide + a scratch area. 
However, if the function allocates dynamic stack space, + the dynamic offset is computed early and contains STACK_POINTER_OFFSET, + so we need to cope. */ + if (crtl->is_leaf && !cfun->calls_alloca) + total_size = MAX (0, total_size - 16); + + current_frame_info.total_size = total_size; + current_frame_info.spill_cfa_off = pretend_args_size - 16; + current_frame_info.spill_size = spill_size; + current_frame_info.extra_spill_size = extra_spill_size; + current_frame_info.mask = mask; + current_frame_info.n_spilled = n_spilled; + current_frame_info.initialized = reload_completed; + } + + /* Worker function for TARGET_CAN_ELIMINATE. */ + + bool + ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) + { + return (to == BR_REG (0) ? crtl->is_leaf : true); + } + + /* Compute the initial difference between the specified pair of registers. */ + + HOST_WIDE_INT + ia64_initial_elimination_offset (int from, int to) + { - HOST_WIDE_INT offset; ++ HOST_WIDE_INT offset, size = get_frame_size (); + - ia64_compute_frame_size (get_frame_size ()); ++ ia64_compute_frame_size (size); + switch (from) + { + case FRAME_POINTER_REGNUM: - switch (to) - { - case HARD_FRAME_POINTER_REGNUM: - offset = -current_frame_info.total_size; - if (!crtl->is_leaf || cfun->calls_alloca) - offset += 16 + crtl->outgoing_args_size; - break; - - case STACK_POINTER_REGNUM: - offset = 0; - if (!crtl->is_leaf || cfun->calls_alloca) - offset += 16 + crtl->outgoing_args_size; - break; - - default: - gcc_unreachable (); - } ++ offset = FRAME_GROWS_DOWNWARD ? IA64_STACK_ALIGN (size) : 0; ++ if (!crtl->is_leaf || cfun->calls_alloca) ++ offset += 16 + crtl->outgoing_args_size; ++ if (to == HARD_FRAME_POINTER_REGNUM) ++ offset -= current_frame_info.total_size; ++ else ++ gcc_assert (to == STACK_POINTER_REGNUM); + break; + + case ARG_POINTER_REGNUM: + /* Arguments start above the 16 byte save area, unless stdarg + in which case we store through the 16 byte save area. */ + switch (to) + { + case HARD_FRAME_POINTER_REGNUM: + offset = 16 - crtl->args.pretend_args_size; + break; + + case STACK_POINTER_REGNUM: + offset = (current_frame_info.total_size + + 16 - crtl->args.pretend_args_size); + break; + + default: + gcc_unreachable (); + } + break; + + default: + gcc_unreachable (); + } + + return offset; + } + + /* If there are more than a trivial number of register spills, we use + two interleaved iterators so that we can get two memory references + per insn group. + + In order to simplify things in the prologue and epilogue expanders, + we use helper functions to fix up the memory references after the + fact with the appropriate offsets to a POST_MODIFY memory mode. + The following data structure tracks the state of the two iterators + while insns are being emitted. 
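   Two iterators rather than one because alternating base registers
   breaks the serial dependence through a single post-incremented
   pointer, letting consecutive spills issue in the same group (the
   machine can do two memory references per group, as noted above).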
*/ + + struct spill_fill_data + { + rtx_insn *init_after; /* point at which to emit initializations */ + rtx init_reg[2]; /* initial base register */ + rtx iter_reg[2]; /* the iterator registers */ + rtx *prev_addr[2]; /* address of last memory use */ + rtx_insn *prev_insn[2]; /* the insn corresponding to prev_addr */ + HOST_WIDE_INT prev_off[2]; /* last offset */ + int n_iter; /* number of iterators in use */ + int next_iter; /* next iterator to use */ + unsigned int save_gr_used_mask; + }; + + static struct spill_fill_data spill_fill_data; + + static void + setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off) + { + int i; + + spill_fill_data.init_after = get_last_insn (); + spill_fill_data.init_reg[0] = init_reg; + spill_fill_data.init_reg[1] = init_reg; + spill_fill_data.prev_addr[0] = NULL; + spill_fill_data.prev_addr[1] = NULL; + spill_fill_data.prev_insn[0] = NULL; + spill_fill_data.prev_insn[1] = NULL; + spill_fill_data.prev_off[0] = cfa_off; + spill_fill_data.prev_off[1] = cfa_off; + spill_fill_data.next_iter = 0; + spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask; + + spill_fill_data.n_iter = 1 + (n_spills > 2); + for (i = 0; i < spill_fill_data.n_iter; ++i) + { + int regno = next_scratch_gr_reg (); + spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno); + current_frame_info.gr_used_mask |= 1 << regno; + } + } + + static void + finish_spill_pointers (void) + { + current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask; + } + + static rtx + spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off) + { + int iter = spill_fill_data.next_iter; + HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off; + rtx disp_rtx = GEN_INT (disp); + rtx mem; + + if (spill_fill_data.prev_addr[iter]) + { + if (satisfies_constraint_N (disp_rtx)) + { + *spill_fill_data.prev_addr[iter] + = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter], + gen_rtx_PLUS (DImode, + spill_fill_data.iter_reg[iter], + disp_rtx)); + add_reg_note (spill_fill_data.prev_insn[iter], + REG_INC, spill_fill_data.iter_reg[iter]); + } + else + { + /* ??? Could use register post_modify for loads. */ + if (!satisfies_constraint_I (disp_rtx)) + { + rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ()); + emit_move_insn (tmp, disp_rtx); + disp_rtx = tmp; + } + emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter], + spill_fill_data.iter_reg[iter], disp_rtx)); + } + } + /* Micro-optimization: if we've created a frame pointer, it's at + CFA 0, which may allow the real iterator to be initialized lower, + slightly increasing parallelism. Also, if there are few saves + it may eliminate the iterator entirely. */ + else if (disp == 0 + && spill_fill_data.init_reg[iter] == stack_pointer_rtx + && frame_pointer_needed) + { + mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx); + set_mem_alias_set (mem, get_varargs_alias_set ()); + return mem; + } + else + { + rtx seq; + rtx_insn *insn; + + if (disp == 0) + seq = gen_movdi (spill_fill_data.iter_reg[iter], + spill_fill_data.init_reg[iter]); + else + { + start_sequence (); + + if (!satisfies_constraint_I (disp_rtx)) + { + rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ()); + emit_move_insn (tmp, disp_rtx); + disp_rtx = tmp; + } + + emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter], + spill_fill_data.init_reg[iter], + disp_rtx)); + + seq = get_insns (); + end_sequence (); + } + + /* Careful for being the first insn in a sequence. 
*/ + if (spill_fill_data.init_after) + insn = emit_insn_after (seq, spill_fill_data.init_after); + else + { + rtx_insn *first = get_insns (); + if (first) + insn = emit_insn_before (seq, first); + else + insn = emit_insn (seq); + } + spill_fill_data.init_after = insn; + } + + mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]); + + /* ??? Not all of the spills are for varargs, but some of them are. + The rest of the spills belong in an alias set of their own. But + it doesn't actually hurt to include them here. */ + set_mem_alias_set (mem, get_varargs_alias_set ()); + + spill_fill_data.prev_addr[iter] = &XEXP (mem, 0); + spill_fill_data.prev_off[iter] = cfa_off; + + if (++iter >= spill_fill_data.n_iter) + iter = 0; + spill_fill_data.next_iter = iter; + + return mem; + } + + static void + do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off, + rtx frame_reg) + { + int iter = spill_fill_data.next_iter; + rtx mem; + rtx_insn *insn; + + mem = spill_restore_mem (reg, cfa_off); + insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off))); + spill_fill_data.prev_insn[iter] = insn; + + if (frame_reg) + { + rtx base; + HOST_WIDE_INT off; + + RTX_FRAME_RELATED_P (insn) = 1; + + /* Don't even pretend that the unwind code can intuit its way + through a pair of interleaved post_modify iterators. Just + provide the correct answer. */ + + if (frame_pointer_needed) + { + base = hard_frame_pointer_rtx; + off = - cfa_off; + } + else + { + base = stack_pointer_rtx; + off = current_frame_info.total_size - cfa_off; + } + + add_reg_note (insn, REG_CFA_OFFSET, + gen_rtx_SET (gen_rtx_MEM (GET_MODE (reg), + plus_constant (Pmode, + base, off)), + frame_reg)); + } + } + + static void + do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off) + { + int iter = spill_fill_data.next_iter; + rtx_insn *insn; + + insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off), + GEN_INT (cfa_off))); + spill_fill_data.prev_insn[iter] = insn; + } + + /* Wrapper functions that discards the CONST_INT spill offset. These + exist so that we can give gr_spill/gr_fill the offset they need and + use a consistent function interface. */ + + static rtx + gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED) + { + return gen_movdi (dest, src); + } + + static rtx + gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED) + { + return gen_fr_spill (dest, src); + } + + static rtx + gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED) + { + return gen_fr_restore (dest, src); + } + + #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP) + + /* See Table 6.2 of the IA-64 Software Developer Manual, Volume 2. */ + #define BACKING_STORE_SIZE(N) ((N) > 0 ? ((N) + (N)/63 + 1) * 8 : 0) + + /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE, + inclusive. These are offsets from the current stack pointer. BS_SIZE + is the size of the backing store. ??? This clobbers r2 and r3. */ + + static void + ia64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size, + int bs_size) + { + rtx r2 = gen_rtx_REG (Pmode, GR_REG (2)); + rtx r3 = gen_rtx_REG (Pmode, GR_REG (3)); + rtx p6 = gen_rtx_REG (BImode, PR_REG (6)); + + /* On the IA-64 there is a second stack in memory, namely the Backing Store + of the Register Stack Engine. We also need to probe it after checking + that the 2 stacks don't overlap. */ + emit_insn (gen_bsp_value (r3)); + emit_move_insn (r2, GEN_INT (-(first + size))); + + /* Compare current value of BSP and SP registers. 
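   p6 ends up set only when the backing store (r3, which grows towards
   higher addresses) still lies below the memory stack, the only layout
   in which the overlap test further down is meaningful; otherwise the
   predicated compare and trap below are skipped.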
*/ + emit_insn (gen_rtx_SET (p6, gen_rtx_fmt_ee (LTU, BImode, + r3, stack_pointer_rtx))); + + /* Compute the address of the probe for the Backing Store (which grows + towards higher addresses). We probe only at the first offset of + the next page because some OS (eg Linux/ia64) only extend the + backing store when this specific address is hit (but generate a SEGV + on other address). Page size is the worst case (4KB). The reserve + size is at least 4096 - (96 + 2) * 8 = 3312 bytes, which is enough. + Also compute the address of the last probe for the memory stack + (which grows towards lower addresses). */ + emit_insn (gen_rtx_SET (r3, plus_constant (Pmode, r3, 4095))); + emit_insn (gen_rtx_SET (r2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2))); + + /* Compare them and raise SEGV if the former has topped the latter. */ + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx), + gen_rtx_SET (p6, gen_rtx_fmt_ee (GEU, BImode, + r3, r2)))); + emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, r3, GEN_INT (12), + const0_rtx), + const0_rtx)); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx), + gen_rtx_TRAP_IF (VOIDmode, const1_rtx, + GEN_INT (11)))); + + /* Probe the Backing Store if necessary. */ + if (bs_size > 0) + emit_stack_probe (r3); + + /* Probe the memory stack if necessary. */ + if (size == 0) + ; + + /* See if we have a constant small number of probes to generate. If so, + that's the easy case. */ + else if (size <= PROBE_INTERVAL) + emit_stack_probe (r2); + + /* The run-time loop is made up of 9 insns in the generic case while this + compile-time loop is made up of 5+2*(n-2) insns for n # of intervals. */ + else if (size <= 4 * PROBE_INTERVAL) + { + HOST_WIDE_INT i; + + emit_move_insn (r2, GEN_INT (-(first + PROBE_INTERVAL))); + emit_insn (gen_rtx_SET (r2, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2))); + emit_stack_probe (r2); + + /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until + it exceeds SIZE. If only two probes are needed, this will not + generate any code. Then probe at FIRST + SIZE. */ + for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL) + { + emit_insn (gen_rtx_SET (r2, + plus_constant (Pmode, r2, -PROBE_INTERVAL))); + emit_stack_probe (r2); + } + + emit_insn (gen_rtx_SET (r2, + plus_constant (Pmode, r2, + (i - PROBE_INTERVAL) - size))); + emit_stack_probe (r2); + } + + /* Otherwise, do the same as above, but in a loop. Note that we must be + extra careful with variables wrapping around because we might be at + the very top (or the very bottom) of the address space and we have + to be able to handle this case properly; in particular, we use an + equality test for the loop condition. */ + else + { + HOST_WIDE_INT rounded_size; + + emit_move_insn (r2, GEN_INT (-first)); + + + /* Step 1: round SIZE to the previous multiple of the interval. */ + + rounded_size = size & -PROBE_INTERVAL; + + + /* Step 2: compute initial and final value of the loop counter. */ + + /* TEST_ADDR = SP + FIRST. */ + emit_insn (gen_rtx_SET (r2, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2))); + + /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. 
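   As a worked example with the usual 4 KB probe interval: SIZE = 20 KB
   rounds down to ROUNDED_SIZE = 16 KB, the loop below probes at
   FIRST + 4K, 8K, 12K and 16K, and the SIZE != ROUNDED_SIZE tail probe
   covers FIRST + 20K.  The 1 << 21 test merely keeps -ROUNDED_SIZE
   within the 22-bit add-immediate range; larger constants are loaded
   into r3 first.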
*/ + if (rounded_size > (1 << 21)) + { + emit_move_insn (r3, GEN_INT (-rounded_size)); + emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2, r3))); + } + else + emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2, + GEN_INT (-rounded_size)))); + + + /* Step 3: the loop + + do + { + TEST_ADDR = TEST_ADDR + PROBE_INTERVAL + probe at TEST_ADDR + } + while (TEST_ADDR != LAST_ADDR) + + probes at FIRST + N * PROBE_INTERVAL for values of N from 1 + until it is equal to ROUNDED_SIZE. */ + + emit_insn (gen_probe_stack_range (r2, r2, r3)); + + + /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time + that SIZE is equal to ROUNDED_SIZE. */ + + /* TEMP = SIZE - ROUNDED_SIZE. */ + if (size != rounded_size) + { + emit_insn (gen_rtx_SET (r2, plus_constant (Pmode, r2, + rounded_size - size))); + emit_stack_probe (r2); + } + } + + /* Make sure nothing is scheduled before we are done. */ + emit_insn (gen_blockage ()); + } + + /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are + absolute addresses. */ + + const char * + output_probe_stack_range (rtx reg1, rtx reg2) + { + static int labelno = 0; + char loop_lab[32]; + rtx xops[3]; + + ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); + + /* Loop. */ + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); + + /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */ + xops[0] = reg1; + xops[1] = GEN_INT (-PROBE_INTERVAL); + output_asm_insn ("addl %0 = %1, %0", xops); + fputs ("\t;;\n", asm_out_file); + + /* Probe at TEST_ADDR. */ + output_asm_insn ("probe.w.fault %0, 0", xops); + + /* Test if TEST_ADDR == LAST_ADDR. */ + xops[1] = reg2; + xops[2] = gen_rtx_REG (BImode, PR_REG (6)); + output_asm_insn ("cmp.eq %2, %I2 = %0, %1", xops); + + /* Branch. */ + fprintf (asm_out_file, "\t(%s) br.cond.dpnt ", reg_names [PR_REG (7)]); + assemble_name_raw (asm_out_file, loop_lab); + fputc ('\n', asm_out_file); + + return ""; + } + + /* Called after register allocation to add any instructions needed for the + prologue. Using a prologue insn is favored compared to putting all of the + instructions in output_function_prologue(), since it allows the scheduler + to intermix instructions with the saves of the caller saved registers. In + some cases, it might be necessary to emit a barrier instruction as the last + insn to prevent such scheduling. + + Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1 + so that the debug info generation code can handle them properly. + + The register save area is laid out like so: + cfa+16 + [ varargs spill area ] + [ fr register spill area ] + [ br register spill area ] + [ ar register spill area ] + [ pr register spill area ] + [ gr register spill area ] */ + + /* ??? Get inefficient code when the frame size is larger than can fit in an + adds instruction. 
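   (adds takes a 14-bit signed immediate, so once the frame grows past
   +-8 KB the satisfies_constraint_I test below fails and we pay for a
   move of the constant into a scratch register plus a separate add.)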
*/ + + void + ia64_expand_prologue (void) + { + rtx_insn *insn; + rtx ar_pfs_save_reg, ar_unat_save_reg; + int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs; + rtx reg, alt_reg; + + ia64_compute_frame_size (get_frame_size ()); + last_scratch_gr_reg = 15; + + if (flag_stack_usage_info) + current_function_static_stack_size = current_frame_info.total_size; + + if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK + || flag_stack_clash_protection) + { + HOST_WIDE_INT size = current_frame_info.total_size; + int bs_size = BACKING_STORE_SIZE (current_frame_info.n_input_regs + + current_frame_info.n_local_regs); + + if (crtl->is_leaf && !cfun->calls_alloca) + { + if (size > PROBE_INTERVAL && size > get_stack_check_protect ()) + ia64_emit_probe_stack_range (get_stack_check_protect (), + size - get_stack_check_protect (), + bs_size); + else if (size + bs_size > get_stack_check_protect ()) + ia64_emit_probe_stack_range (get_stack_check_protect (), + 0, bs_size); + } + else if (size + bs_size > 0) + ia64_emit_probe_stack_range (get_stack_check_protect (), size, bs_size); + } + + if (dump_file) + { + fprintf (dump_file, "ia64 frame related registers " + "recorded in current_frame_info.r[]:\n"); + #define PRINTREG(a) if (current_frame_info.r[a]) \ + fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a]) + PRINTREG(reg_fp); + PRINTREG(reg_save_b0); + PRINTREG(reg_save_pr); + PRINTREG(reg_save_ar_pfs); + PRINTREG(reg_save_ar_unat); + PRINTREG(reg_save_ar_lc); + PRINTREG(reg_save_gp); + #undef PRINTREG + } + + /* If there is no epilogue, then we don't need some prologue insns. + We need to avoid emitting the dead prologue insns, because flow + will complain about them. */ + if (optimize) + { + edge e; + edge_iterator ei; + + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) + if ((e->flags & EDGE_FAKE) == 0 + && (e->flags & EDGE_FALLTHRU) != 0) + break; + epilogue_p = (e != NULL); + } + else + epilogue_p = 1; + + /* Set the local, input, and output register names. We need to do this + for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in + half. If we use in/loc/out register names, then we get assembler errors + in crtn.S because there is no alloc insn or regstk directive in there. */ + if (! TARGET_REG_NAMES) + { + int inputs = current_frame_info.n_input_regs; + int locals = current_frame_info.n_local_regs; + int outputs = current_frame_info.n_output_regs; + + for (i = 0; i < inputs; i++) + reg_names[IN_REG (i)] = ia64_reg_numbers[i]; + for (i = 0; i < locals; i++) + reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i]; + for (i = 0; i < outputs; i++) + reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i]; + } + + /* Set the frame pointer register name. The regnum is logically loc79, + but of course we'll not have allocated that many locals. Rather than + worrying about renumbering the existing rtxs, we adjust the name. */ + /* ??? This code means that we can never use one local register when + there is a frame pointer. loc79 gets wasted in this case, as it is + renamed to a register that will never be used. See also the try_locals + code in find_gr_spill. */ + if (current_frame_info.r[reg_fp]) + { + const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM]; + reg_names[HARD_FRAME_POINTER_REGNUM] + = reg_names[current_frame_info.r[reg_fp]]; + reg_names[current_frame_info.r[reg_fp]] = tmp; + } + + /* We don't need an alloc instruction if we've used no outputs or locals. 
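   Otherwise the else branch emits the alloc, which in assembly has the
   shape

	alloc savereg = ar.pfs, ins, locals, outs, rot

   e.g. alloc r34 = ar.pfs, 2, 3, 4, 0 for two inputs, three locals and
   four outputs (the register number is illustrative; rot is always 0
   since rotating registers are not used, see n_rotate_regs above).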
*/ + if (current_frame_info.n_local_regs == 0 + && current_frame_info.n_output_regs == 0 + && current_frame_info.n_input_regs <= crtl->args.info.int_regs + && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)) + { + /* If there is no alloc, but there are input registers used, then we + need a .regstk directive. */ + current_frame_info.need_regstk = (TARGET_REG_NAMES != 0); + ar_pfs_save_reg = NULL_RTX; + } + else + { + current_frame_info.need_regstk = 0; + + if (current_frame_info.r[reg_save_ar_pfs]) + { + regno = current_frame_info.r[reg_save_ar_pfs]; + reg_emitted (reg_save_ar_pfs); + } + else + regno = next_scratch_gr_reg (); + ar_pfs_save_reg = gen_rtx_REG (DImode, regno); + + insn = emit_insn (gen_alloc (ar_pfs_save_reg, + GEN_INT (current_frame_info.n_input_regs), + GEN_INT (current_frame_info.n_local_regs), + GEN_INT (current_frame_info.n_output_regs), + GEN_INT (current_frame_info.n_rotate_regs))); + if (current_frame_info.r[reg_save_ar_pfs]) + { + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_REGISTER, + gen_rtx_SET (ar_pfs_save_reg, + gen_rtx_REG (DImode, AR_PFS_REGNUM))); + } + } + + /* Set up frame pointer, stack pointer, and spill iterators. */ + + n_varargs = cfun->machine->n_varargs; + setup_spill_pointers (current_frame_info.n_spilled + n_varargs, + stack_pointer_rtx, 0); + + if (frame_pointer_needed) + { + insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + + /* Force the unwind info to recognize this as defining a new CFA, + rather than some temp register setup. */ + add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX); + } + + if (current_frame_info.total_size != 0) + { + rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size); + rtx offset; + + if (satisfies_constraint_I (frame_size_rtx)) + offset = frame_size_rtx; + else + { + regno = next_scratch_gr_reg (); + offset = gen_rtx_REG (DImode, regno); + emit_move_insn (offset, frame_size_rtx); + } + + insn = emit_insn (gen_adddi3 (stack_pointer_rtx, + stack_pointer_rtx, offset)); + + if (! frame_pointer_needed) + { + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_ADJUST_CFA, + gen_rtx_SET (stack_pointer_rtx, + gen_rtx_PLUS (DImode, + stack_pointer_rtx, + frame_size_rtx))); + } + + /* ??? At this point we must generate a magic insn that appears to + modify the stack pointer, the frame pointer, and all spill + iterators. This would allow the most scheduling freedom. For + now, just hard stop. */ + emit_insn (gen_blockage ()); + } + + /* Must copy out ar.unat before doing any integer spills. */ + if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)) + { + if (current_frame_info.r[reg_save_ar_unat]) + { + ar_unat_save_reg + = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]); + reg_emitted (reg_save_ar_unat); + } + else + { + alt_regno = next_scratch_gr_reg (); + ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno); + current_frame_info.gr_used_mask |= 1 << alt_regno; + } + + reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM); + insn = emit_move_insn (ar_unat_save_reg, reg); + if (current_frame_info.r[reg_save_ar_unat]) + { + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX); + } + + /* Even if we're not going to generate an epilogue, we still + need to save the register so that EH works. */ + if (! epilogue_p && current_frame_info.r[reg_save_ar_unat]) + emit_insn (gen_prologue_use (ar_unat_save_reg)); + } + else + ar_unat_save_reg = NULL_RTX; + + /* Spill all varargs registers. 
Do this before spilling any GR registers, + since we want the UNAT bits for the GR registers to override the UNAT + bits from varargs, which we don't care about. */ + + cfa_off = -16; + for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno) + { + reg = gen_rtx_REG (DImode, regno); + do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX); + } + + /* Locate the bottom of the register save area. */ + cfa_off = (current_frame_info.spill_cfa_off + + current_frame_info.spill_size + + current_frame_info.extra_spill_size); + + /* Save the predicate register block either in a register or in memory. */ + if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0))) + { + reg = gen_rtx_REG (DImode, PR_REG (0)); + if (current_frame_info.r[reg_save_pr] != 0) + { + alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]); + reg_emitted (reg_save_pr); + insn = emit_move_insn (alt_reg, reg); + + /* ??? Denote pr spill/fill by a DImode move that modifies all + 64 hard registers. */ + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX); + + /* Even if we're not going to generate an epilogue, we still + need to save the register so that EH works. */ + if (! epilogue_p) + emit_insn (gen_prologue_use (alt_reg)); + } + else + { + alt_regno = next_scratch_gr_reg (); + alt_reg = gen_rtx_REG (DImode, alt_regno); + insn = emit_move_insn (alt_reg, reg); + do_spill (gen_movdi_x, alt_reg, cfa_off, reg); + cfa_off -= 8; + } + } + + /* Handle AR regs in numerical order. All of them get special handling. */ + if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM) + && current_frame_info.r[reg_save_ar_unat] == 0) + { + reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM); + do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg); + cfa_off -= 8; + } + + /* The alloc insn already copied ar.pfs into a general register. The + only thing we have to do now is copy that register to a stack slot + if we'd not allocated a local register for the job. */ + if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM) + && current_frame_info.r[reg_save_ar_pfs] == 0) + { + reg = gen_rtx_REG (DImode, AR_PFS_REGNUM); + do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg); + cfa_off -= 8; + } + + if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM)) + { + reg = gen_rtx_REG (DImode, AR_LC_REGNUM); + if (current_frame_info.r[reg_save_ar_lc] != 0) + { + alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]); + reg_emitted (reg_save_ar_lc); + insn = emit_move_insn (alt_reg, reg); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX); + + /* Even if we're not going to generate an epilogue, we still + need to save the register so that EH works. */ + if (! epilogue_p) + emit_insn (gen_prologue_use (alt_reg)); + } + else + { + alt_regno = next_scratch_gr_reg (); + alt_reg = gen_rtx_REG (DImode, alt_regno); + emit_move_insn (alt_reg, reg); + do_spill (gen_movdi_x, alt_reg, cfa_off, reg); + cfa_off -= 8; + } + } + + /* Save the return pointer. 
*/ + if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0))) + { + reg = gen_rtx_REG (DImode, BR_REG (0)); + if (current_frame_info.r[reg_save_b0] != 0) + { + alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]); + reg_emitted (reg_save_b0); + insn = emit_move_insn (alt_reg, reg); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (alt_reg, pc_rtx)); + + /* Even if we're not going to generate an epilogue, we still + need to save the register so that EH works. */ + if (! epilogue_p) + emit_insn (gen_prologue_use (alt_reg)); + } + else + { + alt_regno = next_scratch_gr_reg (); + alt_reg = gen_rtx_REG (DImode, alt_regno); + emit_move_insn (alt_reg, reg); + do_spill (gen_movdi_x, alt_reg, cfa_off, reg); + cfa_off -= 8; + } + } + + if (current_frame_info.r[reg_save_gp]) + { + reg_emitted (reg_save_gp); + insn = emit_move_insn (gen_rtx_REG (DImode, + current_frame_info.r[reg_save_gp]), + pic_offset_table_rtx); + } + + /* We should now be at the base of the gr/br/fr spill area. */ + gcc_assert (cfa_off == (current_frame_info.spill_cfa_off + + current_frame_info.spill_size)); + + /* Spill all general registers. */ + for (regno = GR_REG (1); regno <= GR_REG (31); ++regno) + if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) + { + reg = gen_rtx_REG (DImode, regno); + do_spill (gen_gr_spill, reg, cfa_off, reg); + cfa_off -= 8; + } + + /* Spill the rest of the BR registers. */ + for (regno = BR_REG (1); regno <= BR_REG (7); ++regno) + if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) + { + alt_regno = next_scratch_gr_reg (); + alt_reg = gen_rtx_REG (DImode, alt_regno); + reg = gen_rtx_REG (DImode, regno); + emit_move_insn (alt_reg, reg); + do_spill (gen_movdi_x, alt_reg, cfa_off, reg); + cfa_off -= 8; + } + + /* Align the frame and spill all FR registers. */ + for (regno = FR_REG (2); regno <= FR_REG (127); ++regno) + if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) + { + gcc_assert (!(cfa_off & 15)); + reg = gen_rtx_REG (XFmode, regno); + do_spill (gen_fr_spill_x, reg, cfa_off, reg); + cfa_off -= 16; + } + + gcc_assert (cfa_off == current_frame_info.spill_cfa_off); + + finish_spill_pointers (); + } + + /* Output the textual info surrounding the prologue. */ + + void + ia64_start_function (FILE *file, const char *fnname, + tree decl ATTRIBUTE_UNUSED) + { + #if TARGET_ABI_OPEN_VMS + vms_start_function (fnname); + #endif + + fputs ("\t.proc ", file); + assemble_name (file, fnname); + fputc ('\n', file); + ASM_OUTPUT_LABEL (file, fnname); + } + + /* Called after register allocation to add any instructions needed for the + epilogue. Using an epilogue insn is favored compared to putting all of the + instructions in output_function_prologue(), since it allows the scheduler + to intermix instructions with the saves of the caller saved registers. In + some cases, it might be necessary to emit a barrier instruction as the last + insn to prevent such scheduling. */ + + void + ia64_expand_epilogue (int sibcall_p) + { + rtx_insn *insn; + rtx reg, alt_reg, ar_unat_save_reg; + int regno, alt_regno, cfa_off; + + ia64_compute_frame_size (get_frame_size ()); + + /* If there is a frame pointer, then we use it instead of the stack + pointer, so that the stack pointer does not need to be valid when + the epilogue starts. See EXIT_IGNORE_STACK. 
*/ + if (frame_pointer_needed) + setup_spill_pointers (current_frame_info.n_spilled, + hard_frame_pointer_rtx, 0); + else + setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx, + current_frame_info.total_size); + + if (current_frame_info.total_size != 0) + { + /* ??? At this point we must generate a magic insn that appears to + modify the spill iterators and the frame pointer. This would + allow the most scheduling freedom. For now, just hard stop. */ + emit_insn (gen_blockage ()); + } + + /* Locate the bottom of the register save area. */ + cfa_off = (current_frame_info.spill_cfa_off + + current_frame_info.spill_size + + current_frame_info.extra_spill_size); + + /* Restore the predicate registers. */ + if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0))) + { + if (current_frame_info.r[reg_save_pr] != 0) + { + alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]); + reg_emitted (reg_save_pr); + } + else + { + alt_regno = next_scratch_gr_reg (); + alt_reg = gen_rtx_REG (DImode, alt_regno); + do_restore (gen_movdi_x, alt_reg, cfa_off); + cfa_off -= 8; + } + reg = gen_rtx_REG (DImode, PR_REG (0)); + emit_move_insn (reg, alt_reg); + } + + /* Restore the application registers. */ + + /* Load the saved unat from the stack, but do not restore it until + after the GRs have been restored. */ + if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)) + { + if (current_frame_info.r[reg_save_ar_unat] != 0) + { + ar_unat_save_reg + = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]); + reg_emitted (reg_save_ar_unat); + } + else + { + alt_regno = next_scratch_gr_reg (); + ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno); + current_frame_info.gr_used_mask |= 1 << alt_regno; + do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off); + cfa_off -= 8; + } + } + else + ar_unat_save_reg = NULL_RTX; + + if (current_frame_info.r[reg_save_ar_pfs] != 0) + { + reg_emitted (reg_save_ar_pfs); + alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]); + reg = gen_rtx_REG (DImode, AR_PFS_REGNUM); + emit_move_insn (reg, alt_reg); + } + else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)) + { + alt_regno = next_scratch_gr_reg (); + alt_reg = gen_rtx_REG (DImode, alt_regno); + do_restore (gen_movdi_x, alt_reg, cfa_off); + cfa_off -= 8; + reg = gen_rtx_REG (DImode, AR_PFS_REGNUM); + emit_move_insn (reg, alt_reg); + } + + if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM)) + { + if (current_frame_info.r[reg_save_ar_lc] != 0) + { + alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]); + reg_emitted (reg_save_ar_lc); + } + else + { + alt_regno = next_scratch_gr_reg (); + alt_reg = gen_rtx_REG (DImode, alt_regno); + do_restore (gen_movdi_x, alt_reg, cfa_off); + cfa_off -= 8; + } + reg = gen_rtx_REG (DImode, AR_LC_REGNUM); + emit_move_insn (reg, alt_reg); + } + + /* Restore the return pointer. */ + if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0))) + { + if (current_frame_info.r[reg_save_b0] != 0) + { + alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]); + reg_emitted (reg_save_b0); + } + else + { + alt_regno = next_scratch_gr_reg (); + alt_reg = gen_rtx_REG (DImode, alt_regno); + do_restore (gen_movdi_x, alt_reg, cfa_off); + cfa_off -= 8; + } + reg = gen_rtx_REG (DImode, BR_REG (0)); + emit_move_insn (reg, alt_reg); + } + + /* We should now be at the base of the gr/br/fr spill area. 
*/ + gcc_assert (cfa_off == (current_frame_info.spill_cfa_off + + current_frame_info.spill_size)); + + /* The GP may be stored on the stack in the prologue, but it's + never restored in the epilogue. Skip the stack slot. */ + if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1))) + cfa_off -= 8; + + /* Restore all general registers. */ + for (regno = GR_REG (2); regno <= GR_REG (31); ++regno) + if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) + { + reg = gen_rtx_REG (DImode, regno); + do_restore (gen_gr_restore, reg, cfa_off); + cfa_off -= 8; + } + + /* Restore the branch registers. */ + for (regno = BR_REG (1); regno <= BR_REG (7); ++regno) + if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) + { + alt_regno = next_scratch_gr_reg (); + alt_reg = gen_rtx_REG (DImode, alt_regno); + do_restore (gen_movdi_x, alt_reg, cfa_off); + cfa_off -= 8; + reg = gen_rtx_REG (DImode, regno); + emit_move_insn (reg, alt_reg); + } + + /* Restore floating point registers. */ + for (regno = FR_REG (2); regno <= FR_REG (127); ++regno) + if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) + { + gcc_assert (!(cfa_off & 15)); + reg = gen_rtx_REG (XFmode, regno); + do_restore (gen_fr_restore_x, reg, cfa_off); + cfa_off -= 16; + } + + /* Restore ar.unat for real. */ + if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)) + { + reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM); + emit_move_insn (reg, ar_unat_save_reg); + } + + gcc_assert (cfa_off == current_frame_info.spill_cfa_off); + + finish_spill_pointers (); + + if (current_frame_info.total_size + || cfun->machine->ia64_eh_epilogue_sp + || frame_pointer_needed) + { + /* ??? At this point we must generate a magic insn that appears to + modify the spill iterators, the stack pointer, and the frame + pointer. This would allow the most scheduling freedom. For now, + just hard stop. */ + emit_insn (gen_blockage ()); + } + + if (cfun->machine->ia64_eh_epilogue_sp) + emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp); + else if (frame_pointer_needed) + { + insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL); + } + else if (current_frame_info.total_size) + { + rtx offset, frame_size_rtx; + + frame_size_rtx = GEN_INT (current_frame_info.total_size); + if (satisfies_constraint_I (frame_size_rtx)) + offset = frame_size_rtx; + else + { + regno = next_scratch_gr_reg (); + offset = gen_rtx_REG (DImode, regno); + emit_move_insn (offset, frame_size_rtx); + } + + insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, + offset)); + + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_ADJUST_CFA, + gen_rtx_SET (stack_pointer_rtx, + gen_rtx_PLUS (DImode, + stack_pointer_rtx, + frame_size_rtx))); + } + + if (cfun->machine->ia64_eh_epilogue_bsp) + emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp)); + + if (! sibcall_p) + emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0)))); + else + { + int fp = GR_REG (2); + /* We need a throw away register here, r0 and r1 are reserved, + so r2 is the first available call clobbered register. If + there was a frame_pointer register, we may have swapped the + names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make + sure we're using the string "r2" when emitting the register + name for the assembler. 
*/ + if (current_frame_info.r[reg_fp] + && current_frame_info.r[reg_fp] == GR_REG (2)) + fp = HARD_FRAME_POINTER_REGNUM; + + /* We must emit an alloc to force the input registers to become output + registers. Otherwise, if the callee tries to pass its parameters + through to another call without an intervening alloc, then these + values get lost. */ + /* ??? We don't need to preserve all input registers. We only need to + preserve those input registers used as arguments to the sibling call. + It is unclear how to compute that number here. */ + if (current_frame_info.n_input_regs != 0) + { + rtx n_inputs = GEN_INT (current_frame_info.n_input_regs); + + insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp), + const0_rtx, const0_rtx, + n_inputs, const0_rtx)); + RTX_FRAME_RELATED_P (insn) = 1; + + /* ??? We need to mark the alloc as frame-related so that it gets + passed into ia64_asm_unwind_emit for ia64-specific unwinding. + But there's nothing dwarf2 related to be done wrt the register + windows. If we do nothing, dwarf2out will abort on the UNSPEC; + the empty parallel means dwarf2out will not see anything. */ + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (0))); + } + } + } + + /* Return 1 if br.ret can do all the work required to return from a + function. */ + + int + ia64_direct_return (void) + { + if (reload_completed && ! frame_pointer_needed) + { + ia64_compute_frame_size (get_frame_size ()); + + return (current_frame_info.total_size == 0 + && current_frame_info.n_spilled == 0 + && current_frame_info.r[reg_save_b0] == 0 + && current_frame_info.r[reg_save_pr] == 0 + && current_frame_info.r[reg_save_ar_pfs] == 0 + && current_frame_info.r[reg_save_ar_unat] == 0 + && current_frame_info.r[reg_save_ar_lc] == 0); + } + return 0; + } + + /* Return the magic cookie that we use to hold the return address + during early compilation. */ + + rtx + ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED) + { + if (count != 0) + return NULL; + return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR); + } + + /* Split this value after reload, now that we know where the return + address is saved. */ + + void + ia64_split_return_addr_rtx (rtx dest) + { + rtx src; + + if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0))) + { + if (current_frame_info.r[reg_save_b0] != 0) + { + src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]); + reg_emitted (reg_save_b0); + } + else + { + HOST_WIDE_INT off; + unsigned int regno; + rtx off_r; + + /* Compute offset from CFA for BR0. */ + /* ??? Must be kept in sync with ia64_expand_prologue. */ + off = (current_frame_info.spill_cfa_off + + current_frame_info.spill_size); + for (regno = GR_REG (1); regno <= GR_REG (31); ++regno) + if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) + off -= 8; + + /* Convert CFA offset to a register based offset. */ + if (frame_pointer_needed) + src = hard_frame_pointer_rtx; + else + { + src = stack_pointer_rtx; + off += current_frame_info.total_size; + } + + /* Load address into scratch register. 
*/ + off_r = GEN_INT (off); + if (satisfies_constraint_I (off_r)) + emit_insn (gen_adddi3 (dest, src, off_r)); + else + { + emit_move_insn (dest, off_r); + emit_insn (gen_adddi3 (dest, src, dest)); + } + + src = gen_rtx_MEM (Pmode, dest); + } + } + else + src = gen_rtx_REG (DImode, BR_REG (0)); + + emit_move_insn (dest, src); + } + + int + ia64_hard_regno_rename_ok (int from, int to) + { + /* Don't clobber any of the registers we reserved for the prologue. */ + unsigned int r; + + for (r = reg_fp; r <= reg_save_ar_lc; r++) + if (to == current_frame_info.r[r] + || from == current_frame_info.r[r] + || to == emitted_frame_related_regs[r] + || from == emitted_frame_related_regs[r]) + return 0; + + /* Don't use output registers outside the register frame. */ + if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs)) + return 0; + + /* Retain even/oddness on predicate register pairs. */ + if (PR_REGNO_P (from) && PR_REGNO_P (to)) + return (from & 1) == (to & 1); + + return 1; + } + + /* Implement TARGET_HARD_REGNO_NREGS. + + ??? We say that BImode PR values require two registers. This allows us to + easily store the normal and inverted values. We use CCImode to indicate + a single predicate register. */ + + static unsigned int + ia64_hard_regno_nregs (unsigned int regno, machine_mode mode) + { + if (regno == PR_REG (0) && mode == DImode) + return 64; + if (PR_REGNO_P (regno) && (mode) == BImode) + return 2; + if ((PR_REGNO_P (regno) || GR_REGNO_P (regno)) && mode == CCImode) + return 1; + if (FR_REGNO_P (regno) && mode == XFmode) + return 1; + if (FR_REGNO_P (regno) && mode == RFmode) + return 1; + if (FR_REGNO_P (regno) && mode == XCmode) + return 2; + return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); + } + + /* Implement TARGET_HARD_REGNO_MODE_OK. */ + + static bool + ia64_hard_regno_mode_ok (unsigned int regno, machine_mode mode) + { + if (FR_REGNO_P (regno)) + return (GET_MODE_CLASS (mode) != MODE_CC + && mode != BImode + && mode != TFmode); + + if (PR_REGNO_P (regno)) + return mode == BImode || GET_MODE_CLASS (mode) == MODE_CC; + + if (GR_REGNO_P (regno)) + return mode != XFmode && mode != XCmode && mode != RFmode; + + if (AR_REGNO_P (regno)) + return mode == DImode; + + if (BR_REGNO_P (regno)) + return mode == DImode; + + return false; + } + + /* Implement TARGET_MODES_TIEABLE_P. + + Don't tie integer and FP modes, as that causes us to get integer registers + allocated for FP instructions. XFmode only supported in FP registers so + we can't tie it with any other modes. */ + + static bool + ia64_modes_tieable_p (machine_mode mode1, machine_mode mode2) + { + return (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2) + && ((mode1 == XFmode || mode1 == XCmode || mode1 == RFmode) + == (mode2 == XFmode || mode2 == XCmode || mode2 == RFmode)) + && (mode1 == BImode) == (mode2 == BImode)); + } + + /* Target hook for assembling integer objects. Handle word-sized + aligned objects and detect the cases when @fptr is needed. 
*/ + + static bool + ia64_assemble_integer (rtx x, unsigned int size, int aligned_p) + { + if (size == POINTER_SIZE / BITS_PER_UNIT + && !(TARGET_NO_PIC || TARGET_AUTO_PIC) + && GET_CODE (x) == SYMBOL_REF + && SYMBOL_REF_FUNCTION_P (x)) + { + static const char * const directive[2][2] = { + /* 64-bit pointer */ /* 32-bit pointer */ + { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */ + { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */ + }; + fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file); + output_addr_const (asm_out_file, x); + fputs (")\n", asm_out_file); + return true; + } + return default_assemble_integer (x, size, aligned_p); + } + + /* Emit the function prologue. */ + + static void + ia64_output_function_prologue (FILE *file) + { + int mask, grsave, grsave_prev; + + if (current_frame_info.need_regstk) + fprintf (file, "\t.regstk %d, %d, %d, %d\n", + current_frame_info.n_input_regs, + current_frame_info.n_local_regs, + current_frame_info.n_output_regs, + current_frame_info.n_rotate_regs); + + if (ia64_except_unwind_info (&global_options) != UI_TARGET) + return; + + /* Emit the .prologue directive. */ + + mask = 0; + grsave = grsave_prev = 0; + if (current_frame_info.r[reg_save_b0] != 0) + { + mask |= 8; + grsave = grsave_prev = current_frame_info.r[reg_save_b0]; + } + if (current_frame_info.r[reg_save_ar_pfs] != 0 + && (grsave_prev == 0 + || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1)) + { + mask |= 4; + if (grsave_prev == 0) + grsave = current_frame_info.r[reg_save_ar_pfs]; + grsave_prev = current_frame_info.r[reg_save_ar_pfs]; + } + if (current_frame_info.r[reg_fp] != 0 + && (grsave_prev == 0 + || current_frame_info.r[reg_fp] == grsave_prev + 1)) + { + mask |= 2; + if (grsave_prev == 0) + grsave = HARD_FRAME_POINTER_REGNUM; + grsave_prev = current_frame_info.r[reg_fp]; + } + if (current_frame_info.r[reg_save_pr] != 0 + && (grsave_prev == 0 + || current_frame_info.r[reg_save_pr] == grsave_prev + 1)) + { + mask |= 1; + if (grsave_prev == 0) + grsave = current_frame_info.r[reg_save_pr]; + } + + if (mask && TARGET_GNU_AS) + fprintf (file, "\t.prologue %d, %d\n", mask, + ia64_dbx_register_number (grsave)); + else + fputs ("\t.prologue\n", file); + + /* Emit a .spill directive, if necessary, to relocate the base of + the register spill area. */ + if (current_frame_info.spill_cfa_off != -16) + fprintf (file, "\t.spill %ld\n", + (long) (current_frame_info.spill_cfa_off + + current_frame_info.spill_size)); + } + + /* Emit the .body directive at the scheduled end of the prologue. */ + + static void + ia64_output_function_end_prologue (FILE *file) + { + if (ia64_except_unwind_info (&global_options) != UI_TARGET) + return; + + fputs ("\t.body\n", file); + } + + /* Emit the function epilogue. */ + + static void + ia64_output_function_epilogue (FILE *) + { + int i; + + if (current_frame_info.r[reg_fp]) + { + const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM]; + reg_names[HARD_FRAME_POINTER_REGNUM] + = reg_names[current_frame_info.r[reg_fp]]; + reg_names[current_frame_info.r[reg_fp]] = tmp; + reg_emitted (reg_fp); + } + if (! 
TARGET_REG_NAMES) + { + for (i = 0; i < current_frame_info.n_input_regs; i++) + reg_names[IN_REG (i)] = ia64_input_reg_names[i]; + for (i = 0; i < current_frame_info.n_local_regs; i++) + reg_names[LOC_REG (i)] = ia64_local_reg_names[i]; + for (i = 0; i < current_frame_info.n_output_regs; i++) + reg_names[OUT_REG (i)] = ia64_output_reg_names[i]; + } + + current_frame_info.initialized = 0; + } + + int + ia64_dbx_register_number (int regno) + { + /* In ia64_expand_prologue we quite literally renamed the frame pointer + from its home at loc79 to something inside the register frame. We + must perform the same renumbering here for the debug info. */ + if (current_frame_info.r[reg_fp]) + { + if (regno == HARD_FRAME_POINTER_REGNUM) + regno = current_frame_info.r[reg_fp]; + else if (regno == current_frame_info.r[reg_fp]) + regno = HARD_FRAME_POINTER_REGNUM; + } + + if (IN_REGNO_P (regno)) + return 32 + regno - IN_REG (0); + else if (LOC_REGNO_P (regno)) + return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0); + else if (OUT_REGNO_P (regno)) + return (32 + current_frame_info.n_input_regs + + current_frame_info.n_local_regs + regno - OUT_REG (0)); + else + return regno; + } + + /* Implement TARGET_TRAMPOLINE_INIT. + + The trampoline should set the static chain pointer to value placed + into the trampoline and should branch to the specified routine. + To make the normal indirect-subroutine calling convention work, + the trampoline must look like a function descriptor; the first + word being the target address and the second being the target's + global pointer. + + We abuse the concept of a global pointer by arranging for it + to point to the data we need to load. The complete trampoline + has the following form: + + +-------------------+ \ + TRAMP: | __ia64_trampoline | | + +-------------------+ > fake function descriptor + | TRAMP+16 | | + +-------------------+ / + | target descriptor | + +-------------------+ + | static link | + +-------------------+ + */ + + static void + ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain) + { + rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); + rtx addr, addr_reg, tramp, eight = GEN_INT (8); + + /* The Intel assembler requires that the global __ia64_trampoline symbol + be declared explicitly */ + if (!TARGET_GNU_AS) + { + static bool declared_ia64_trampoline = false; + + if (!declared_ia64_trampoline) + { + declared_ia64_trampoline = true; + (*targetm.asm_out.globalize_label) (asm_out_file, + "__ia64_trampoline"); + } + } + + /* Make sure addresses are Pmode even if we are in ILP32 mode. */ + addr = convert_memory_address (Pmode, XEXP (m_tramp, 0)); + fnaddr = convert_memory_address (Pmode, fnaddr); + static_chain = convert_memory_address (Pmode, static_chain); + + /* Load up our iterator. */ + addr_reg = copy_to_reg (addr); + m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0); + + /* The first two words are the fake descriptor: + __ia64_trampoline, ADDR+16. */ + tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"); + if (TARGET_ABI_OPEN_VMS) + { + /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity + in the Macro-32 compiler) and changed the semantics of the LTOFF22 + relocation against function symbols to make it identical to the + LTOFF_FPTR22 relocation. Emit the latter directly to stay within + strict ELF and dereference to get the bare code address. 
*/ + rtx reg = gen_reg_rtx (Pmode); + SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION; + emit_move_insn (reg, tramp); + emit_move_insn (reg, gen_rtx_MEM (Pmode, reg)); + tramp = reg; + } + emit_move_insn (m_tramp, tramp); + emit_insn (gen_adddi3 (addr_reg, addr_reg, eight)); + m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8); + + emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (Pmode, addr, 16))); + emit_insn (gen_adddi3 (addr_reg, addr_reg, eight)); + m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8); + + /* The third word is the target descriptor. */ + emit_move_insn (m_tramp, force_reg (Pmode, fnaddr)); + emit_insn (gen_adddi3 (addr_reg, addr_reg, eight)); + m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8); + + /* The fourth word is the static chain. */ + emit_move_insn (m_tramp, static_chain); + } + + /* Do any needed setup for a variadic function. CUM has not been updated + for the last named argument, which is given by ARG. + + We generate the actual spill instructions during prologue generation. */ + + static void + ia64_setup_incoming_varargs (cumulative_args_t cum, + const function_arg_info &arg, + int *pretend_size, + int second_time ATTRIBUTE_UNUSED) + { + CUMULATIVE_ARGS next_cum = *get_cumulative_args (cum); + + /* Skip the current argument. */ + ia64_function_arg_advance (pack_cumulative_args (&next_cum), arg); + + if (next_cum.words < MAX_ARGUMENT_SLOTS) + { + int n = MAX_ARGUMENT_SLOTS - next_cum.words; + *pretend_size = n * UNITS_PER_WORD; + cfun->machine->n_varargs = n; + } + } + + /* Check whether TYPE is a homogeneous floating point aggregate. If + it is, return the mode of the floating point type that appears + in all leafs. If it is not, return VOIDmode. + + An aggregate is a homogeneous floating point aggregate is if all + fields/elements in it have the same floating point type (e.g, + SFmode). 128-bit quad-precision floats are excluded. + + Variable sized aggregates should never arrive here, since we should + have already decided to pass them by reference. Top-level zero-sized + aggregates are excluded because our parallels crash the middle-end. */ + + static machine_mode + hfa_element_mode (const_tree type, bool nested) + { + machine_mode element_mode = VOIDmode; + machine_mode mode; + enum tree_code code = TREE_CODE (type); + int know_element_mode = 0; + tree t; + + if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type)))) + return VOIDmode; + + switch (code) + { + case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE: + case BOOLEAN_TYPE: case POINTER_TYPE: + case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE: + case LANG_TYPE: case FUNCTION_TYPE: + return VOIDmode; + + /* Fortran complex types are supposed to be HFAs, so we need to handle + gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex + types though. */ + case COMPLEX_TYPE: + if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT + && TYPE_MODE (type) != TCmode) + return GET_MODE_INNER (TYPE_MODE (type)); + else + return VOIDmode; + + case REAL_TYPE: + /* We want to return VOIDmode for raw REAL_TYPEs, but the actual + mode if this is contained within an aggregate. 
*/ + if (nested && TYPE_MODE (type) != TFmode) + return TYPE_MODE (type); + else + return VOIDmode; + + case ARRAY_TYPE: + return hfa_element_mode (TREE_TYPE (type), 1); + + case RECORD_TYPE: + case UNION_TYPE: + case QUAL_UNION_TYPE: + for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t)) + { + if (TREE_CODE (t) != FIELD_DECL || DECL_FIELD_ABI_IGNORED (t)) + continue; + + mode = hfa_element_mode (TREE_TYPE (t), 1); + if (know_element_mode) + { + if (mode != element_mode) + return VOIDmode; + } + else if (GET_MODE_CLASS (mode) != MODE_FLOAT) + return VOIDmode; + else + { + know_element_mode = 1; + element_mode = mode; + } + } + return element_mode; + + default: + /* If we reach here, we probably have some front-end specific type + that the backend doesn't know about. This can happen via the + aggregate_value_p call in init_function_start. All we can do is + ignore unknown tree types. */ + return VOIDmode; + } + + return VOIDmode; + } + + /* Return the number of words required to hold a quantity of TYPE and MODE + when passed as an argument. */ + static int + ia64_function_arg_words (const_tree type, machine_mode mode) + { + int words; + + if (mode == BLKmode) + words = int_size_in_bytes (type); + else + words = GET_MODE_SIZE (mode); + + return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */ + } + + /* Return the number of registers that should be skipped so the current + argument (described by TYPE and WORDS) will be properly aligned. + + Integer and float arguments larger than 8 bytes start at the next + even boundary. Aggregates larger than 8 bytes start at the next + even boundary if the aggregate has 16 byte alignment. Note that + in the 32-bit ABI, TImode and TFmode have only 8-byte alignment + but are still to be aligned in registers. + + ??? The ABI does not specify how to handle aggregates with + alignment from 9 to 15 bytes, or greater than 16. We handle them + all as if they had 16 byte alignment. Such aggregates can occur + only if gcc extensions are used. */ + static int + ia64_function_arg_offset (const CUMULATIVE_ARGS *cum, + const_tree type, int words) + { + /* No registers are skipped on VMS. */ + if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0) + return 0; + + if (type + && TREE_CODE (type) != INTEGER_TYPE + && TREE_CODE (type) != REAL_TYPE) + return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT; + else + return words > 1; + } + + /* Return rtx for register where argument is passed, or zero if it is passed + on the stack. */ + /* ??? 128-bit quad-precision floats are always passed in general + registers. */ + + static rtx + ia64_function_arg_1 (cumulative_args_t cum_v, const function_arg_info &arg, + bool incoming) + { + const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + + int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST); + int words = ia64_function_arg_words (arg.type, arg.mode); + int offset = ia64_function_arg_offset (cum, arg.type, words); + machine_mode hfa_mode = VOIDmode; + + /* For OPEN VMS, emit the instruction setting up the argument register here, + when we know this will be together with the other arguments setup related + insns. This is not the conceptually best place to do this, but this is + the easiest as we have convenient access to cumulative args info. 
*/ + + if (TARGET_ABI_OPEN_VMS && arg.end_marker_p ()) + { + unsigned HOST_WIDE_INT regval = cum->words; + int i; + + for (i = 0; i < 8; i++) + regval |= ((int) cum->atypes[i]) << (i * 3 + 8); + + emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)), + GEN_INT (regval)); + } + + /* If all argument slots are used, then it must go on the stack. */ + if (cum->words + offset >= MAX_ARGUMENT_SLOTS) + return 0; + + /* On OpenVMS argument is either in Rn or Fn. */ + if (TARGET_ABI_OPEN_VMS) + { + if (FLOAT_MODE_P (arg.mode)) + return gen_rtx_REG (arg.mode, FR_ARG_FIRST + cum->words); + else + return gen_rtx_REG (arg.mode, basereg + cum->words); + } + + /* Check for and handle homogeneous FP aggregates. */ + if (arg.type) + hfa_mode = hfa_element_mode (arg.type, 0); + + /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas + and unprototyped hfas are passed specially. */ + if (hfa_mode != VOIDmode && (! cum->prototype || arg.named)) + { + rtx loc[16]; + int i = 0; + int fp_regs = cum->fp_regs; + int int_regs = cum->words + offset; + int hfa_size = GET_MODE_SIZE (hfa_mode); + int byte_size; + int args_byte_size; + + /* If prototyped, pass it in FR regs then GR regs. + If not prototyped, pass it in both FR and GR regs. + + If this is an SFmode aggregate, then it is possible to run out of + FR regs while GR regs are still left. In that case, we pass the + remaining part in the GR regs. */ + + /* Fill the FP regs. We do this always. We stop if we reach the end + of the argument, the last FP register, or the last argument slot. */ + + byte_size = arg.promoted_size_in_bytes (); + args_byte_size = int_regs * UNITS_PER_WORD; + offset = 0; + for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS + && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++) + { + loc[i] = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (hfa_mode, (FR_ARG_FIRST + + fp_regs)), + GEN_INT (offset)); + offset += hfa_size; + args_byte_size += hfa_size; + fp_regs++; + } + + /* If no prototype, then the whole thing must go in GR regs. */ + if (! cum->prototype) + offset = 0; + /* If this is an SFmode aggregate, then we might have some left over + that needs to go in GR regs. */ + else if (byte_size != offset) + int_regs += offset / UNITS_PER_WORD; + + /* Fill in the GR regs. We must use DImode here, not the hfa mode. */ + + for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++) + { + machine_mode gr_mode = DImode; + unsigned int gr_size; + + /* If we have an odd 4 byte hunk because we ran out of FR regs, + then this goes in a GR reg left adjusted/little endian, right + adjusted/big endian. */ + /* ??? Currently this is handled wrong, because 4-byte hunks are + always right adjusted/little endian. */ + if (offset & 0x4) + gr_mode = SImode; + /* If we have an even 4 byte hunk because the aggregate is a + multiple of 4 bytes in size, then this goes in a GR reg right + adjusted/little endian. */ + else if (byte_size - offset == 4) + gr_mode = SImode; + + loc[i] = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (gr_mode, (basereg + + int_regs)), + GEN_INT (offset)); + + gr_size = GET_MODE_SIZE (gr_mode); + offset += gr_size; + if (gr_size == UNITS_PER_WORD + || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0)) + int_regs++; + else if (gr_size > UNITS_PER_WORD) + int_regs += gr_size / UNITS_PER_WORD; + } + return gen_rtx_PARALLEL (arg.mode, gen_rtvec_v (i, loc)); + } + + /* Integral and aggregates go in general registers. 
If we have run out of + FR registers, then FP values must also go in general registers. This can + happen when we have a SFmode HFA. */ + else if (arg.mode == TFmode || arg.mode == TCmode + || !FLOAT_MODE_P (arg.mode) + || cum->fp_regs == MAX_ARGUMENT_SLOTS) + { + int byte_size = arg.promoted_size_in_bytes (); + if (BYTES_BIG_ENDIAN + && (arg.mode == BLKmode || arg.aggregate_type_p ()) + && byte_size < UNITS_PER_WORD + && byte_size > 0) + { + rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (DImode, + (basereg + cum->words + + offset)), + const0_rtx); + return gen_rtx_PARALLEL (arg.mode, gen_rtvec (1, gr_reg)); + } + else + return gen_rtx_REG (arg.mode, basereg + cum->words + offset); + + } + + /* If there is a prototype, then FP values go in a FR register when + named, and in a GR register when unnamed. */ + else if (cum->prototype) + { + if (arg.named) + return gen_rtx_REG (arg.mode, FR_ARG_FIRST + cum->fp_regs); + /* In big-endian mode, an anonymous SFmode value must be represented + as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force + the value into the high half of the general register. */ + else if (BYTES_BIG_ENDIAN && arg.mode == SFmode) + return gen_rtx_PARALLEL (arg.mode, + gen_rtvec (1, + gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (DImode, basereg + cum->words + offset), + const0_rtx))); + else + return gen_rtx_REG (arg.mode, basereg + cum->words + offset); + } + /* If there is no prototype, then FP values go in both FR and GR + registers. */ + else + { + /* See comment above. */ + machine_mode inner_mode = + (BYTES_BIG_ENDIAN && arg.mode == SFmode) ? DImode : arg.mode; + + rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (arg.mode, (FR_ARG_FIRST + + cum->fp_regs)), + const0_rtx); + rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (inner_mode, + (basereg + cum->words + + offset)), + const0_rtx); + + return gen_rtx_PARALLEL (arg.mode, gen_rtvec (2, fp_reg, gr_reg)); + } + } + + /* Implement TARGET_FUNCION_ARG target hook. */ + + static rtx + ia64_function_arg (cumulative_args_t cum, const function_arg_info &arg) + { + return ia64_function_arg_1 (cum, arg, false); + } + + /* Implement TARGET_FUNCION_INCOMING_ARG target hook. */ + + static rtx + ia64_function_incoming_arg (cumulative_args_t cum, + const function_arg_info &arg) + { + return ia64_function_arg_1 (cum, arg, true); + } + + /* Return number of bytes, at the beginning of the argument, that must be + put in registers. 0 is the argument is entirely in registers or entirely + in memory. */ + + static int + ia64_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg) + { + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + + int words = ia64_function_arg_words (arg.type, arg.mode); + int offset = ia64_function_arg_offset (cum, arg.type, words); + + /* If all argument slots are used, then it must go on the stack. */ + if (cum->words + offset >= MAX_ARGUMENT_SLOTS) + return 0; + + /* It doesn't matter whether the argument goes in FR or GR regs. If + it fits within the 8 argument slots, then it goes entirely in + registers. If it extends past the last argument slot, then the rest + goes on the stack. */ + + if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS) + return 0; + + return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD; + } + + /* Return ivms_arg_type based on machine_mode. 
*/ + + static enum ivms_arg_type + ia64_arg_type (machine_mode mode) + { + switch (mode) + { + case E_SFmode: + return FS; + case E_DFmode: + return FT; + default: + return I64; + } + } + + /* Update CUM to point after this argument. This is patterned after + ia64_function_arg. */ + + static void + ia64_function_arg_advance (cumulative_args_t cum_v, + const function_arg_info &arg) + { + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + int words = ia64_function_arg_words (arg.type, arg.mode); + int offset = ia64_function_arg_offset (cum, arg.type, words); + machine_mode hfa_mode = VOIDmode; + + /* If all arg slots are already full, then there is nothing to do. */ + if (cum->words >= MAX_ARGUMENT_SLOTS) + { + cum->words += words + offset; + return; + } + + cum->atypes[cum->words] = ia64_arg_type (arg.mode); + cum->words += words + offset; + + /* On OpenVMS argument is either in Rn or Fn. */ + if (TARGET_ABI_OPEN_VMS) + { + cum->int_regs = cum->words; + cum->fp_regs = cum->words; + return; + } + + /* Check for and handle homogeneous FP aggregates. */ + if (arg.type) + hfa_mode = hfa_element_mode (arg.type, 0); + + /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas + and unprototyped hfas are passed specially. */ + if (hfa_mode != VOIDmode && (! cum->prototype || arg.named)) + { + int fp_regs = cum->fp_regs; + /* This is the original value of cum->words + offset. */ + int int_regs = cum->words - words; + int hfa_size = GET_MODE_SIZE (hfa_mode); + int byte_size; + int args_byte_size; + + /* If prototyped, pass it in FR regs then GR regs. + If not prototyped, pass it in both FR and GR regs. + + If this is an SFmode aggregate, then it is possible to run out of + FR regs while GR regs are still left. In that case, we pass the + remaining part in the GR regs. */ + + /* Fill the FP regs. We do this always. We stop if we reach the end + of the argument, the last FP register, or the last argument slot. */ + + byte_size = arg.promoted_size_in_bytes (); + args_byte_size = int_regs * UNITS_PER_WORD; + offset = 0; + for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS + && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));) + { + offset += hfa_size; + args_byte_size += hfa_size; + fp_regs++; + } + + cum->fp_regs = fp_regs; + } + + /* Integral and aggregates go in general registers. So do TFmode FP values. + If we have run out of FR registers, then other FP values must also go in + general registers. This can happen when we have a SFmode HFA. */ + else if (arg.mode == TFmode || arg.mode == TCmode + || !FLOAT_MODE_P (arg.mode) + || cum->fp_regs == MAX_ARGUMENT_SLOTS) + cum->int_regs = cum->words; + + /* If there is a prototype, then FP values go in a FR register when + named, and in a GR register when unnamed. */ + else if (cum->prototype) + { + if (! arg.named) + cum->int_regs = cum->words; + else + /* ??? Complex types should not reach here. */ + cum->fp_regs + += (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT ? 2 : 1); + } + /* If there is no prototype, then FP values go in both FR and GR + registers. */ + else + { + /* ??? Complex types should not reach here. */ + cum->fp_regs + += (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT ? 2 : 1); + cum->int_regs = cum->words; + } + } + + /* Arguments with alignment larger than 8 bytes start at the next even + boundary. On ILP32 HPUX, TFmode arguments start on next even boundary + even though their normal alignment is 8 bytes. See ia64_function_arg. 
*/ + + static unsigned int + ia64_function_arg_boundary (machine_mode mode, const_tree type) + { + if (mode == TFmode && TARGET_HPUX && TARGET_ILP32) + return PARM_BOUNDARY * 2; + + if (type) + { + if (TYPE_ALIGN (type) > PARM_BOUNDARY) + return PARM_BOUNDARY * 2; + else + return PARM_BOUNDARY; + } + + if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY) + return PARM_BOUNDARY * 2; + else + return PARM_BOUNDARY; + } + + /* True if it is OK to do sibling call optimization for the specified + call expression EXP. DECL will be the called function, or NULL if + this is an indirect call. */ + static bool + ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) + { + /* We can't perform a sibcall if the current function has the syscall_linkage + attribute. */ + if (lookup_attribute ("syscall_linkage", + TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) + return false; + + /* We must always return with our current GP. This means we can + only sibcall to functions defined in the current module unless + TARGET_CONST_GP is set to true. */ + return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP; + } + + + /* Implement va_arg. */ + + static tree + ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, + gimple_seq *post_p) + { + /* Variable sized types are passed by reference. */ + if (pass_va_arg_by_reference (type)) + { + tree ptrtype = build_pointer_type (type); + tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p); + return build_va_arg_indirect_ref (addr); + } + + /* Aggregate arguments with alignment larger than 8 bytes start at + the next even boundary. Integer and floating point arguments + do so if they are larger than 8 bytes, whether or not they are + also aligned larger than 8 bytes. */ + if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE) + ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT) + { + tree t = fold_build_pointer_plus_hwi (valist, 2 * UNITS_PER_WORD - 1); + t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, + build_int_cst (TREE_TYPE (t), -2 * UNITS_PER_WORD)); + gimplify_assign (unshare_expr (valist), t, pre_p); + } + + return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); + } + + /* Return 1 if function return value returned in memory. Return 0 if it is + in a register. */ + + static bool + ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED) + { + machine_mode mode; + machine_mode hfa_mode; + HOST_WIDE_INT byte_size; + + mode = TYPE_MODE (valtype); + byte_size = GET_MODE_SIZE (mode); + if (mode == BLKmode) + { + byte_size = int_size_in_bytes (valtype); + if (byte_size < 0) + return true; + } + + /* Hfa's with up to 8 elements are returned in the FP argument registers. */ + + hfa_mode = hfa_element_mode (valtype, 0); + if (hfa_mode != VOIDmode) + { + int hfa_size = GET_MODE_SIZE (hfa_mode); + + if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS) + return true; + else + return false; + } + else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS) + return true; + else + return false; + } + + /* Return rtx for register that holds the function return value. 
*/ + + static rtx + ia64_function_value (const_tree valtype, + const_tree fn_decl_or_type, + bool outgoing ATTRIBUTE_UNUSED) + { + machine_mode mode; + machine_mode hfa_mode; + int unsignedp; + const_tree func = fn_decl_or_type; + + if (fn_decl_or_type + && !DECL_P (fn_decl_or_type)) + func = NULL; + + mode = TYPE_MODE (valtype); + hfa_mode = hfa_element_mode (valtype, 0); + + if (hfa_mode != VOIDmode) + { + rtx loc[8]; + int i; + int hfa_size; + int byte_size; + int offset; + + hfa_size = GET_MODE_SIZE (hfa_mode); + byte_size = ((mode == BLKmode) + ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode)); + offset = 0; + for (i = 0; offset < byte_size; i++) + { + loc[i] = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i), + GEN_INT (offset)); + offset += hfa_size; + } + return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); + } + else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode) + return gen_rtx_REG (mode, FR_ARG_FIRST); + else + { + bool need_parallel = false; + + /* In big-endian mode, we need to manage the layout of aggregates + in the registers so that we get the bits properly aligned in + the highpart of the registers. */ + if (BYTES_BIG_ENDIAN + && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype)))) + need_parallel = true; + + /* Something like struct S { long double x; char a[0] } is not an + HFA structure, and therefore doesn't go in fp registers. But + the middle-end will give it XFmode anyway, and XFmode values + don't normally fit in integer registers. So we need to smuggle + the value inside a parallel. */ + else if (mode == XFmode || mode == XCmode || mode == RFmode) + need_parallel = true; + + if (need_parallel) + { + rtx loc[8]; + int offset; + int bytesize; + int i; + + offset = 0; + bytesize = int_size_in_bytes (valtype); + /* An empty PARALLEL is invalid here, but the return value + doesn't matter for empty structs. */ + if (bytesize == 0) + return gen_rtx_REG (mode, GR_RET_FIRST); + for (i = 0; offset < bytesize; i++) + { + loc[i] = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (DImode, + GR_RET_FIRST + i), + GEN_INT (offset)); + offset += UNITS_PER_WORD; + } + return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); + } + + mode = promote_function_mode (valtype, mode, &unsignedp, + func ? TREE_TYPE (func) : NULL_TREE, + true); + + return gen_rtx_REG (mode, GR_RET_FIRST); + } + } + + /* Worker function for TARGET_LIBCALL_VALUE. */ + + static rtx + ia64_libcall_value (machine_mode mode, + const_rtx fun ATTRIBUTE_UNUSED) + { + return gen_rtx_REG (mode, + (((GET_MODE_CLASS (mode) == MODE_FLOAT + || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) + && (mode) != TFmode) + ? FR_RET_FIRST : GR_RET_FIRST)); + } + + /* Worker function for FUNCTION_VALUE_REGNO_P. */ + + static bool + ia64_function_value_regno_p (const unsigned int regno) + { + return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST) + || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST)); + } + + /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL. + We need to emit DTP-relative relocations. */ + + static void + ia64_output_dwarf_dtprel (FILE *file, int size, rtx x) + { + gcc_assert (size == 4 || size == 8); + if (size == 4) + fputs ("\tdata4.ua\t@dtprel(", file); + else + fputs ("\tdata8.ua\t@dtprel(", file); + output_addr_const (file, x); + fputs (")", file); + } + + /* Print a memory address as an operand to reference that memory location. */ + + /* ??? Do we need this? It gets used only for 'a' operands. 
We could perhaps + also call this from ia64_print_operand for memory addresses. */ + + static void + ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED, + machine_mode /*mode*/, + rtx address ATTRIBUTE_UNUSED) + { + } + + /* Print an operand to an assembler instruction. + C Swap and print a comparison operator. + D Print an FP comparison operator. + E Print 32 - constant, for SImode shifts as extract. + e Print 64 - constant, for DImode rotates. + F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or + a floating point register emitted normally. + G A floating point constant. + I Invert a predicate register by adding 1. + J Select the proper predicate register for a condition. + j Select the inverse predicate register for a condition. + O Append .acq for volatile load. + P Postincrement of a MEM. + Q Append .rel for volatile store. + R Print .s .d or nothing for a single, double or no truncation. + S Shift amount for shladd instruction. + T Print an 8-bit sign extended number (K) as a 32-bit unsigned number + for Intel assembler. + U Print an 8-bit sign extended number (K) as a 64-bit unsigned number + for Intel assembler. + X A pair of floating point registers. + r Print register name, or constant 0 as r0. HP compatibility for + Linux kernel. + v Print vector constant value as an 8-byte integer value. */ + + static void + ia64_print_operand (FILE * file, rtx x, int code) + { + const char *str; + + switch (code) + { + case 0: + /* Handled below. */ + break; + + case 'C': + { + enum rtx_code c = swap_condition (GET_CODE (x)); + fputs (GET_RTX_NAME (c), file); + return; + } + + case 'D': + switch (GET_CODE (x)) + { + case NE: + str = "neq"; + break; + case UNORDERED: + str = "unord"; + break; + case ORDERED: + str = "ord"; + break; + case UNLT: + str = "nge"; + break; + case UNLE: + str = "ngt"; + break; + case UNGT: + str = "nle"; + break; + case UNGE: + str = "nlt"; + break; + case UNEQ: + case LTGT: + gcc_unreachable (); + default: + str = GET_RTX_NAME (GET_CODE (x)); + break; + } + fputs (str, file); + return; + + case 'E': + fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x)); + return; + + case 'e': + fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x)); + return; + + case 'F': + if (x == CONST0_RTX (GET_MODE (x))) + str = reg_names [FR_REG (0)]; + else if (x == CONST1_RTX (GET_MODE (x))) + str = reg_names [FR_REG (1)]; + else + { + gcc_assert (GET_CODE (x) == REG); + str = reg_names [REGNO (x)]; + } + fputs (str, file); + return; + + case 'G': + { + long val[4]; + real_to_target (val, CONST_DOUBLE_REAL_VALUE (x), GET_MODE (x)); + if (GET_MODE (x) == SFmode) + fprintf (file, "0x%08lx", val[0] & 0xffffffff); + else if (GET_MODE (x) == DFmode) + fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1]) + & 0xffffffff, + (WORDS_BIG_ENDIAN ? 
val[1] : val[0]) + & 0xffffffff); + else + output_operand_lossage ("invalid %%G mode"); + } + return; + + case 'I': + fputs (reg_names [REGNO (x) + 1], file); + return; + + case 'J': + case 'j': + { + unsigned int regno = REGNO (XEXP (x, 0)); + if (GET_CODE (x) == EQ) + regno += 1; + if (code == 'j') + regno ^= 1; + fputs (reg_names [regno], file); + } + return; + + case 'O': + if (MEM_VOLATILE_P (x)) + fputs(".acq", file); + return; + + case 'P': + { + HOST_WIDE_INT value; + + switch (GET_CODE (XEXP (x, 0))) + { + default: + return; + + case POST_MODIFY: + x = XEXP (XEXP (XEXP (x, 0), 1), 1); + if (GET_CODE (x) == CONST_INT) + value = INTVAL (x); + else + { + gcc_assert (GET_CODE (x) == REG); + fprintf (file, ", %s", reg_names[REGNO (x)]); + return; + } + break; + + case POST_INC: + value = GET_MODE_SIZE (GET_MODE (x)); + break; + + case POST_DEC: + value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x)); + break; + } + + fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value); + return; + } + + case 'Q': + if (MEM_VOLATILE_P (x)) + fputs(".rel", file); + return; + + case 'R': + if (x == CONST0_RTX (GET_MODE (x))) + fputs(".s", file); + else if (x == CONST1_RTX (GET_MODE (x))) + fputs(".d", file); + else if (x == CONST2_RTX (GET_MODE (x))) + ; + else + output_operand_lossage ("invalid %%R value"); + return; + + case 'S': + fprintf (file, "%d", exact_log2 (INTVAL (x))); + return; + + case 'T': + if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT) + { + fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff); + return; + } + break; + + case 'U': + if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT) + { + const char *prefix = "0x"; + if (INTVAL (x) & 0x80000000) + { + fprintf (file, "0xffffffff"); + prefix = ""; + } + fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff); + return; + } + break; + + case 'X': + { + unsigned int regno = REGNO (x); + fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]); + } + return; + + case 'r': + /* If this operand is the constant zero, write it as register zero. + Any register, zero, or CONST_INT value is OK here. */ + if (GET_CODE (x) == REG) + fputs (reg_names[REGNO (x)], file); + else if (x == CONST0_RTX (GET_MODE (x))) + fputs ("r0", file); + else if (GET_CODE (x) == CONST_INT) + output_addr_const (file, x); + else + output_operand_lossage ("invalid %%r value"); + return; + + case 'v': + gcc_assert (GET_CODE (x) == CONST_VECTOR); + x = simplify_subreg (DImode, x, GET_MODE (x), 0); + break; + + case '+': + { + const char *which; + + /* For conditional branches, returns or calls, substitute + sptk, dptk, dpnt, or spnt for %s. */ + x = find_reg_note (current_output_insn, REG_BR_PROB, 0); + if (x) + { + int pred_val = profile_probability::from_reg_br_prob_note + (XINT (x, 0)).to_reg_br_prob_base (); + + /* Guess top and bottom 10% statically predicted. 
*/ + if (pred_val < REG_BR_PROB_BASE / 50 + && br_prob_note_reliable_p (x)) + which = ".spnt"; + else if (pred_val < REG_BR_PROB_BASE / 2) + which = ".dpnt"; + else if (pred_val < REG_BR_PROB_BASE / 100 * 98 + || !br_prob_note_reliable_p (x)) + which = ".dptk"; + else + which = ".sptk"; + } + else if (CALL_P (current_output_insn)) + which = ".sptk"; + else + which = ".dptk"; + + fputs (which, file); + return; + } + + case ',': + x = current_insn_predicate; + if (x) + { + unsigned int regno = REGNO (XEXP (x, 0)); + if (GET_CODE (x) == EQ) + regno += 1; + fprintf (file, "(%s) ", reg_names [regno]); + } + return; + + default: + output_operand_lossage ("ia64_print_operand: unknown code"); + return; + } + + switch (GET_CODE (x)) + { + /* This happens for the spill/restore instructions. */ + case POST_INC: + case POST_DEC: + case POST_MODIFY: + x = XEXP (x, 0); + /* fall through */ + + case REG: + fputs (reg_names [REGNO (x)], file); + break; + + case MEM: + { + rtx addr = XEXP (x, 0); + if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC) + addr = XEXP (addr, 0); + fprintf (file, "[%s]", reg_names [REGNO (addr)]); + break; + } + + default: + output_addr_const (file, x); + break; + } + + return; + } + + /* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P. */ + + static bool + ia64_print_operand_punct_valid_p (unsigned char code) + { + return (code == '+' || code == ','); + } + + /* Compute a (partial) cost for rtx X. Return true if the complete + cost has been computed, and false if subexpressions should be + scanned. In either case, *TOTAL contains the cost result. */ + /* ??? This is incomplete. */ + + static bool + ia64_rtx_costs (rtx x, machine_mode mode, int outer_code, + int opno ATTRIBUTE_UNUSED, + int *total, bool speed ATTRIBUTE_UNUSED) + { + int code = GET_CODE (x); + + switch (code) + { + case CONST_INT: + switch (outer_code) + { + case SET: + *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1); + return true; + case PLUS: + if (satisfies_constraint_I (x)) + *total = 0; + else if (satisfies_constraint_J (x)) + *total = 1; + else + *total = COSTS_N_INSNS (1); + return true; + default: + if (satisfies_constraint_K (x) || satisfies_constraint_L (x)) + *total = 0; + else + *total = COSTS_N_INSNS (1); + return true; + } + + case CONST_DOUBLE: + *total = COSTS_N_INSNS (1); + return true; + + case CONST: + case SYMBOL_REF: + case LABEL_REF: + *total = COSTS_N_INSNS (3); + return true; + + case FMA: + *total = COSTS_N_INSNS (4); + return true; + + case MULT: + /* For multiplies wider than HImode, we have to go to the FPU, + which normally involves copies. Plus there's the latency + of the multiply itself, and the latency of the instructions to + transfer integer regs to FP regs. */ + if (FLOAT_MODE_P (mode)) + *total = COSTS_N_INSNS (4); + else if (GET_MODE_SIZE (mode) > 2) + *total = COSTS_N_INSNS (10); + else + *total = COSTS_N_INSNS (2); + return true; + + case PLUS: + case MINUS: + if (FLOAT_MODE_P (mode)) + { + *total = COSTS_N_INSNS (4); + return true; + } + /* FALLTHRU */ + + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + *total = COSTS_N_INSNS (1); + return true; + + case DIV: + case UDIV: + case MOD: + case UMOD: + /* We make divide expensive, so that divide-by-constant will be + optimized to a multiply. */ + *total = COSTS_N_INSNS (60); + return true; + + default: + return false; + } + } + + /* Calculate the cost of moving data from a register in class FROM to + one in class TO, using MODE. 
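/* Illustrative sketch, not part of the patch: the probability thresholds the
   '+' case above uses to choose a branch-hint completer.  REG_BR_PROB_BASE is
   assumed to be 10000 here (its usual value), and "reliable" stands in for
   br_prob_note_reliable_p.  Standalone C, no GCC internals involved.  */
#include <stdio.h>

#define PROB_BASE 10000

static const char *
hint_for (int pred_val, int reliable)
{
  if (pred_val < PROB_BASE / 50 && reliable)              /* < 2%: static not-taken */
    return ".spnt";
  else if (pred_val < PROB_BASE / 2)                      /* < 50%: dynamic not-taken */
    return ".dpnt";
  else if (pred_val < PROB_BASE / 100 * 98 || !reliable)  /* < 98%, or note unreliable */
    return ".dptk";
  else                                                    /* >= 98%: static taken */
    return ".sptk";
}

int main (void)
{
  printf ("%s\n", hint_for (100, 1));   /* .spnt */
  printf ("%s\n", hint_for (4000, 1));  /* .dpnt */
  printf ("%s\n", hint_for (9900, 0));  /* .dptk (note not reliable) */
  printf ("%s\n", hint_for (9900, 1));  /* .sptk */
  return 0;
}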
*/ + + static int + ia64_register_move_cost (machine_mode mode, reg_class_t from, + reg_class_t to) + { + /* ADDL_REGS is the same as GR_REGS for movement purposes. */ + if (to == ADDL_REGS) + to = GR_REGS; + if (from == ADDL_REGS) + from = GR_REGS; + + /* All costs are symmetric, so reduce cases by putting the + lower number class as the destination. */ + if (from < to) + { + reg_class_t tmp = to; + to = from, from = tmp; + } + + /* Moving from FR<->GR in XFmode must be more expensive than 2, + so that we get secondary memory reloads. Between FR_REGS, + we have to make this at least as expensive as memory_move_cost + to avoid spectacularly poor register class preferencing. */ + if (mode == XFmode || mode == RFmode) + { + if (to != GR_REGS || from != GR_REGS) + return memory_move_cost (mode, to, false); + else + return 3; + } + + switch (to) + { + case PR_REGS: + /* Moving between PR registers takes two insns. */ + if (from == PR_REGS) + return 3; + /* Moving between PR and anything but GR is impossible. */ + if (from != GR_REGS) + return memory_move_cost (mode, to, false); + break; + + case BR_REGS: + /* Moving between BR and anything but GR is impossible. */ + if (from != GR_REGS && from != GR_AND_BR_REGS) + return memory_move_cost (mode, to, false); + break; + + case AR_I_REGS: + case AR_M_REGS: + /* Moving between AR and anything but GR is impossible. */ + if (from != GR_REGS) + return memory_move_cost (mode, to, false); + break; + + case GR_REGS: + case FR_REGS: + case FP_REGS: + case GR_AND_FR_REGS: + case GR_AND_BR_REGS: + case ALL_REGS: + break; + + default: + gcc_unreachable (); + } + + return 2; + } + + /* Calculate the cost of moving data of MODE from a register to or from + memory. */ + + static int + ia64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED, + reg_class_t rclass, + bool in ATTRIBUTE_UNUSED) + { + if (rclass == GENERAL_REGS + || rclass == FR_REGS + || rclass == FP_REGS + || rclass == GR_AND_FR_REGS) + return 4; + else + return 10; + } + + /* Implement TARGET_PREFERRED_RELOAD_CLASS. Place additional restrictions + on RCLASS to use when copying X into that class. */ + + static reg_class_t + ia64_preferred_reload_class (rtx x, reg_class_t rclass) + { + switch (rclass) + { + case FR_REGS: + case FP_REGS: + /* Don't allow volatile mem reloads into floating point registers. + This is defined to force reload to choose the r/m case instead + of the f/f case when reloading (set (reg fX) (mem/v)). */ + if (MEM_P (x) && MEM_VOLATILE_P (x)) + return NO_REGS; + + /* Force all unrecognized constants into the constant pool. */ + if (CONSTANT_P (x)) + return NO_REGS; + break; + + case AR_M_REGS: + case AR_I_REGS: + if (!OBJECT_P (x)) + return NO_REGS; + break; + + default: + break; + } + + return rclass; + } + + /* This function returns the register class required for a secondary + register when copying between one of the registers in RCLASS, and X, + using MODE. A return value of NO_REGS means that no secondary register + is required. */ + + enum reg_class + ia64_secondary_reload_class (enum reg_class rclass, + machine_mode mode ATTRIBUTE_UNUSED, rtx x) + { + int regno = -1; + + if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG) + regno = true_regnum (x); + + switch (rclass) + { + case BR_REGS: + case AR_M_REGS: + case AR_I_REGS: + /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global + interaction. We end up with two pseudos with overlapping lifetimes + both of which are equiv to the same constant, and both which need + to be in BR_REGS. 
This seems to be a cse bug. cse_basic_block_end + changes depending on the path length, which means the qty_first_reg + check in make_regs_eqv can give different answers at different times. + At some point I'll probably need a reload_indi pattern to handle + this. + + We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we + wound up with a FP register from GR_AND_FR_REGS. Extend that to all + non-general registers for good measure. */ + if (regno >= 0 && ! GENERAL_REGNO_P (regno)) + return GR_REGS; + + /* This is needed if a pseudo used as a call_operand gets spilled to a + stack slot. */ + if (GET_CODE (x) == MEM) + return GR_REGS; + break; + + case FR_REGS: + case FP_REGS: + /* Need to go through general registers to get to other class regs. */ + if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno))) + return GR_REGS; + + /* This can happen when a paradoxical subreg is an operand to the + muldi3 pattern. */ + /* ??? This shouldn't be necessary after instruction scheduling is + enabled, because paradoxical subregs are not accepted by + register_operand when INSN_SCHEDULING is defined. Or alternatively, + stop the paradoxical subreg stupidity in the *_operand functions + in recog.cc. */ + if (GET_CODE (x) == MEM + && (GET_MODE (x) == SImode || GET_MODE (x) == HImode + || GET_MODE (x) == QImode)) + return GR_REGS; + + /* This can happen because of the ior/and/etc patterns that accept FP + registers as operands. If the third operand is a constant, then it + needs to be reloaded into a FP register. */ + if (GET_CODE (x) == CONST_INT) + return GR_REGS; + + /* This can happen because of register elimination in a muldi3 insn. + E.g. `26107 * (unsigned long)&u'. */ + if (GET_CODE (x) == PLUS) + return GR_REGS; + break; + + case PR_REGS: + /* ??? This happens if we cse/gcse a BImode value across a call, + and the function has a nonlocal goto. This is because global + does not allocate call crossing pseudos to hard registers when + crtl->has_nonlocal_goto is true. This is relatively + common for C++ programs that use exceptions. To reproduce, + return NO_REGS and compile libstdc++. */ + if (GET_CODE (x) == MEM) + return GR_REGS; + + /* This can happen when we take a BImode subreg of a DImode value, + and that DImode value winds up in some non-GR register. */ + if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno)) + return GR_REGS; + break; + + default: + break; + } + + return NO_REGS; + } + + + /* Implement targetm.unspec_may_trap_p hook. */ + static int + ia64_unspec_may_trap_p (const_rtx x, unsigned flags) + { + switch (XINT (x, 1)) + { + case UNSPEC_LDA: + case UNSPEC_LDS: + case UNSPEC_LDSA: + case UNSPEC_LDCCLR: + case UNSPEC_CHKACLR: + case UNSPEC_CHKS: + /* These unspecs are just wrappers. */ + return may_trap_p_1 (XVECEXP (x, 0, 0), flags); + } + + return default_unspec_may_trap_p (x, flags); + } + + + /* Parse the -mfixed-range= option string. */ + + static void + fix_range (const char *const_str) + { + int i, first, last; + char *str, *dash, *comma; + + /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and + REG2 are either register names or register numbers. The effect + of this option is to mark the registers in the range from REG1 to + REG2 as ``fixed'' so they won't be used by the compiler. This is + used, e.g., to ensure that kernel mode code doesn't use f32-f127. 
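/* Illustrative sketch, not part of the patch: the accepted shape of the
   -mfixed-range= argument that fix_range () below parses -- comma-separated,
   inclusive REG1-REG2 ranges.  This simplified version only splits the string
   and does not resolve names; the real code maps them with decode_reg_name
   and sets fixed_regs[] for every register in each range.  Standalone C.  */
#include <stdio.h>
#include <string.h>

int main (void)
{
  char arg[] = "f32-f127,f2-f5";      /* e.g. -mfixed-range=f32-f127,f2-f5 */
  char *range = strtok (arg, ",");

  while (range)
    {
      char *dash = strchr (range, '-');
      if (!dash)
        {
          fprintf (stderr, "value must have form REG1-REG2\n");
          return 1;
        }
      *dash = '\0';
      printf ("fix registers %s through %s (inclusive)\n", range, dash + 1);
      range = strtok (NULL, ",");
    }
  return 0;
}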
*/ + + i = strlen (const_str); + str = (char *) alloca (i + 1); + memcpy (str, const_str, i + 1); + + while (1) + { + dash = strchr (str, '-'); + if (!dash) + { + warning (0, "value of %<-mfixed-range%> must have form REG1-REG2"); + return; + } + *dash = '\0'; + + comma = strchr (dash + 1, ','); + if (comma) + *comma = '\0'; + + first = decode_reg_name (str); + if (first < 0) + { + warning (0, "unknown register name: %s", str); + return; + } + + last = decode_reg_name (dash + 1); + if (last < 0) + { + warning (0, "unknown register name: %s", dash + 1); + return; + } + + *dash = '-'; + + if (first > last) + { + warning (0, "%s-%s is an empty range", str, dash + 1); + return; + } + + for (i = first; i <= last; ++i) + fixed_regs[i] = 1; + + if (!comma) + break; + + *comma = ','; + str = comma + 1; + } + } + + /* Implement TARGET_OPTION_OVERRIDE. */ + + static void + ia64_option_override (void) + { + unsigned int i; + cl_deferred_option *opt; + vec *v + = (vec *) ia64_deferred_options; + + if (v) + FOR_EACH_VEC_ELT (*v, i, opt) + { + switch (opt->opt_index) + { + case OPT_mfixed_range_: + fix_range (opt->arg); + break; + + default: + gcc_unreachable (); + } + } + + if (TARGET_AUTO_PIC) + target_flags |= MASK_CONST_GP; + + /* Numerous experiment shows that IRA based loop pressure + calculation works better for RTL loop invariant motion on targets + with enough (>= 32) registers. It is an expensive optimization. + So it is on only for peak performance. */ + if (optimize >= 3) + flag_ira_loop_pressure = 1; + + + ia64_section_threshold = (OPTION_SET_P (g_switch_value) + ? g_switch_value + : IA64_DEFAULT_GVALUE); + + init_machine_status = ia64_init_machine_status; + + if (flag_align_functions && !str_align_functions) + str_align_functions = "64"; + if (flag_align_loops && !str_align_loops) + str_align_loops = "32"; + if (TARGET_ABI_OPEN_VMS) + flag_no_common = 1; + + ia64_override_options_after_change(); + } + + /* Implement targetm.override_options_after_change. */ + + static void + ia64_override_options_after_change (void) + { + if (optimize >= 3 + && !OPTION_SET_P (flag_selective_scheduling) + && !OPTION_SET_P (flag_selective_scheduling2)) + { + flag_selective_scheduling2 = 1; + flag_sel_sched_pipelining = 1; + } + if (mflag_sched_control_spec == 2) + { + /* Control speculation is on by default for the selective scheduler, + but not for the Haifa scheduler. */ + mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0; + } + if (flag_sel_sched_pipelining && flag_auto_inc_dec) + { + /* FIXME: remove this when we'd implement breaking autoinsns as + a transformation. */ + flag_auto_inc_dec = 0; + } + } + + /* Initialize the record of emitted frame related registers. 
*/ + + void ia64_init_expanders (void) + { + memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs)); + } + + static struct machine_function * + ia64_init_machine_status (void) + { + return ggc_cleared_alloc (); + } + + static enum attr_itanium_class ia64_safe_itanium_class (rtx_insn *); + static enum attr_type ia64_safe_type (rtx_insn *); + + static enum attr_itanium_class + ia64_safe_itanium_class (rtx_insn *insn) + { + if (recog_memoized (insn) >= 0) + return get_attr_itanium_class (insn); + else if (DEBUG_INSN_P (insn)) + return ITANIUM_CLASS_IGNORE; + else + return ITANIUM_CLASS_UNKNOWN; + } + + static enum attr_type + ia64_safe_type (rtx_insn *insn) + { + if (recog_memoized (insn) >= 0) + return get_attr_type (insn); + else + return TYPE_UNKNOWN; + } + + /* The following collection of routines emit instruction group stop bits as + necessary to avoid dependencies. */ + + /* Need to track some additional registers as far as serialization is + concerned so we can properly handle br.call and br.ret. We could + make these registers visible to gcc, but since these registers are + never explicitly used in gcc generated code, it seems wasteful to + do so (plus it would make the call and return patterns needlessly + complex). */ + #define REG_RP (BR_REG (0)) + #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1) + /* This is used for volatile asms which may require a stop bit immediately + before and after them. */ + #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2) + #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3) + #define NUM_REGS (AR_UNAT_BIT_0 + 64) + + /* For each register, we keep track of how it has been written in the + current instruction group. + + If a register is written unconditionally (no qualifying predicate), + WRITE_COUNT is set to 2 and FIRST_PRED is ignored. + + If a register is written if its qualifying predicate P is true, we + set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register + may be written again by the complement of P (P^1) and when this happens, + WRITE_COUNT gets set to 2. + + The result of this is that whenever an insn attempts to write a register + whose WRITE_COUNT is two, we need to issue an insn group barrier first. + + If a predicate register is written by a floating-point insn, we set + WRITTEN_BY_FP to true. + + If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND + to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */ + + #if GCC_VERSION >= 4000 + #define RWS_FIELD_TYPE __extension__ unsigned short + #else + #define RWS_FIELD_TYPE unsigned int + #endif + struct reg_write_state + { + RWS_FIELD_TYPE write_count : 2; + RWS_FIELD_TYPE first_pred : 10; + RWS_FIELD_TYPE written_by_fp : 1; + RWS_FIELD_TYPE written_by_and : 1; + RWS_FIELD_TYPE written_by_or : 1; + }; + + /* Cumulative info for the current instruction group. */ + struct reg_write_state rws_sum[NUM_REGS]; + #if CHECKING_P + /* Bitmap whether a register has been written in the current insn. 
*/ + unsigned HOST_WIDEST_FAST_INT rws_insn + [(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1) + / HOST_BITS_PER_WIDEST_FAST_INT]; + + static inline void + rws_insn_set (unsigned int regno) + { + unsigned int elt = regno / HOST_BITS_PER_WIDEST_FAST_INT; + unsigned int bit = regno % HOST_BITS_PER_WIDEST_FAST_INT; + gcc_assert (!((rws_insn[elt] >> bit) & 1)); + rws_insn[elt] |= (unsigned HOST_WIDEST_FAST_INT) 1 << bit; + } + + static inline int + rws_insn_test (unsigned int regno) + { + unsigned int elt = regno / HOST_BITS_PER_WIDEST_FAST_INT; + unsigned int bit = regno % HOST_BITS_PER_WIDEST_FAST_INT; + return (rws_insn[elt] >> bit) & 1; + } + #else + /* When not checking, track just REG_AR_CFM and REG_VOLATILE. */ + unsigned char rws_insn[2]; + + static inline void + rws_insn_set (int regno) + { + if (regno == REG_AR_CFM) + rws_insn[0] = 1; + else if (regno == REG_VOLATILE) + rws_insn[1] = 1; + } + + static inline int + rws_insn_test (int regno) + { + if (regno == REG_AR_CFM) + return rws_insn[0]; + if (regno == REG_VOLATILE) + return rws_insn[1]; + return 0; + } + #endif + + /* Indicates whether this is the first instruction after a stop bit, + in which case we don't need another stop bit. Without this, + ia64_variable_issue will die when scheduling an alloc. */ + static int first_instruction; + + /* Misc flags needed to compute RAW/WAW dependencies while we are traversing + RTL for one instruction. */ + struct reg_flags + { + unsigned int is_write : 1; /* Is register being written? */ + unsigned int is_fp : 1; /* Is register used as part of an fp op? */ + unsigned int is_branch : 1; /* Is register used as part of a branch? */ + unsigned int is_and : 1; /* Is register used as part of and.orcm? */ + unsigned int is_or : 1; /* Is register used as part of or.andcm? */ + unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */ + }; + + static void rws_update (int, struct reg_flags, int); + static int rws_access_regno (int, struct reg_flags, int); + static int rws_access_reg (rtx, struct reg_flags, int); + static void update_set_flags (rtx, struct reg_flags *); + static int set_src_needs_barrier (rtx, struct reg_flags, int); + static int rtx_needs_barrier (rtx, struct reg_flags, int); + static void init_insn_group_barriers (void); + static int group_barrier_needed (rtx_insn *); + static int safe_group_barrier_needed (rtx_insn *); + static int in_safe_group_barrier; + + /* Update *RWS for REGNO, which is being written by the current instruction, + with predicate PRED, and associated register flags in FLAGS. */ + + static void + rws_update (int regno, struct reg_flags flags, int pred) + { + if (pred) + rws_sum[regno].write_count++; + else + rws_sum[regno].write_count = 2; + rws_sum[regno].written_by_fp |= flags.is_fp; + /* ??? Not tracking and/or across differing predicates. */ + rws_sum[regno].written_by_and = flags.is_and; + rws_sum[regno].written_by_or = flags.is_or; + rws_sum[regno].first_pred = pred; + } + + /* Handle an access to register REGNO of type FLAGS using predicate register + PRED. Update rws_sum array. Return 1 if this access creates + a dependency with an earlier instruction in the same group. */ + + static int + rws_access_regno (int regno, struct reg_flags flags, int pred) + { + int need_barrier = 0; + + gcc_assert (regno < NUM_REGS); + + if (! 
PR_REGNO_P (regno)) + flags.is_and = flags.is_or = 0; + + if (flags.is_write) + { + int write_count; + + rws_insn_set (regno); + write_count = rws_sum[regno].write_count; + + switch (write_count) + { + case 0: + /* The register has not been written yet. */ + if (!in_safe_group_barrier) + rws_update (regno, flags, pred); + break; + + case 1: + /* The register has been written via a predicate. Treat + it like a unconditional write and do not try to check + for complementary pred reg in earlier write. */ + if (flags.is_and && rws_sum[regno].written_by_and) + ; + else if (flags.is_or && rws_sum[regno].written_by_or) + ; + else + need_barrier = 1; + if (!in_safe_group_barrier) + rws_update (regno, flags, pred); + break; + + case 2: + /* The register has been unconditionally written already. We + need a barrier. */ + if (flags.is_and && rws_sum[regno].written_by_and) + ; + else if (flags.is_or && rws_sum[regno].written_by_or) + ; + else + need_barrier = 1; + if (!in_safe_group_barrier) + { + rws_sum[regno].written_by_and = flags.is_and; + rws_sum[regno].written_by_or = flags.is_or; + } + break; + + default: + gcc_unreachable (); + } + } + else + { + if (flags.is_branch) + { + /* Branches have several RAW exceptions that allow to avoid + barriers. */ + + if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM) + /* RAW dependencies on branch regs are permissible as long + as the writer is a non-branch instruction. Since we + never generate code that uses a branch register written + by a branch instruction, handling this case is + easy. */ + return 0; + + if (REGNO_REG_CLASS (regno) == PR_REGS + && ! rws_sum[regno].written_by_fp) + /* The predicates of a branch are available within the + same insn group as long as the predicate was written by + something other than a floating-point instruction. */ + return 0; + } + + if (flags.is_and && rws_sum[regno].written_by_and) + return 0; + if (flags.is_or && rws_sum[regno].written_by_or) + return 0; + + switch (rws_sum[regno].write_count) + { + case 0: + /* The register has not been written yet. */ + break; + + case 1: + /* The register has been written via a predicate, assume we + need a barrier (don't check for complementary regs). */ + need_barrier = 1; + break; + + case 2: + /* The register has been unconditionally written already. We + need a barrier. */ + need_barrier = 1; + break; + + default: + gcc_unreachable (); + } + } + + return need_barrier; + } + + static int + rws_access_reg (rtx reg, struct reg_flags flags, int pred) + { + int regno = REGNO (reg); + int n = REG_NREGS (reg); + + if (n == 1) + return rws_access_regno (regno, flags, pred); + else + { + int need_barrier = 0; + while (--n >= 0) + need_barrier |= rws_access_regno (regno + n, flags, pred); + return need_barrier; + } + } + + /* Examine X, which is a SET rtx, and update the flags, the predicate, and + the condition, stored in *PFLAGS, *PPRED and *PCOND. */ + + static void + update_set_flags (rtx x, struct reg_flags *pflags) + { + rtx src = SET_SRC (x); + + switch (GET_CODE (src)) + { + case CALL: + return; + + case IF_THEN_ELSE: + /* There are four cases here: + (1) The destination is (pc), in which case this is a branch, + nothing here applies. + (2) The destination is ar.lc, in which case this is a + doloop_end_internal, + (3) The destination is an fp register, in which case this is + an fselect instruction. + (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case + this is a check load. + In all cases, nothing we do in this function applies. 
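/* Illustrative sketch, not part of the patch: a stripped-down model of the
   write-count tracking rws_access_regno () implements above.  The parallel
   and.orcm/or.andcm cases, the branch- and predicate-register exceptions and
   the FIRST_PRED bookkeeping are all omitted; this only shows when a stop bit
   becomes necessary within one instruction group.  Standalone C.  */
#include <stdio.h>

static int write_count;   /* 0 = untouched, 1 = predicated write, 2 = unconditional */

static int
access_reg (int is_write, int predicated)
{
  if (is_write)
    {
      if (write_count != 0)
        return 1;                      /* WAW within the group: need a stop bit */
      write_count = predicated ? 1 : 2;
      return 0;
    }
  return write_count != 0;             /* RAW within the group: need a stop bit */
}

int main (void)
{
  printf ("%d\n", access_reg (1, 0));  /* 0: first write, just recorded       */
  printf ("%d\n", access_reg (0, 0));  /* 1: read after write needs a stop bit */
  printf ("%d\n", access_reg (1, 1));  /* 1: second write needs a stop bit     */
  return 0;
}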
*/ + return; + + default: + if (COMPARISON_P (src) + && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0)))) + /* Set pflags->is_fp to 1 so that we know we're dealing + with a floating point comparison when processing the + destination of the SET. */ + pflags->is_fp = 1; + + /* Discover if this is a parallel comparison. We only handle + and.orcm and or.andcm at present, since we must retain a + strict inverse on the predicate pair. */ + else if (GET_CODE (src) == AND) + pflags->is_and = 1; + else if (GET_CODE (src) == IOR) + pflags->is_or = 1; + + break; + } + } + + /* Subroutine of rtx_needs_barrier; this function determines whether the + source of a given SET rtx found in X needs a barrier. FLAGS and PRED + are as in rtx_needs_barrier. COND is an rtx that holds the condition + for this insn. */ + + static int + set_src_needs_barrier (rtx x, struct reg_flags flags, int pred) + { + int need_barrier = 0; + rtx dst; + rtx src = SET_SRC (x); + + if (GET_CODE (src) == CALL) + /* We don't need to worry about the result registers that + get written by subroutine call. */ + return rtx_needs_barrier (src, flags, pred); + else if (SET_DEST (x) == pc_rtx) + { + /* X is a conditional branch. */ + /* ??? This seems redundant, as the caller sets this bit for + all JUMP_INSNs. */ + if (!ia64_spec_check_src_p (src)) + flags.is_branch = 1; + return rtx_needs_barrier (src, flags, pred); + } + + if (ia64_spec_check_src_p (src)) + /* Avoid checking one register twice (in condition + and in 'then' section) for ldc pattern. */ + { + gcc_assert (REG_P (XEXP (src, 2))); + need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred); + + /* We process MEM below. */ + src = XEXP (src, 1); + } + + need_barrier |= rtx_needs_barrier (src, flags, pred); + + dst = SET_DEST (x); + if (GET_CODE (dst) == ZERO_EXTRACT) + { + need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred); + need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred); + } + return need_barrier; + } + + /* Handle an access to rtx X of type FLAGS using predicate register + PRED. Return 1 if this access creates a dependency with an earlier + instruction in the same group. */ + + static int + rtx_needs_barrier (rtx x, struct reg_flags flags, int pred) + { + int i, j; + int is_complemented = 0; + int need_barrier = 0; + const char *format_ptr; + struct reg_flags new_flags; + rtx cond; + + if (! x) + return 0; + + new_flags = flags; + + switch (GET_CODE (x)) + { + case SET: + update_set_flags (x, &new_flags); + need_barrier = set_src_needs_barrier (x, new_flags, pred); + if (GET_CODE (SET_SRC (x)) != CALL) + { + new_flags.is_write = 1; + need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred); + } + break; + + case CALL: + new_flags.is_write = 0; + need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred); + + /* Avoid multiple register writes, in case this is a pattern with + multiple CALL rtx. This avoids a failure in rws_access_reg. */ + if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM)) + { + new_flags.is_write = 1; + need_barrier |= rws_access_regno (REG_RP, new_flags, pred); + need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred); + need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred); + } + break; + + case COND_EXEC: + /* X is a predicated instruction. 
*/ + + cond = COND_EXEC_TEST (x); + gcc_assert (!pred); + need_barrier = rtx_needs_barrier (cond, flags, 0); + + if (GET_CODE (cond) == EQ) + is_complemented = 1; + cond = XEXP (cond, 0); + gcc_assert (GET_CODE (cond) == REG + && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS); + pred = REGNO (cond); + if (is_complemented) + ++pred; + + need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred); + return need_barrier; + + case CLOBBER: + case USE: + /* Clobber & use are for earlier compiler-phases only. */ + break; + + case ASM_OPERANDS: + case ASM_INPUT: + /* We always emit stop bits for traditional asms. We emit stop bits + for volatile extended asms if TARGET_VOL_ASM_STOP is true. */ + if (GET_CODE (x) != ASM_OPERANDS + || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP)) + { + /* Avoid writing the register multiple times if we have multiple + asm outputs. This avoids a failure in rws_access_reg. */ + if (! rws_insn_test (REG_VOLATILE)) + { + new_flags.is_write = 1; + rws_access_regno (REG_VOLATILE, new_flags, pred); + } + return 1; + } + + /* For all ASM_OPERANDS, we must traverse the vector of input operands. + We cannot just fall through here since then we would be confused + by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate + traditional asms unlike their normal usage. */ + + for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i) + if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred)) + need_barrier = 1; + break; + + case PARALLEL: + for (i = XVECLEN (x, 0) - 1; i >= 0; --i) + { + rtx pat = XVECEXP (x, 0, i); + switch (GET_CODE (pat)) + { + case SET: + update_set_flags (pat, &new_flags); + need_barrier |= set_src_needs_barrier (pat, new_flags, pred); + break; + + case USE: + case CALL: + case ASM_OPERANDS: + case ASM_INPUT: + need_barrier |= rtx_needs_barrier (pat, flags, pred); + break; + + case CLOBBER: + if (REG_P (XEXP (pat, 0)) + && extract_asm_operands (x) != NULL_RTX + && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM) + { + new_flags.is_write = 1; + need_barrier |= rtx_needs_barrier (XEXP (pat, 0), + new_flags, pred); + new_flags = flags; + } + break; + + case RETURN: + break; + + default: + gcc_unreachable (); + } + } + for (i = XVECLEN (x, 0) - 1; i >= 0; --i) + { + rtx pat = XVECEXP (x, 0, i); + if (GET_CODE (pat) == SET) + { + if (GET_CODE (SET_SRC (pat)) != CALL) + { + new_flags.is_write = 1; + need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags, + pred); + } + } + else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN) + need_barrier |= rtx_needs_barrier (pat, flags, pred); + } + break; + + case SUBREG: + need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred); + break; + case REG: + if (REGNO (x) == AR_UNAT_REGNUM) + { + for (i = 0; i < 64; ++i) + need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred); + } + else + need_barrier = rws_access_reg (x, flags, pred); + break; + + case MEM: + /* Find the regs used in memory address computation. */ + new_flags.is_write = 0; + need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred); + break; + + case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR: + case SYMBOL_REF: case LABEL_REF: case CONST: + break; + + /* Operators with side-effects. 
*/ + case POST_INC: case POST_DEC: + gcc_assert (GET_CODE (XEXP (x, 0)) == REG); + + new_flags.is_write = 0; + need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred); + new_flags.is_write = 1; + need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred); + break; + + case POST_MODIFY: + gcc_assert (GET_CODE (XEXP (x, 0)) == REG); + + new_flags.is_write = 0; + need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred); + need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred); + new_flags.is_write = 1; + need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred); + break; + + /* Handle common unary and binary ops for efficiency. */ + case COMPARE: case PLUS: case MINUS: case MULT: case DIV: + case MOD: case UDIV: case UMOD: case AND: case IOR: + case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT: + case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX: + case NE: case EQ: case GE: case GT: case LE: + case LT: case GEU: case GTU: case LEU: case LTU: + need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred); + need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred); + break; + + case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND: + case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT: + case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS: + case SQRT: case FFS: case POPCOUNT: + need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred); + break; + + case VEC_SELECT: + /* VEC_SELECT's second argument is a PARALLEL with integers that + describe the elements selected. On ia64, those integers are + always constants. Avoid walking the PARALLEL so that we don't + get confused with "normal" parallels and then die. */ + need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred); + break; + + case UNSPEC: + switch (XINT (x, 1)) + { + case UNSPEC_LTOFF_DTPMOD: + case UNSPEC_LTOFF_DTPREL: + case UNSPEC_DTPREL: + case UNSPEC_LTOFF_TPREL: + case UNSPEC_TPREL: + case UNSPEC_PRED_REL_MUTEX: + case UNSPEC_PIC_CALL: + case UNSPEC_MF: + case UNSPEC_FETCHADD_ACQ: + case UNSPEC_FETCHADD_REL: + case UNSPEC_BSP_VALUE: + case UNSPEC_FLUSHRS: + case UNSPEC_BUNDLE_SELECTOR: + break; + + case UNSPEC_GR_SPILL: + case UNSPEC_GR_RESTORE: + { + HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1)); + HOST_WIDE_INT bit = (offset >> 3) & 63; + + need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); + new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL); + need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit, + new_flags, pred); + break; + } + + case UNSPEC_FR_SPILL: + case UNSPEC_FR_RESTORE: + case UNSPEC_GETF_EXP: + case UNSPEC_SETF_EXP: + case UNSPEC_ADDP4: + case UNSPEC_FR_SQRT_RECIP_APPROX: + case UNSPEC_FR_SQRT_RECIP_APPROX_RES: + case UNSPEC_LDA: + case UNSPEC_LDS: + case UNSPEC_LDS_A: + case UNSPEC_LDSA: + case UNSPEC_CHKACLR: + case UNSPEC_CHKS: + need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); + break; + + case UNSPEC_FR_RECIP_APPROX: + case UNSPEC_SHRP: + case UNSPEC_COPYSIGN: + case UNSPEC_FR_RECIP_APPROX_RES: + need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); + need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred); + break; + + case UNSPEC_CMPXCHG_ACQ: + case UNSPEC_CMPXCHG_REL: + need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred); + need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred); + break; + + default: + gcc_unreachable (); + } + break; + + case UNSPEC_VOLATILE: + switch (XINT (x, 1)) + { + case UNSPECV_ALLOC: + /* Alloc must always be 
the first instruction of a group. + We force this by always returning true. */ + /* ??? We might get better scheduling if we explicitly check for + input/local/output register dependencies, and modify the + scheduler so that alloc is always reordered to the start of + the current group. We could then eliminate all of the + first_instruction code. */ + rws_access_regno (AR_PFS_REGNUM, flags, pred); + + new_flags.is_write = 1; + rws_access_regno (REG_AR_CFM, new_flags, pred); + return 1; + + case UNSPECV_SET_BSP: + case UNSPECV_PROBE_STACK_RANGE: + need_barrier = 1; + break; + + case UNSPECV_BLOCKAGE: + case UNSPECV_INSN_GROUP_BARRIER: + case UNSPECV_BREAK: + case UNSPECV_PSAC_ALL: + case UNSPECV_PSAC_NORMAL: + return 0; + + case UNSPECV_PROBE_STACK_ADDRESS: + need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); + break; + + default: + gcc_unreachable (); + } + break; + + case RETURN: + new_flags.is_write = 0; + need_barrier = rws_access_regno (REG_RP, flags, pred); + need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred); + + new_flags.is_write = 1; + need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred); + need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred); + break; + + default: + format_ptr = GET_RTX_FORMAT (GET_CODE (x)); + for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) + switch (format_ptr[i]) + { + case '0': /* unused field */ + case 'i': /* integer */ + case 'n': /* note */ + case 'w': /* wide integer */ + case 's': /* pointer to string */ + case 'S': /* optional pointer to string */ + break; + + case 'e': + if (rtx_needs_barrier (XEXP (x, i), flags, pred)) + need_barrier = 1; + break; + + case 'E': + for (j = XVECLEN (x, i) - 1; j >= 0; --j) + if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred)) + need_barrier = 1; + break; + + default: + gcc_unreachable (); + } + break; + } + return need_barrier; + } + + /* Clear out the state for group_barrier_needed at the start of a + sequence of insns. */ + + static void + init_insn_group_barriers (void) + { + memset (rws_sum, 0, sizeof (rws_sum)); + first_instruction = 1; + } + + /* Given the current state, determine whether a group barrier (a stop bit) is + necessary before INSN. Return nonzero if so. This modifies the state to + include the effects of INSN as a side-effect. */ + + static int + group_barrier_needed (rtx_insn *insn) + { + rtx pat; + int need_barrier = 0; + struct reg_flags flags; + + memset (&flags, 0, sizeof (flags)); + switch (GET_CODE (insn)) + { + case NOTE: + case DEBUG_INSN: + break; + + case BARRIER: + /* A barrier doesn't imply an instruction group boundary. */ + break; + + case CODE_LABEL: + memset (rws_insn, 0, sizeof (rws_insn)); + return 1; + + case CALL_INSN: + flags.is_branch = 1; + flags.is_sibcall = SIBLING_CALL_P (insn); + memset (rws_insn, 0, sizeof (rws_insn)); + + /* Don't bundle a call following another call. */ + if ((pat = prev_active_insn (insn)) && CALL_P (pat)) + { + need_barrier = 1; + break; + } + + need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0); + break; + + case JUMP_INSN: + if (!ia64_spec_check_p (insn)) + flags.is_branch = 1; + + /* Don't bundle a jump following a call. */ + if ((pat = prev_active_insn (insn)) && CALL_P (pat)) + { + need_barrier = 1; + break; + } + /* FALLTHRU */ + + case INSN: + if (GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + /* Don't care about USE and CLOBBER "insns"---those are used to + indicate to the optimizer that it shouldn't get rid of + certain operations. 
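/* Illustrative sketch, not part of the patch: how the UNSPEC_GR_SPILL and
   UNSPEC_GR_RESTORE case of rtx_needs_barrier () above maps a spill offset to
   one of the 64 tracked ar.unat bits -- each 8-byte slot owns one bit, and
   the mapping wraps every 512 bytes.  Standalone C.  */
#include <stdio.h>

static int
unat_bit (long offset)
{
  return (int) ((offset >> 3) & 63);
}

int main (void)
{
  printf ("%d\n", unat_bit (0));    /* 0  */
  printf ("%d\n", unat_bit (8));    /* 1  */
  printf ("%d\n", unat_bit (504));  /* 63 */
  printf ("%d\n", unat_bit (512));  /* 0  (wraps around) */
  return 0;
}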
*/ + break; + + pat = PATTERN (insn); + + /* Ug. Hack hacks hacked elsewhere. */ + switch (recog_memoized (insn)) + { + /* We play dependency tricks with the epilogue in order + to get proper schedules. Undo this for dv analysis. */ + case CODE_FOR_epilogue_deallocate_stack: + case CODE_FOR_prologue_allocate_stack: + pat = XVECEXP (pat, 0, 0); + break; + + /* The pattern we use for br.cloop confuses the code above. + The second element of the vector is representative. */ + case CODE_FOR_doloop_end_internal: + pat = XVECEXP (pat, 0, 1); + break; + + /* Doesn't generate code. */ + case CODE_FOR_pred_rel_mutex: + case CODE_FOR_prologue_use: + return 0; + + default: + break; + } + + memset (rws_insn, 0, sizeof (rws_insn)); + need_barrier = rtx_needs_barrier (pat, flags, 0); + + /* Check to see if the previous instruction was a volatile + asm. */ + if (! need_barrier) + need_barrier = rws_access_regno (REG_VOLATILE, flags, 0); + + break; + + default: + gcc_unreachable (); + } + + if (first_instruction && important_for_bundling_p (insn)) + { + need_barrier = 0; + first_instruction = 0; + } + + return need_barrier; + } + + /* Like group_barrier_needed, but do not clobber the current state. */ + + static int + safe_group_barrier_needed (rtx_insn *insn) + { + int saved_first_instruction; + int t; + + saved_first_instruction = first_instruction; + in_safe_group_barrier = 1; + + t = group_barrier_needed (insn); + + first_instruction = saved_first_instruction; + in_safe_group_barrier = 0; + + return t; + } + + /* Scan the current function and insert stop bits as necessary to + eliminate dependencies. This function assumes that a final + instruction scheduling pass has been run which has already + inserted most of the necessary stop bits. This function only + inserts new ones at basic block boundaries, since these are + invisible to the scheduler. */ + + static void + emit_insn_group_barriers (FILE *dump) + { + rtx_insn *insn; + rtx_insn *last_label = 0; + int insns_since_last_label = 0; + + init_insn_group_barriers (); + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (LABEL_P (insn)) + { + if (insns_since_last_label) + last_label = insn; + insns_since_last_label = 0; + } + else if (NOTE_P (insn) + && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK) + { + if (insns_since_last_label) + last_label = insn; + insns_since_last_label = 0; + } + else if (NONJUMP_INSN_P (insn) + && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE + && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER) + { + init_insn_group_barriers (); + last_label = 0; + } + else if (NONDEBUG_INSN_P (insn)) + { + insns_since_last_label = 1; + + if (group_barrier_needed (insn)) + { + if (last_label) + { + if (dump) + fprintf (dump, "Emitting stop before label %d\n", + INSN_UID (last_label)); + emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label); + insn = last_label; + + init_insn_group_barriers (); + last_label = 0; + } + } + } + } + } + + /* Like emit_insn_group_barriers, but run if no final scheduling pass was run. + This function has to emit all necessary group barriers. */ + + static void + emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED) + { + rtx_insn *insn; + + init_insn_group_barriers (); + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (BARRIER_P (insn)) + { + rtx_insn *last = prev_active_insn (insn); + + if (! 
last) + continue; + if (JUMP_TABLE_DATA_P (last)) + last = prev_active_insn (last); + if (recog_memoized (last) != CODE_FOR_insn_group_barrier) + emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last); + + init_insn_group_barriers (); + } + else if (NONDEBUG_INSN_P (insn)) + { + if (recog_memoized (insn) == CODE_FOR_insn_group_barrier) + init_insn_group_barriers (); + else if (group_barrier_needed (insn)) + { + emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn); + init_insn_group_barriers (); + group_barrier_needed (insn); + } + } + } + } + + + + /* Instruction scheduling support. */ + + #define NR_BUNDLES 10 + + /* A list of names of all available bundles. */ + + static const char *bundle_name [NR_BUNDLES] = + { + ".mii", + ".mmi", + ".mfi", + ".mmf", + #if NR_BUNDLES == 10 + ".bbb", + ".mbb", + #endif + ".mib", + ".mmb", + ".mfb", + ".mlx" + }; + + /* Nonzero if we should insert stop bits into the schedule. */ + + int ia64_final_schedule = 0; + + /* Codes of the corresponding queried units: */ + + static int _0mii_, _0mmi_, _0mfi_, _0mmf_; + static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_; + + static int _1mii_, _1mmi_, _1mfi_, _1mmf_; + static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_; + + static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6; + + /* The following variable value is an insn group barrier. */ + + static rtx_insn *dfa_stop_insn; + + /* The following variable value is the last issued insn. */ + + static rtx_insn *last_scheduled_insn; + + /* The following variable value is pointer to a DFA state used as + temporary variable. */ + + static state_t temp_dfa_state = NULL; + + /* The following variable value is DFA state after issuing the last + insn. */ + + static state_t prev_cycle_state = NULL; + + /* The following array element values are TRUE if the corresponding + insn requires to add stop bits before it. */ + + static char *stops_p = NULL; + + /* The following variable is used to set up the mentioned above array. */ + + static int stop_before_p = 0; + + /* The following variable value is length of the arrays `clocks' and + `add_cycles'. */ + + static int clocks_length; + + /* The following variable value is number of data speculations in progress. */ + static int pending_data_specs = 0; + + /* Number of memory references on current and three future processor cycles. */ + static char mem_ops_in_group[4]; + + /* Number of current processor cycle (from scheduler's point of view). */ + static int current_cycle; + + static rtx ia64_single_set (rtx_insn *); + static void ia64_emit_insn_before (rtx, rtx_insn *); + + /* Map a bundle number to its pseudo-op. */ + + const char * + get_bundle_name (int b) + { + return bundle_name[b]; + } + + + /* Return the maximum number of instructions a cpu can issue. */ + + static int + ia64_issue_rate (void) + { + return 6; + } + + /* Helper function - like single_set, but look inside COND_EXEC. */ + + static rtx + ia64_single_set (rtx_insn *insn) + { + rtx x = PATTERN (insn), ret; + if (GET_CODE (x) == COND_EXEC) + x = COND_EXEC_CODE (x); + if (GET_CODE (x) == SET) + return x; + + /* Special case here prologue_allocate_stack and epilogue_deallocate_stack. + Although they are not classical single set, the second set is there just + to protect it from moving past FP-relative stack accesses. 
*/ + switch (recog_memoized (insn)) + { + case CODE_FOR_prologue_allocate_stack: + case CODE_FOR_prologue_allocate_stack_pr: + case CODE_FOR_epilogue_deallocate_stack: + case CODE_FOR_epilogue_deallocate_stack_pr: + ret = XVECEXP (x, 0, 0); + break; + + default: + ret = single_set_2 (insn, x); + break; + } + + return ret; + } + + /* Adjust the cost of a scheduling dependency. + Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN. + COST is the current cost, DW is dependency weakness. */ + static int + ia64_adjust_cost (rtx_insn *insn, int dep_type1, rtx_insn *dep_insn, + int cost, dw_t dw) + { + enum reg_note dep_type = (enum reg_note) dep_type1; + enum attr_itanium_class dep_class; + enum attr_itanium_class insn_class; + + insn_class = ia64_safe_itanium_class (insn); + dep_class = ia64_safe_itanium_class (dep_insn); + + /* Treat true memory dependencies separately. Ignore apparent true + dependence between store and call (call has a MEM inside a SYMBOL_REF). */ + if (dep_type == REG_DEP_TRUE + && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF) + && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL)) + return 0; + + if (dw == MIN_DEP_WEAK) + /* Store and load are likely to alias, use higher cost to avoid stall. */ + return param_sched_mem_true_dep_cost; + else if (dw > MIN_DEP_WEAK) + { + /* Store and load are less likely to alias. */ + if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF) + /* Assume there will be no cache conflict for floating-point data. + For integer data, L1 conflict penalty is huge (17 cycles), so we + never assume it will not cause a conflict. */ + return 0; + else + return cost; + } + + if (dep_type != REG_DEP_OUTPUT) + return cost; + + if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF + || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF) + return 0; + + return cost; + } + + /* Like emit_insn_before, but skip cycle_display notes. + ??? When cycle display notes are implemented, update this. */ + + static void + ia64_emit_insn_before (rtx insn, rtx_insn *before) + { + emit_insn_before (insn, before); + } + + /* The following function marks insns who produce addresses for load + and store insns. Such insns will be placed into M slots because it + decrease latency time for Itanium1 (see function + `ia64_produce_address_p' and the DFA descriptions). */ + + static void + ia64_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail) + { + rtx_insn *insn, *next, *next_tail; + + /* Before reload, which_alternative is not set, which means that + ia64_safe_itanium_class will produce wrong results for (at least) + move instructions. 
*/ + if (!reload_completed) + return; + + next_tail = NEXT_INSN (tail); + for (insn = head; insn != next_tail; insn = NEXT_INSN (insn)) + if (INSN_P (insn)) + insn->call = 0; + for (insn = head; insn != next_tail; insn = NEXT_INSN (insn)) + if (INSN_P (insn) + && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU) + { + sd_iterator_def sd_it; + dep_t dep; + bool has_mem_op_consumer_p = false; + + FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep) + { + enum attr_itanium_class c; + + if (DEP_TYPE (dep) != REG_DEP_TRUE) + continue; + + next = DEP_CON (dep); + c = ia64_safe_itanium_class (next); + if ((c == ITANIUM_CLASS_ST + || c == ITANIUM_CLASS_STF) + && ia64_st_address_bypass_p (insn, next)) + { + has_mem_op_consumer_p = true; + break; + } + else if ((c == ITANIUM_CLASS_LD + || c == ITANIUM_CLASS_FLD + || c == ITANIUM_CLASS_FLDP) + && ia64_ld_address_bypass_p (insn, next)) + { + has_mem_op_consumer_p = true; + break; + } + } + + insn->call = has_mem_op_consumer_p; + } + } + + /* We're beginning a new block. Initialize data structures as necessary. */ + + static void + ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED, + int sched_verbose ATTRIBUTE_UNUSED, + int max_ready ATTRIBUTE_UNUSED) + { + if (flag_checking && !sel_sched_p () && reload_completed) + { + for (rtx_insn *insn = NEXT_INSN (current_sched_info->prev_head); + insn != current_sched_info->next_tail; + insn = NEXT_INSN (insn)) + gcc_assert (!SCHED_GROUP_P (insn)); + } + last_scheduled_insn = NULL; + init_insn_group_barriers (); + + current_cycle = 0; + memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group)); + } + + /* We're beginning a scheduling pass. Check assertion. */ + + static void + ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED, + int sched_verbose ATTRIBUTE_UNUSED, + int max_ready ATTRIBUTE_UNUSED) + { + gcc_assert (pending_data_specs == 0); + } + + /* Scheduling pass is now finished. Free/reset static variable. */ + static void + ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED, + int sched_verbose ATTRIBUTE_UNUSED) + { + gcc_assert (pending_data_specs == 0); + } + + /* Return TRUE if INSN is a load (either normal or speculative, but not a + speculation check), FALSE otherwise. */ + static bool + is_load_p (rtx_insn *insn) + { + enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn); + + return + ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD) + && get_attr_check_load (insn) == CHECK_LOAD_NO); + } + + /* If INSN is a memory reference, memoize it in MEM_OPS_IN_GROUP global array + (taking account for 3-cycle cache reference postponing for stores: Intel + Itanium 2 Reference Manual for Software Development and Optimization, + 6.7.3.1). */ + static void + record_memory_reference (rtx_insn *insn) + { + enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn); + + switch (insn_class) { + case ITANIUM_CLASS_FLD: + case ITANIUM_CLASS_LD: + mem_ops_in_group[current_cycle % 4]++; + break; + case ITANIUM_CLASS_STF: + case ITANIUM_CLASS_ST: + mem_ops_in_group[(current_cycle + 3) % 4]++; + break; + default:; + } + } + + /* We are about to being issuing insns for this clock cycle. + Override the default sort algorithm to better slot instructions. 
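/* Illustrative sketch, not part of the patch: the modulo-4 cycle buckets used
   by record_memory_reference () above.  A load is charged to the cycle it
   issues on; a store is charged three cycles later to model the postponed
   cache access described in the Itanium 2 optimization manual.  Standalone C.  */
#include <stdio.h>

static char mem_ops_in_group[4];

static void note_load  (int cycle) { mem_ops_in_group[cycle % 4]++; }
static void note_store (int cycle) { mem_ops_in_group[(cycle + 3) % 4]++; }

int main (void)
{
  note_load (0);        /* charged to bucket 0               */
  note_store (0);       /* charged to bucket 3, i.e. cycle 3 */
  note_load (1);        /* charged to bucket 1               */

  for (int i = 0; i < 4; i++)
    printf ("bucket %d: %d\n", i, mem_ops_in_group[i]);
  /* bucket 0: 1, bucket 1: 1, bucket 2: 0, bucket 3: 1 */
  return 0;
}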
*/ + + static int + ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready, + int *pn_ready, int clock_var, + int reorder_type) + { + int n_asms; + int n_ready = *pn_ready; + rtx_insn **e_ready = ready + n_ready; + rtx_insn **insnp; + + if (sched_verbose) + fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type); + + if (reorder_type == 0) + { + /* First, move all USEs, CLOBBERs and other crud out of the way. */ + n_asms = 0; + for (insnp = ready; insnp < e_ready; insnp++) + if (insnp < e_ready) + { + rtx_insn *insn = *insnp; + enum attr_type t = ia64_safe_type (insn); + if (t == TYPE_UNKNOWN) + { + if (GET_CODE (PATTERN (insn)) == ASM_INPUT + || asm_noperands (PATTERN (insn)) >= 0) + { + rtx_insn *lowest = ready[n_asms]; + ready[n_asms] = insn; + *insnp = lowest; + n_asms++; + } + else + { + rtx_insn *highest = ready[n_ready - 1]; + ready[n_ready - 1] = insn; + *insnp = highest; + return 1; + } + } + } + + if (n_asms < n_ready) + { + /* Some normal insns to process. Skip the asms. */ + ready += n_asms; + n_ready -= n_asms; + } + else if (n_ready > 0) + return 1; + } + + if (ia64_final_schedule) + { + int deleted = 0; + int nr_need_stop = 0; + + for (insnp = ready; insnp < e_ready; insnp++) + if (safe_group_barrier_needed (*insnp)) + nr_need_stop++; + + if (reorder_type == 1 && n_ready == nr_need_stop) + return 0; + if (reorder_type == 0) + return 1; + insnp = e_ready; + /* Move down everything that needs a stop bit, preserving + relative order. */ + while (insnp-- > ready + deleted) + while (insnp >= ready + deleted) + { + rtx_insn *insn = *insnp; + if (! safe_group_barrier_needed (insn)) + break; + memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx)); + *ready = insn; + deleted++; + } + n_ready -= deleted; + ready += deleted; + } + + current_cycle = clock_var; + if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns) + { + int moved = 0; + + insnp = e_ready; + /* Move down loads/stores, preserving relative order. */ + while (insnp-- > ready + moved) + while (insnp >= ready + moved) + { + rtx_insn *insn = *insnp; + if (! is_load_p (insn)) + break; + memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx)); + *ready = insn; + moved++; + } + n_ready -= moved; + ready += moved; + } + + return 1; + } + + /* We are about to being issuing insns for this clock cycle. Override + the default sort algorithm to better slot instructions. */ + + static int + ia64_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready, + int *pn_ready, int clock_var) + { + return ia64_dfa_sched_reorder (dump, sched_verbose, ready, + pn_ready, clock_var, 0); + } + + /* Like ia64_sched_reorder, but called after issuing each insn. + Override the default sort algorithm to better slot instructions. */ + + static int + ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED, + int sched_verbose ATTRIBUTE_UNUSED, rtx_insn **ready, + int *pn_ready, int clock_var) + { + return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready, + clock_var, 1); + } + + /* We are about to issue INSN. Return the number of insns left on the + ready queue that can be issued this cycle. */ + + static int + ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED, + int sched_verbose ATTRIBUTE_UNUSED, + rtx_insn *insn, + int can_issue_more ATTRIBUTE_UNUSED) + { + if (sched_deps_info->generate_spec_deps && !sel_sched_p ()) + /* Modulo scheduling does not extend h_i_d when emitting + new instructions. Don't use h_i_d, if we don't have to. 
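/* Illustrative sketch, not part of the patch: the memmove rotation that
   ia64_dfa_sched_reorder () above uses to push insns needing a stop bit (or
   excess memory ops) to the low-priority front of the ready array while
   preserving the relative order of both groups.  Modeled here on ints with an
   "is odd" predicate instead of rtx_insn pointers; the caller then skips the
   first MOVED entries, just as the real code advances READY.  Standalone C.  */
#include <stdio.h>
#include <string.h>

static int is_odd (int v) { return v & 1; }

static int
move_matching_to_front (int *a, int n, int (*pred) (int))
{
  int moved = 0;
  int *p = a + n;

  while (p-- > a + moved)
    while (p >= a + moved)
      {
        int v = *p;
        if (!pred (v))
          break;
        memmove (a + 1, a, (p - a) * sizeof *a);  /* slide the prefix up by one */
        *a = v;                                   /* and park V at the front    */
        moved++;
      }
  return moved;
}

int main (void)
{
  int ready[] = { 1, 2, 3, 4, 5 };
  int moved = move_matching_to_front (ready, 5, is_odd);

  printf ("moved %d:", moved);
  for (int i = 0; i < 5; i++)
    printf (" %d", ready[i]);
  printf ("\n");   /* moved 3: 1 3 5 2 4 */
  return 0;
}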
*/ + { + if (DONE_SPEC (insn) & BEGIN_DATA) + pending_data_specs++; + if (CHECK_SPEC (insn) & BEGIN_DATA) + pending_data_specs--; + } + + if (DEBUG_INSN_P (insn)) + return 1; + + last_scheduled_insn = insn; + memcpy (prev_cycle_state, curr_state, dfa_state_size); + if (reload_completed) + { + int needed = group_barrier_needed (insn); + + gcc_assert (!needed); + if (CALL_P (insn)) + init_insn_group_barriers (); + stops_p [INSN_UID (insn)] = stop_before_p; + stop_before_p = 0; + + record_memory_reference (insn); + } + return 1; + } + + /* We are choosing insn from the ready queue. Return zero if INSN + can be chosen. */ + + static int + ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index) + { + gcc_assert (insn && INSN_P (insn)); + + /* Size of ALAT is 32. As far as we perform conservative + data speculation, we keep ALAT half-empty. */ + if (pending_data_specs >= 16 && (TODO_SPEC (insn) & BEGIN_DATA)) + return ready_index == 0 ? -1 : 1; + + if (ready_index == 0) + return 0; + + if ((!reload_completed + || !safe_group_barrier_needed (insn)) + && (!mflag_sched_mem_insns_hard_limit + || !is_load_p (insn) + || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns)) + return 0; + + return 1; + } + + /* The following variable value is pseudo-insn used by the DFA insn + scheduler to change the DFA state when the simulated clock is + increased. */ + + static rtx_insn *dfa_pre_cycle_insn; + + /* Returns 1 when a meaningful insn was scheduled between the last group + barrier and LAST. */ + static int + scheduled_good_insn (rtx_insn *last) + { + if (last && recog_memoized (last) >= 0) + return 1; + + for ( ; + last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last) + && !stops_p[INSN_UID (last)]; + last = PREV_INSN (last)) + /* We could hit a NOTE_INSN_DELETED here which is actually outside + the ebb we're scheduling. */ + if (INSN_P (last) && recog_memoized (last) >= 0) + return 1; + + return 0; + } + + /* We are about to being issuing INSN. Return nonzero if we cannot + issue it on given cycle CLOCK and return zero if we should not sort + the ready queue on the next clock start. */ + + static int + ia64_dfa_new_cycle (FILE *dump, int verbose, rtx_insn *insn, int last_clock, + int clock, int *sort_p) + { + gcc_assert (insn && INSN_P (insn)); + + if (DEBUG_INSN_P (insn)) + return 0; + + /* When a group barrier is needed for insn, last_scheduled_insn + should be set. */ + gcc_assert (!(reload_completed && safe_group_barrier_needed (insn)) + || last_scheduled_insn); + + if ((reload_completed + && (safe_group_barrier_needed (insn) + || (mflag_sched_stop_bits_after_every_cycle + && last_clock != clock + && last_scheduled_insn + && scheduled_good_insn (last_scheduled_insn)))) + || (last_scheduled_insn + && (CALL_P (last_scheduled_insn) + || unknown_for_bundling_p (last_scheduled_insn)))) + { + init_insn_group_barriers (); + + if (verbose && dump) + fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn), + last_clock == clock ? " + cycle advance" : ""); + + stop_before_p = 1; + current_cycle = clock; + mem_ops_in_group[current_cycle % 4] = 0; + + if (last_clock == clock) + { + state_transition (curr_state, dfa_stop_insn); + if (TARGET_EARLY_STOP_BITS) + *sort_p = (last_scheduled_insn == NULL_RTX + || ! 
CALL_P (last_scheduled_insn)); + else + *sort_p = 0; + return 1; + } + + if (last_scheduled_insn) + { + if (unknown_for_bundling_p (last_scheduled_insn)) + state_reset (curr_state); + else + { + memcpy (curr_state, prev_cycle_state, dfa_state_size); + state_transition (curr_state, dfa_stop_insn); + state_transition (curr_state, dfa_pre_cycle_insn); + state_transition (curr_state, NULL); + } + } + } + return 0; + } + + /* Implement targetm.sched.h_i_d_extended hook. + Extend internal data structures. */ + static void + ia64_h_i_d_extended (void) + { + if (stops_p != NULL) + { + int new_clocks_length = get_max_uid () * 3 / 2; + stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1); + clocks_length = new_clocks_length; + } + } + + + /* This structure describes the data used by the backend to guide scheduling. + When the current scheduling point is switched, this data should be saved + and restored later, if the scheduler returns to this point. */ + struct _ia64_sched_context + { + state_t prev_cycle_state; + rtx_insn *last_scheduled_insn; + struct reg_write_state rws_sum[NUM_REGS]; + struct reg_write_state rws_insn[NUM_REGS]; + int first_instruction; + int pending_data_specs; + int current_cycle; + char mem_ops_in_group[4]; + }; + typedef struct _ia64_sched_context *ia64_sched_context_t; + + /* Allocates a scheduling context. */ + static void * + ia64_alloc_sched_context (void) + { + return xmalloc (sizeof (struct _ia64_sched_context)); + } + + /* Initializes the _SC context with clean data, if CLEAN_P, and from + the global context otherwise. */ + static void + ia64_init_sched_context (void *_sc, bool clean_p) + { + ia64_sched_context_t sc = (ia64_sched_context_t) _sc; + + sc->prev_cycle_state = xmalloc (dfa_state_size); + if (clean_p) + { + state_reset (sc->prev_cycle_state); + sc->last_scheduled_insn = NULL; + memset (sc->rws_sum, 0, sizeof (rws_sum)); + memset (sc->rws_insn, 0, sizeof (rws_insn)); + sc->first_instruction = 1; + sc->pending_data_specs = 0; + sc->current_cycle = 0; + memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group)); + } + else + { + memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size); + sc->last_scheduled_insn = last_scheduled_insn; + memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum)); + memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn)); + sc->first_instruction = first_instruction; + sc->pending_data_specs = pending_data_specs; + sc->current_cycle = current_cycle; + memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group)); + } + } + + /* Sets the global scheduling context to the one pointed to by _SC. */ + static void + ia64_set_sched_context (void *_sc) + { + ia64_sched_context_t sc = (ia64_sched_context_t) _sc; + + gcc_assert (sc != NULL); + + memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size); + last_scheduled_insn = sc->last_scheduled_insn; + memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum)); + memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn)); + first_instruction = sc->first_instruction; + pending_data_specs = sc->pending_data_specs; + current_cycle = sc->current_cycle; + memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group)); + } + + /* Clears the data in the _SC scheduling context. */ + static void + ia64_clear_sched_context (void *_sc) + { + ia64_sched_context_t sc = (ia64_sched_context_t) _sc; + + free (sc->prev_cycle_state); + sc->prev_cycle_state = NULL; + } + + /* Frees the _SC scheduling context. 
*/ + static void + ia64_free_sched_context (void *_sc) + { + gcc_assert (_sc != NULL); + + free (_sc); + } + + typedef rtx (* gen_func_t) (rtx, rtx); + + /* Return a function that will generate a load of mode MODE_NO + with speculation types TS. */ + static gen_func_t + get_spec_load_gen_function (ds_t ts, int mode_no) + { + static gen_func_t gen_ld_[] = { + gen_movbi, + gen_movqi_internal, + gen_movhi_internal, + gen_movsi_internal, + gen_movdi_internal, + gen_movsf_internal, + gen_movdf_internal, + gen_movxf_internal, + gen_movti_internal, + gen_zero_extendqidi2, + gen_zero_extendhidi2, + gen_zero_extendsidi2, + }; + + static gen_func_t gen_ld_a[] = { + gen_movbi_advanced, + gen_movqi_advanced, + gen_movhi_advanced, + gen_movsi_advanced, + gen_movdi_advanced, + gen_movsf_advanced, + gen_movdf_advanced, + gen_movxf_advanced, + gen_movti_advanced, + gen_zero_extendqidi2_advanced, + gen_zero_extendhidi2_advanced, + gen_zero_extendsidi2_advanced, + }; + static gen_func_t gen_ld_s[] = { + gen_movbi_speculative, + gen_movqi_speculative, + gen_movhi_speculative, + gen_movsi_speculative, + gen_movdi_speculative, + gen_movsf_speculative, + gen_movdf_speculative, + gen_movxf_speculative, + gen_movti_speculative, + gen_zero_extendqidi2_speculative, + gen_zero_extendhidi2_speculative, + gen_zero_extendsidi2_speculative, + }; + static gen_func_t gen_ld_sa[] = { + gen_movbi_speculative_advanced, + gen_movqi_speculative_advanced, + gen_movhi_speculative_advanced, + gen_movsi_speculative_advanced, + gen_movdi_speculative_advanced, + gen_movsf_speculative_advanced, + gen_movdf_speculative_advanced, + gen_movxf_speculative_advanced, + gen_movti_speculative_advanced, + gen_zero_extendqidi2_speculative_advanced, + gen_zero_extendhidi2_speculative_advanced, + gen_zero_extendsidi2_speculative_advanced, + }; + static gen_func_t gen_ld_s_a[] = { + gen_movbi_speculative_a, + gen_movqi_speculative_a, + gen_movhi_speculative_a, + gen_movsi_speculative_a, + gen_movdi_speculative_a, + gen_movsf_speculative_a, + gen_movdf_speculative_a, + gen_movxf_speculative_a, + gen_movti_speculative_a, + gen_zero_extendqidi2_speculative_a, + gen_zero_extendhidi2_speculative_a, + gen_zero_extendsidi2_speculative_a, + }; + + gen_func_t *gen_ld; + + if (ts & BEGIN_DATA) + { + if (ts & BEGIN_CONTROL) + gen_ld = gen_ld_sa; + else + gen_ld = gen_ld_a; + } + else if (ts & BEGIN_CONTROL) + { + if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL) + || ia64_needs_block_p (ts)) + gen_ld = gen_ld_s; + else + gen_ld = gen_ld_s_a; + } + else if (ts == 0) + gen_ld = gen_ld_; + else + gcc_unreachable (); + + return gen_ld[mode_no]; + } + + /* Constants that help mapping 'machine_mode' to int. */ + enum SPEC_MODES + { + SPEC_MODE_INVALID = -1, + SPEC_MODE_FIRST = 0, + SPEC_MODE_FOR_EXTEND_FIRST = 1, + SPEC_MODE_FOR_EXTEND_LAST = 3, + SPEC_MODE_LAST = 8 + }; + + enum + { + /* Offset to reach ZERO_EXTEND patterns. */ + SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1 + }; + + /* Return index of the MODE. */ + static int + ia64_mode_to_int (machine_mode mode) + { + switch (mode) + { + case E_BImode: return 0; /* SPEC_MODE_FIRST */ + case E_QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */ + case E_HImode: return 2; + case E_SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */ + case E_DImode: return 4; + case E_SFmode: return 5; + case E_DFmode: return 6; + case E_XFmode: return 7; + case E_TImode: + /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not + mentioned in itanium[12].md. 
Predicate fp_register_operand also + needs to be defined. Bottom line: better disable for now. */ + return SPEC_MODE_INVALID; + default: return SPEC_MODE_INVALID; + } + } + + /* Provide information about speculation capabilities. */ + static void + ia64_set_sched_flags (spec_info_t spec_info) + { + unsigned int *flags = &(current_sched_info->flags); + + if (*flags & SCHED_RGN + || *flags & SCHED_EBB + || *flags & SEL_SCHED) + { + int mask = 0; + + if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0) + || (mflag_sched_ar_data_spec && reload_completed)) + { + mask |= BEGIN_DATA; + + if (!sel_sched_p () + && ((mflag_sched_br_in_data_spec && !reload_completed) + || (mflag_sched_ar_in_data_spec && reload_completed))) + mask |= BE_IN_DATA; + } + + if (mflag_sched_control_spec + && (!sel_sched_p () + || reload_completed)) + { + mask |= BEGIN_CONTROL; + + if (!sel_sched_p () && mflag_sched_in_control_spec) + mask |= BE_IN_CONTROL; + } + + spec_info->mask = mask; + + if (mask) + { + *flags |= USE_DEPS_LIST | DO_SPECULATION; + + if (mask & BE_IN_SPEC) + *flags |= NEW_BBS; + + spec_info->flags = 0; + + if ((mask & CONTROL_SPEC) + && sel_sched_p () && mflag_sel_sched_dont_check_control_spec) + spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL; + + if (sched_verbose >= 1) + spec_info->dump = sched_dump; + else + spec_info->dump = 0; + + if (mflag_sched_count_spec_in_critical_path) + spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH; + } + } + else + spec_info->mask = 0; + } + + /* If INSN is an appropriate load return its mode. + Return -1 otherwise. */ + static int + get_mode_no_for_insn (rtx_insn *insn) + { + rtx reg, mem, mode_rtx; + int mode_no; + bool extend_p; + + extract_insn_cached (insn); + + /* We use WHICH_ALTERNATIVE only after reload. This will + guarantee that reload won't touch a speculative insn. */ + + if (recog_data.n_operands != 2) + return -1; + + reg = recog_data.operand[0]; + mem = recog_data.operand[1]; + + /* We should use MEM's mode since REG's mode in presence of + ZERO_EXTEND will always be DImode. */ + if (get_attr_speculable1 (insn) == SPECULABLE1_YES) + /* Process non-speculative ld. */ + { + if (!reload_completed) + { + /* Do not speculate into regs like ar.lc. */ + if (!REG_P (reg) || AR_REGNO_P (REGNO (reg))) + return -1; + + if (!MEM_P (mem)) + return -1; + + { + rtx mem_reg = XEXP (mem, 0); + + if (!REG_P (mem_reg)) + return -1; + } + + mode_rtx = mem; + } + else if (get_attr_speculable2 (insn) == SPECULABLE2_YES) + { + gcc_assert (REG_P (reg) && MEM_P (mem)); + mode_rtx = mem; + } + else + return -1; + } + else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES + || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES + || get_attr_check_load (insn) == CHECK_LOAD_YES) + /* Process speculative ld or ld.c. */ + { + gcc_assert (REG_P (reg) && MEM_P (mem)); + mode_rtx = mem; + } + else + { + enum attr_itanium_class attr_class = get_attr_itanium_class (insn); + + if (attr_class == ITANIUM_CLASS_CHK_A + || attr_class == ITANIUM_CLASS_CHK_S_I + || attr_class == ITANIUM_CLASS_CHK_S_F) + /* Process chk. 
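The twelve-entry generator arrays in get_spec_load_gen_function above are indexed by the value that ia64_mode_to_int and get_mode_no_for_insn compute together: indices 0 through 8 are the plain modes (BImode through TImode), and indices 9 through 11 are reached by adding SPEC_GEN_EXTEND_OFFSET (8 - 1 + 1 = 8) for zero-extended QImode, HImode and SImode loads. A minimal standalone sketch of that index arithmetic follows; EXTEND_OFFSET and spec_array_index are illustrative stand-ins, and the constants simply mirror the SPEC_MODES enum shown above.

   #include <assert.h>

   enum spec_modes
   {
     SPEC_MODE_INVALID = -1,
     SPEC_MODE_FIRST = 0,
     SPEC_MODE_FOR_EXTEND_FIRST = 1,
     SPEC_MODE_FOR_EXTEND_LAST = 3,
     SPEC_MODE_LAST = 8
   };

   /* Offset to the ZERO_EXTEND entries at the tail of the twelve-entry
      generator arrays: 8 - 1 + 1 == 8.  */
   enum { EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1 };

   /* Combine a base mode index and an extend_p flag into an array index,
      the way get_mode_no_for_insn does after calling ia64_mode_to_int.  */
   static int
   spec_array_index (int mode_no, int extend_p)
   {
     if (!extend_p)
       return mode_no;                        /* 0 .. 8: plain loads.  */
     if (mode_no < SPEC_MODE_FOR_EXTEND_FIRST
         || mode_no > SPEC_MODE_FOR_EXTEND_LAST)
       return SPEC_MODE_INVALID;              /* only QI/HI/SI can extend.  */
     return mode_no + EXTEND_OFFSET;          /* 9 .. 11: zero-extends.  */
   }

   int
   main (void)
   {
     assert (spec_array_index (1, 1) == 9);   /* QImode + zero-extend.  */
     assert (spec_array_index (3, 1) == 11);  /* SImode + zero-extend.  */
     assert (spec_array_index (4, 0) == 4);   /* plain DImode load.  */
     assert (spec_array_index (4, 1) == SPEC_MODE_INVALID);
     return 0;
   }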
*/ + mode_rtx = reg; + else + return -1; + } + + mode_no = ia64_mode_to_int (GET_MODE (mode_rtx)); + + if (mode_no == SPEC_MODE_INVALID) + return -1; + + extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx)); + + if (extend_p) + { + if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no + && mode_no <= SPEC_MODE_FOR_EXTEND_LAST)) + return -1; + + mode_no += SPEC_GEN_EXTEND_OFFSET; + } + + return mode_no; + } + + /* If X is an unspec part of a speculative load, return its code. + Return -1 otherwise. */ + static int + get_spec_unspec_code (const_rtx x) + { + if (GET_CODE (x) != UNSPEC) + return -1; + + { + int code; + + code = XINT (x, 1); + + switch (code) + { + case UNSPEC_LDA: + case UNSPEC_LDS: + case UNSPEC_LDS_A: + case UNSPEC_LDSA: + return code; + + default: + return -1; + } + } + } + + /* Implement skip_rtx_p hook. */ + static bool + ia64_skip_rtx_p (const_rtx x) + { + return get_spec_unspec_code (x) != -1; + } + + /* If INSN is a speculative load, return its UNSPEC code. + Return -1 otherwise. */ + static int + get_insn_spec_code (const_rtx insn) + { + rtx pat, reg, mem; + + pat = PATTERN (insn); + + if (GET_CODE (pat) == COND_EXEC) + pat = COND_EXEC_CODE (pat); + + if (GET_CODE (pat) != SET) + return -1; + + reg = SET_DEST (pat); + if (!REG_P (reg)) + return -1; + + mem = SET_SRC (pat); + if (GET_CODE (mem) == ZERO_EXTEND) + mem = XEXP (mem, 0); + + return get_spec_unspec_code (mem); + } + + /* If INSN is a speculative load, return a ds with the speculation types. + Otherwise [if INSN is a normal instruction] return 0. */ + static ds_t + ia64_get_insn_spec_ds (rtx_insn *insn) + { + int code = get_insn_spec_code (insn); + + switch (code) + { + case UNSPEC_LDA: + return BEGIN_DATA; + + case UNSPEC_LDS: + case UNSPEC_LDS_A: + return BEGIN_CONTROL; + + case UNSPEC_LDSA: + return BEGIN_DATA | BEGIN_CONTROL; + + default: + return 0; + } + } + + /* If INSN is a speculative load return a ds with the speculation types that + will be checked. + Otherwise [if INSN is a normal instruction] return 0. */ + static ds_t + ia64_get_insn_checked_ds (rtx_insn *insn) + { + int code = get_insn_spec_code (insn); + + switch (code) + { + case UNSPEC_LDA: + return BEGIN_DATA | BEGIN_CONTROL; + + case UNSPEC_LDS: + return BEGIN_CONTROL; + + case UNSPEC_LDS_A: + case UNSPEC_LDSA: + return BEGIN_DATA | BEGIN_CONTROL; + + default: + return 0; + } + } + + /* If GEN_P is true, calculate the index of needed speculation check and return + speculative pattern for INSN with speculative mode TS, machine mode + MODE_NO and with ZERO_EXTEND (if EXTEND_P is true). + If GEN_P is false, just calculate the index of needed speculation check. */ + static rtx + ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no) + { + rtx pat, new_pat; + gen_func_t gen_load; + + gen_load = get_spec_load_gen_function (ts, mode_no); + + new_pat = gen_load (copy_rtx (recog_data.operand[0]), + copy_rtx (recog_data.operand[1])); + + pat = PATTERN (insn); + if (GET_CODE (pat) == COND_EXEC) + new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)), + new_pat); + + return new_pat; + } + + static bool + insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED, + ds_t ds ATTRIBUTE_UNUSED) + { + return false; + } + + /* Implement targetm.sched.speculate_insn hook. + Check if the INSN can be TS speculative. + If 'no' - return -1. + If 'yes' - generate speculative pattern in the NEW_PAT and return 1. + If current pattern of the INSN already provides TS speculation, + return 0. 
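Taken together with get_spec_load_gen_function above, ia64_get_insn_spec_ds and ia64_get_insn_checked_ds encode the usual pairing between a requested speculation kind and the IA-64 load form that implements it. A minimal standalone sketch of that pairing follows; SPEC_DATA and SPEC_CONTROL are illustrative stand-ins for the scheduler's BEGIN_DATA and BEGIN_CONTROL flags, with made-up bit values, and spec_load_form is not a GCC function.

   #include <stdio.h>

   /* Made-up bit values; stand-ins for BEGIN_DATA / BEGIN_CONTROL.  */
   enum { SPEC_DATA = 1 << 0, SPEC_CONTROL = 1 << 1 };

   /* Which load form the dispatch in get_spec_load_gen_function selects
      for a requested speculation kind.  */
   static const char *
   spec_load_form (unsigned ds)
   {
     if ((ds & SPEC_DATA) && (ds & SPEC_CONTROL))
       return "ld.sa";  /* data + control speculative; checked by chk.a or ld.c.  */
     if (ds & SPEC_DATA)
       return "ld.a";   /* advanced (data-speculative) load.  */
     if (ds & SPEC_CONTROL)
       return "ld.s";   /* control-speculative; the dispatch may instead pick
                           the "speculative_a" variant when the later check can
                           be a plain ld.c.  */
     return "ld";       /* ordinary, non-speculative load.  */
   }

   int
   main (void)
   {
     printf ("%s %s %s %s\n",
             spec_load_form (SPEC_DATA | SPEC_CONTROL),  /* ld.sa */
             spec_load_form (SPEC_DATA),                 /* ld.a  */
             spec_load_form (SPEC_CONTROL),              /* ld.s  */
             spec_load_form (0));                        /* ld    */
     return 0;
   }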
*/ + static int + ia64_speculate_insn (rtx_insn *insn, ds_t ts, rtx *new_pat) + { + int mode_no; + int res; + + gcc_assert (!(ts & ~SPECULATIVE)); + + if (ia64_spec_check_p (insn)) + return -1; + + if ((ts & BE_IN_SPEC) + && !insn_can_be_in_speculative_p (insn, ts)) + return -1; + + mode_no = get_mode_no_for_insn (insn); + + if (mode_no != SPEC_MODE_INVALID) + { + if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts)) + res = 0; + else + { + res = 1; + *new_pat = ia64_gen_spec_load (insn, ts, mode_no); + } + } + else + res = -1; + + return res; + } + + /* Return a function that will generate a check for speculation TS with mode + MODE_NO. + If simple check is needed, pass true for SIMPLE_CHECK_P. + If clearing check is needed, pass true for CLEARING_CHECK_P. */ + static gen_func_t + get_spec_check_gen_function (ds_t ts, int mode_no, + bool simple_check_p, bool clearing_check_p) + { + static gen_func_t gen_ld_c_clr[] = { + gen_movbi_clr, + gen_movqi_clr, + gen_movhi_clr, + gen_movsi_clr, + gen_movdi_clr, + gen_movsf_clr, + gen_movdf_clr, + gen_movxf_clr, + gen_movti_clr, + gen_zero_extendqidi2_clr, + gen_zero_extendhidi2_clr, + gen_zero_extendsidi2_clr, + }; + static gen_func_t gen_ld_c_nc[] = { + gen_movbi_nc, + gen_movqi_nc, + gen_movhi_nc, + gen_movsi_nc, + gen_movdi_nc, + gen_movsf_nc, + gen_movdf_nc, + gen_movxf_nc, + gen_movti_nc, + gen_zero_extendqidi2_nc, + gen_zero_extendhidi2_nc, + gen_zero_extendsidi2_nc, + }; + static gen_func_t gen_chk_a_clr[] = { + gen_advanced_load_check_clr_bi, + gen_advanced_load_check_clr_qi, + gen_advanced_load_check_clr_hi, + gen_advanced_load_check_clr_si, + gen_advanced_load_check_clr_di, + gen_advanced_load_check_clr_sf, + gen_advanced_load_check_clr_df, + gen_advanced_load_check_clr_xf, + gen_advanced_load_check_clr_ti, + gen_advanced_load_check_clr_di, + gen_advanced_load_check_clr_di, + gen_advanced_load_check_clr_di, + }; + static gen_func_t gen_chk_a_nc[] = { + gen_advanced_load_check_nc_bi, + gen_advanced_load_check_nc_qi, + gen_advanced_load_check_nc_hi, + gen_advanced_load_check_nc_si, + gen_advanced_load_check_nc_di, + gen_advanced_load_check_nc_sf, + gen_advanced_load_check_nc_df, + gen_advanced_load_check_nc_xf, + gen_advanced_load_check_nc_ti, + gen_advanced_load_check_nc_di, + gen_advanced_load_check_nc_di, + gen_advanced_load_check_nc_di, + }; + static gen_func_t gen_chk_s[] = { + gen_speculation_check_bi, + gen_speculation_check_qi, + gen_speculation_check_hi, + gen_speculation_check_si, + gen_speculation_check_di, + gen_speculation_check_sf, + gen_speculation_check_df, + gen_speculation_check_xf, + gen_speculation_check_ti, + gen_speculation_check_di, + gen_speculation_check_di, + gen_speculation_check_di, + }; + + gen_func_t *gen_check; + + if (ts & BEGIN_DATA) + { + /* We don't need recovery because even if this is ld.sa + ALAT entry will be allocated only if NAT bit is set to zero. + So it is enough to use ld.c here. */ + + if (simple_check_p) + { + gcc_assert (mflag_sched_spec_ldc); + + if (clearing_check_p) + gen_check = gen_ld_c_clr; + else + gen_check = gen_ld_c_nc; + } + else + { + if (clearing_check_p) + gen_check = gen_chk_a_clr; + else + gen_check = gen_chk_a_nc; + } + } + else if (ts & BEGIN_CONTROL) + { + if (simple_check_p) + /* We might want to use ld.sa -> ld.c instead of + ld.s -> chk.s. 
*/ + { + gcc_assert (!ia64_needs_block_p (ts)); + + if (clearing_check_p) + gen_check = gen_ld_c_clr; + else + gen_check = gen_ld_c_nc; + } + else + { + gen_check = gen_chk_s; + } + } + else + gcc_unreachable (); + + gcc_assert (mode_no >= 0); + return gen_check[mode_no]; + } + + /* Return nonzero, if INSN needs branchy recovery check. */ + static bool + ia64_needs_block_p (ds_t ts) + { + if (ts & BEGIN_DATA) + return !mflag_sched_spec_ldc; + + gcc_assert ((ts & BEGIN_CONTROL) != 0); + + return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc); + } + + /* Generate (or regenerate) a recovery check for INSN. */ + static rtx + ia64_gen_spec_check (rtx_insn *insn, rtx_insn *label, ds_t ds) + { + rtx op1, pat, check_pat; + gen_func_t gen_check; + int mode_no; + + mode_no = get_mode_no_for_insn (insn); + gcc_assert (mode_no >= 0); + + if (label) + op1 = label; + else + { + gcc_assert (!ia64_needs_block_p (ds)); + op1 = copy_rtx (recog_data.operand[1]); + } + + gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX, + true); + + check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1); + + pat = PATTERN (insn); + if (GET_CODE (pat) == COND_EXEC) + check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)), + check_pat); + + return check_pat; + } + + /* Return nonzero, if X is branchy recovery check. */ + static int + ia64_spec_check_p (rtx x) + { + x = PATTERN (x); + if (GET_CODE (x) == COND_EXEC) + x = COND_EXEC_CODE (x); + if (GET_CODE (x) == SET) + return ia64_spec_check_src_p (SET_SRC (x)); + return 0; + } + + /* Return nonzero, if SRC belongs to recovery check. */ + static int + ia64_spec_check_src_p (rtx src) + { + if (GET_CODE (src) == IF_THEN_ELSE) + { + rtx t; + + t = XEXP (src, 0); + if (GET_CODE (t) == NE) + { + t = XEXP (t, 0); + + if (GET_CODE (t) == UNSPEC) + { + int code; + + code = XINT (t, 1); + + if (code == UNSPEC_LDCCLR + || code == UNSPEC_LDCNC + || code == UNSPEC_CHKACLR + || code == UNSPEC_CHKANC + || code == UNSPEC_CHKS) + { + gcc_assert (code != 0); + return code; + } + } + } + } + return 0; + } + + + /* The following page contains abstract data `bundle states' which are + used for bundling insns (inserting nops and template generation). */ + + /* The following describes state of insn bundling. */ + + struct bundle_state + { + /* Unique bundle state number to identify them in the debugging + output */ + int unique_num; + rtx_insn *insn; /* corresponding insn, NULL for the 1st and the last state */ + /* number nops before and after the insn */ + short before_nops_num, after_nops_num; + int insn_num; /* insn number (0 - for initial state, 1 - for the 1st + insn */ + int cost; /* cost of the state in cycles */ + int accumulated_insns_num; /* number of all previous insns including + nops. L is considered as 2 insns */ + int branch_deviation; /* deviation of previous branches from 3rd slots */ + int middle_bundle_stops; /* number of stop bits in the middle of bundles */ + struct bundle_state *next; /* next state with the same insn_num */ + struct bundle_state *originator; /* originator (previous insn state) */ + /* All bundle states are in the following chain. */ + struct bundle_state *allocated_states_chain; + /* The DFA State after issuing the insn and the nops. */ + state_t dfa_state; + }; + + /* The following is map insn number to the corresponding bundle state. */ + + static struct bundle_state **index_to_bundle_states; + + /* The unique number of next bundle state. 
*/ + + static int bundle_states_num; + + /* All allocated bundle states are in the following chain. */ + + static struct bundle_state *allocated_bundle_states_chain; + + /* All allocated but not used bundle states are in the following + chain. */ + + static struct bundle_state *free_bundle_state_chain; + + + /* The following function returns a free bundle state. */ + + static struct bundle_state * + get_free_bundle_state (void) + { + struct bundle_state *result; + + if (free_bundle_state_chain != NULL) + { + result = free_bundle_state_chain; + free_bundle_state_chain = result->next; + } + else + { + result = XNEW (struct bundle_state); + result->dfa_state = xmalloc (dfa_state_size); + result->allocated_states_chain = allocated_bundle_states_chain; + allocated_bundle_states_chain = result; + } + result->unique_num = bundle_states_num++; + return result; + + } + + /* The following function frees given bundle state. */ + + static void + free_bundle_state (struct bundle_state *state) + { + state->next = free_bundle_state_chain; + free_bundle_state_chain = state; + } + + /* Start work with abstract data `bundle states'. */ + + static void + initiate_bundle_states (void) + { + bundle_states_num = 0; + free_bundle_state_chain = NULL; + allocated_bundle_states_chain = NULL; + } + + /* Finish work with abstract data `bundle states'. */ + + static void + finish_bundle_states (void) + { + struct bundle_state *curr_state, *next_state; + + for (curr_state = allocated_bundle_states_chain; + curr_state != NULL; + curr_state = next_state) + { + next_state = curr_state->allocated_states_chain; + free (curr_state->dfa_state); + free (curr_state); + } + } + + /* Hashtable helpers. */ + + struct bundle_state_hasher : nofree_ptr_hash + { + static inline hashval_t hash (const bundle_state *); + static inline bool equal (const bundle_state *, const bundle_state *); + }; + + /* The function returns hash of BUNDLE_STATE. */ + + inline hashval_t + bundle_state_hasher::hash (const bundle_state *state) + { + unsigned result, i; + + for (result = i = 0; i < dfa_state_size; i++) + result += (((unsigned char *) state->dfa_state) [i] + << ((i % CHAR_BIT) * 3 + CHAR_BIT)); + return result + state->insn_num; + } + + /* The function returns nonzero if the bundle state keys are equal. */ + + inline bool + bundle_state_hasher::equal (const bundle_state *state1, + const bundle_state *state2) + { + return (state1->insn_num == state2->insn_num + && memcmp (state1->dfa_state, state2->dfa_state, + dfa_state_size) == 0); + } + + /* Hash table of the bundle states. The key is dfa_state and insn_num + of the bundle states. */ + + static hash_table *bundle_state_table; + + /* The function inserts the BUNDLE_STATE into the hash table. The + function returns nonzero if the bundle has been inserted into the + table. The table contains the best bundle state with given key. 
*/ + + static int + insert_bundle_state (struct bundle_state *bundle_state) + { + struct bundle_state **entry_ptr; + + entry_ptr = bundle_state_table->find_slot (bundle_state, INSERT); + if (*entry_ptr == NULL) + { + bundle_state->next = index_to_bundle_states [bundle_state->insn_num]; + index_to_bundle_states [bundle_state->insn_num] = bundle_state; + *entry_ptr = bundle_state; + return TRUE; + } + else if (bundle_state->cost < (*entry_ptr)->cost + || (bundle_state->cost == (*entry_ptr)->cost + && ((*entry_ptr)->accumulated_insns_num + > bundle_state->accumulated_insns_num + || ((*entry_ptr)->accumulated_insns_num + == bundle_state->accumulated_insns_num + && ((*entry_ptr)->branch_deviation + > bundle_state->branch_deviation + || ((*entry_ptr)->branch_deviation + == bundle_state->branch_deviation + && (*entry_ptr)->middle_bundle_stops + > bundle_state->middle_bundle_stops)))))) + + { + struct bundle_state temp; + + temp = **entry_ptr; + **entry_ptr = *bundle_state; + (*entry_ptr)->next = temp.next; + *bundle_state = temp; + } + return FALSE; + } + + /* Start work with the hash table. */ + + static void + initiate_bundle_state_table (void) + { + bundle_state_table = new hash_table (50); + } + + /* Finish work with the hash table. */ + + static void + finish_bundle_state_table (void) + { + delete bundle_state_table; + bundle_state_table = NULL; + } + + + + /* The following variable is a insn `nop' used to check bundle states + with different number of inserted nops. */ + + static rtx_insn *ia64_nop; + + /* The following function tries to issue NOPS_NUM nops for the current + state without advancing processor cycle. If it failed, the + function returns FALSE and frees the current state. */ + + static int + try_issue_nops (struct bundle_state *curr_state, int nops_num) + { + int i; + + for (i = 0; i < nops_num; i++) + if (state_transition (curr_state->dfa_state, ia64_nop) >= 0) + { + free_bundle_state (curr_state); + return FALSE; + } + return TRUE; + } + + /* The following function tries to issue INSN for the current + state without advancing processor cycle. If it failed, the + function returns FALSE and frees the current state. */ + + static int + try_issue_insn (struct bundle_state *curr_state, rtx insn) + { + if (insn && state_transition (curr_state->dfa_state, insn) >= 0) + { + free_bundle_state (curr_state); + return FALSE; + } + return TRUE; + } + + /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN + starting with ORIGINATOR without advancing processor cycle. If + TRY_BUNDLE_END_P is TRUE, the function also/only (if + ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill all bundle. + If it was successful, the function creates new bundle state and + insert into the hash table and into `index_to_bundle_states'. 
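When a new state collides with an existing one on the same (dfa_state, insn_num) key, insert_bundle_state above keeps the better of the two under a lexicographic order: lower cost first, then fewer accumulated insns and nops, then smaller branch deviation, then fewer mid-bundle stops. The nested condition is equivalent to the flattened comparison sketched below; struct bundle_key_costs and better_bundle_state_p are illustrative stand-ins, not GCC types.

   #include <assert.h>
   #include <stdbool.h>

   /* Mirrors only the fields that take part in the tie-breaking;
      not GCC's struct bundle_state.  */
   struct bundle_key_costs
   {
     int cost;                    /* cycles */
     int accumulated_insns_num;   /* insns including nops; an L insn counts as 2 */
     int branch_deviation;        /* how far branches sit from 3rd slots */
     int middle_bundle_stops;     /* stop bits in the middle of bundles */
   };

   /* True if NEW_S should replace OLD_S for the same (dfa_state, insn_num)
      key: compare cost, then issued insns/nops, then branch placement,
      then mid-bundle stops, in that order.  */
   static bool
   better_bundle_state_p (const struct bundle_key_costs *new_s,
                          const struct bundle_key_costs *old_s)
   {
     if (new_s->cost != old_s->cost)
       return new_s->cost < old_s->cost;
     if (new_s->accumulated_insns_num != old_s->accumulated_insns_num)
       return new_s->accumulated_insns_num < old_s->accumulated_insns_num;
     if (new_s->branch_deviation != old_s->branch_deviation)
       return new_s->branch_deviation < old_s->branch_deviation;
     return new_s->middle_bundle_stops < old_s->middle_bundle_stops;
   }

   int
   main (void)
   {
     struct bundle_key_costs a = { 3, 9, 0, 0 };
     struct bundle_key_costs b = { 3, 9, 1, 0 };
     assert (better_bundle_state_p (&a, &b));   /* same cost and insns, better slots.  */
     assert (!better_bundle_state_p (&b, &a));
     return 0;
   }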
*/ + + static void + issue_nops_and_insn (struct bundle_state *originator, int before_nops_num, + rtx_insn *insn, int try_bundle_end_p, + int only_bundle_end_p) + { + struct bundle_state *curr_state; + + curr_state = get_free_bundle_state (); + memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size); + curr_state->insn = insn; + curr_state->insn_num = originator->insn_num + 1; + curr_state->cost = originator->cost; + curr_state->originator = originator; + curr_state->before_nops_num = before_nops_num; + curr_state->after_nops_num = 0; + curr_state->accumulated_insns_num + = originator->accumulated_insns_num + before_nops_num; + curr_state->branch_deviation = originator->branch_deviation; + curr_state->middle_bundle_stops = originator->middle_bundle_stops; + gcc_assert (insn); + if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier) + { + gcc_assert (GET_MODE (insn) != TImode); + if (!try_issue_nops (curr_state, before_nops_num)) + return; + if (!try_issue_insn (curr_state, insn)) + return; + memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size); + if (curr_state->accumulated_insns_num % 3 != 0) + curr_state->middle_bundle_stops++; + if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0 + && curr_state->accumulated_insns_num % 3 != 0) + { + free_bundle_state (curr_state); + return; + } + } + else if (GET_MODE (insn) != TImode) + { + if (!try_issue_nops (curr_state, before_nops_num)) + return; + if (!try_issue_insn (curr_state, insn)) + return; + curr_state->accumulated_insns_num++; + gcc_assert (!unknown_for_bundling_p (insn)); + + if (ia64_safe_type (insn) == TYPE_L) + curr_state->accumulated_insns_num++; + } + else + { + /* If this is an insn that must be first in a group, then don't allow + nops to be emitted before it. Currently, alloc is the only such + supported instruction. */ + /* ??? The bundling automatons should handle this for us, but they do + not yet have support for the first_insn attribute. */ + if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES) + { + free_bundle_state (curr_state); + return; + } + + state_transition (curr_state->dfa_state, dfa_pre_cycle_insn); + state_transition (curr_state->dfa_state, NULL); + curr_state->cost++; + if (!try_issue_nops (curr_state, before_nops_num)) + return; + if (!try_issue_insn (curr_state, insn)) + return; + curr_state->accumulated_insns_num++; + if (unknown_for_bundling_p (insn)) + { + /* Finish bundle containing asm insn. 
*/ + curr_state->after_nops_num + = 3 - curr_state->accumulated_insns_num % 3; + curr_state->accumulated_insns_num + += 3 - curr_state->accumulated_insns_num % 3; + } + else if (ia64_safe_type (insn) == TYPE_L) + curr_state->accumulated_insns_num++; + } + if (ia64_safe_type (insn) == TYPE_B) + curr_state->branch_deviation + += 2 - (curr_state->accumulated_insns_num - 1) % 3; + if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0) + { + if (!only_bundle_end_p && insert_bundle_state (curr_state)) + { + state_t dfa_state; + struct bundle_state *curr_state1; + struct bundle_state *allocated_states_chain; + + curr_state1 = get_free_bundle_state (); + dfa_state = curr_state1->dfa_state; + allocated_states_chain = curr_state1->allocated_states_chain; + *curr_state1 = *curr_state; + curr_state1->dfa_state = dfa_state; + curr_state1->allocated_states_chain = allocated_states_chain; + memcpy (curr_state1->dfa_state, curr_state->dfa_state, + dfa_state_size); + curr_state = curr_state1; + } + if (!try_issue_nops (curr_state, + 3 - curr_state->accumulated_insns_num % 3)) + return; + curr_state->after_nops_num + = 3 - curr_state->accumulated_insns_num % 3; + curr_state->accumulated_insns_num + += 3 - curr_state->accumulated_insns_num % 3; + } + if (!insert_bundle_state (curr_state)) + free_bundle_state (curr_state); + return; + } + + /* The following function returns position in the two window bundle + for given STATE. */ + + static int + get_max_pos (state_t state) + { + if (cpu_unit_reservation_p (state, pos_6)) + return 6; + else if (cpu_unit_reservation_p (state, pos_5)) + return 5; + else if (cpu_unit_reservation_p (state, pos_4)) + return 4; + else if (cpu_unit_reservation_p (state, pos_3)) + return 3; + else if (cpu_unit_reservation_p (state, pos_2)) + return 2; + else if (cpu_unit_reservation_p (state, pos_1)) + return 1; + else + return 0; + } + + /* The function returns code of a possible template for given position + and state. The function should be called only with 2 values of + position equal to 3 or 6. We avoid generating F NOPs by putting + templates containing F insns at the end of the template search + because undocumented anomaly in McKinley derived cores which can + cause stalls if an F-unit insn (including a NOP) is issued within a + six-cycle window after reading certain application registers (such + as ar.bsp). Furthermore, power-considerations also argue against + the use of F-unit instructions unless they're really needed. 
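For reference, the template numbers returned by get_template below can be read off the cpu-unit names it tests: 0 MII, 1 MMI, 2 MFI, 3 MMF, 4 BBB, 5 MBB, 6 MIB, 7 MMB, 8 MFB, 9 MLX (M memory, I integer, F floating point, B branch, LX long immediate). The standalone sketch below lists them in the order the function probes the reservations, with the F-bearing templates tried late, matching the comment above; template_slots and probe_order are illustrative arrays inferred from those unit names, not tables copied from the GCC sources.

   #include <stdio.h>

   /* Slot letters read off the unit names (e.g. _0mmi_): M memory,
      I integer, F floating point, B branch, LX long immediate.  */
   static const char *const template_slots[] =
   {
     "MII",  /* 0 */  "MMI",  /* 1 */  "MFI",  /* 2 */  "MMF",  /* 3 */
     "BBB",  /* 4 */  "MBB",  /* 5 */  "MIB",  /* 6 */  "MMB",  /* 7 */
     "MFB",  /* 8 */  "MLX",  /* 9 */
   };

   /* The order in which get_template probes the reservations; templates
      with an F slot come late, per the comment above about avoiding
      F-unit nops.  */
   static const int probe_order[] = { 1, 0, 7, 6, 5, 4, 3, 2, 8, 9 };

   int
   main (void)
   {
     for (unsigned i = 0; i < sizeof probe_order / sizeof probe_order[0]; i++)
       printf ("try template %d (%s)\n",
               probe_order[i], template_slots[probe_order[i]]);
     return 0;
   }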
*/ + + static int + get_template (state_t state, int pos) + { + switch (pos) + { + case 3: + if (cpu_unit_reservation_p (state, _0mmi_)) + return 1; + else if (cpu_unit_reservation_p (state, _0mii_)) + return 0; + else if (cpu_unit_reservation_p (state, _0mmb_)) + return 7; + else if (cpu_unit_reservation_p (state, _0mib_)) + return 6; + else if (cpu_unit_reservation_p (state, _0mbb_)) + return 5; + else if (cpu_unit_reservation_p (state, _0bbb_)) + return 4; + else if (cpu_unit_reservation_p (state, _0mmf_)) + return 3; + else if (cpu_unit_reservation_p (state, _0mfi_)) + return 2; + else if (cpu_unit_reservation_p (state, _0mfb_)) + return 8; + else if (cpu_unit_reservation_p (state, _0mlx_)) + return 9; + else + gcc_unreachable (); + case 6: + if (cpu_unit_reservation_p (state, _1mmi_)) + return 1; + else if (cpu_unit_reservation_p (state, _1mii_)) + return 0; + else if (cpu_unit_reservation_p (state, _1mmb_)) + return 7; + else if (cpu_unit_reservation_p (state, _1mib_)) + return 6; + else if (cpu_unit_reservation_p (state, _1mbb_)) + return 5; + else if (cpu_unit_reservation_p (state, _1bbb_)) + return 4; + else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_)) + return 3; + else if (cpu_unit_reservation_p (state, _1mfi_)) + return 2; + else if (cpu_unit_reservation_p (state, _1mfb_)) + return 8; + else if (cpu_unit_reservation_p (state, _1mlx_)) + return 9; + else + gcc_unreachable (); + default: + gcc_unreachable (); + } + } + + /* True when INSN is important for bundling. */ + + static bool + important_for_bundling_p (rtx_insn *insn) + { + return (INSN_P (insn) + && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE + && GET_CODE (PATTERN (insn)) != USE + && GET_CODE (PATTERN (insn)) != CLOBBER); + } + + /* The following function returns an insn important for insn bundling + followed by INSN and before TAIL. */ + + static rtx_insn * + get_next_important_insn (rtx_insn *insn, rtx_insn *tail) + { + for (; insn && insn != tail; insn = NEXT_INSN (insn)) + if (important_for_bundling_p (insn)) + return insn; + return NULL; + } + + /* True when INSN is unknown, but important, for bundling. */ + + static bool + unknown_for_bundling_p (rtx_insn *insn) + { + return (INSN_P (insn) + && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_UNKNOWN + && GET_CODE (PATTERN (insn)) != USE + && GET_CODE (PATTERN (insn)) != CLOBBER); + } + + /* Add a bundle selector TEMPLATE0 before INSN. */ + + static void + ia64_add_bundle_selector_before (int template0, rtx_insn *insn) + { + rtx b = gen_bundle_selector (GEN_INT (template0)); + + ia64_emit_insn_before (b, insn); + #if NR_BUNDLES == 10 + if ((template0 == 4 || template0 == 5) + && ia64_except_unwind_info (&global_options) == UI_TARGET) + { + int i; + rtx note = NULL_RTX; + + /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the + first or second slot. If it is and has REG_EH_NOTE set, copy it + to following nops, as br.call sets rp to the address of following + bundle and therefore an EH region end must be on a bundle + boundary. 
*/ + insn = PREV_INSN (insn); + for (i = 0; i < 3; i++) + { + do + insn = next_active_insn (insn); + while (NONJUMP_INSN_P (insn) + && get_attr_empty (insn) == EMPTY_YES); + if (CALL_P (insn)) + note = find_reg_note (insn, REG_EH_REGION, NULL_RTX); + else if (note) + { + int code; + + gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop + || code == CODE_FOR_nop_b); + if (find_reg_note (insn, REG_EH_REGION, NULL_RTX)) + note = NULL_RTX; + else + add_reg_note (insn, REG_EH_REGION, XEXP (note, 0)); + } + } + } + #endif + } + + /* The following function does insn bundling. Bundling means + inserting templates and nop insns to fit insn groups into permitted + templates. Instruction scheduling uses NDFA (non-deterministic + finite automata) encoding informations about the templates and the + inserted nops. Nondeterminism of the automata permits follows + all possible insn sequences very fast. + + Unfortunately it is not possible to get information about inserting + nop insns and used templates from the automata states. The + automata only says that we can issue an insn possibly inserting + some nops before it and using some template. Therefore insn + bundling in this function is implemented by using DFA + (deterministic finite automata). We follow all possible insn + sequences by inserting 0-2 nops (that is what the NDFA describe for + insn scheduling) before/after each insn being bundled. We know the + start of simulated processor cycle from insn scheduling (insn + starting a new cycle has TImode). + + Simple implementation of insn bundling would create enormous + number of possible insn sequences satisfying information about new + cycle ticks taken from the insn scheduling. To make the algorithm + practical we use dynamic programming. Each decision (about + inserting nops and implicitly about previous decisions) is described + by structure bundle_state (see above). If we generate the same + bundle state (key is automaton state after issuing the insns and + nops for it), we reuse already generated one. As consequence we + reject some decisions which cannot improve the solution and + reduce memory for the algorithm. + + When we reach the end of EBB (extended basic block), we choose the + best sequence and then, moving back in EBB, insert templates for + the best alternative. The templates are taken from querying + automaton state for each insn in chosen bundle states. + + So the algorithm makes two (forward and backward) passes through + EBB. */ + + static void + bundling (FILE *dump, int verbose, rtx_insn *prev_head_insn, rtx_insn *tail) + { + struct bundle_state *curr_state, *next_state, *best_state; + rtx_insn *insn, *next_insn; + int insn_num; + int i, bundle_end_p, only_bundle_end_p, asm_p; + int pos = 0, max_pos, template0, template1; + rtx_insn *b; + enum attr_type type; + + insn_num = 0; + /* Count insns in the EBB. */ + for (insn = NEXT_INSN (prev_head_insn); + insn && insn != tail; + insn = NEXT_INSN (insn)) + if (INSN_P (insn)) + insn_num++; + if (insn_num == 0) + return; + bundling_p = 1; + dfa_clean_insn_cache (); + initiate_bundle_state_table (); + index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2); + /* First (forward) pass -- generation of bundle states. 
*/ + curr_state = get_free_bundle_state (); + curr_state->insn = NULL; + curr_state->before_nops_num = 0; + curr_state->after_nops_num = 0; + curr_state->insn_num = 0; + curr_state->cost = 0; + curr_state->accumulated_insns_num = 0; + curr_state->branch_deviation = 0; + curr_state->middle_bundle_stops = 0; + curr_state->next = NULL; + curr_state->originator = NULL; + state_reset (curr_state->dfa_state); + index_to_bundle_states [0] = curr_state; + insn_num = 0; + /* Shift cycle mark if it is put on insn which could be ignored. */ + for (insn = NEXT_INSN (prev_head_insn); + insn != tail; + insn = NEXT_INSN (insn)) + if (INSN_P (insn) + && !important_for_bundling_p (insn) + && GET_MODE (insn) == TImode) + { + PUT_MODE (insn, VOIDmode); + for (next_insn = NEXT_INSN (insn); + next_insn != tail; + next_insn = NEXT_INSN (next_insn)) + if (important_for_bundling_p (next_insn) + && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier) + { + PUT_MODE (next_insn, TImode); + break; + } + } + /* Forward pass: generation of bundle states. */ + for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail); + insn != NULL_RTX; + insn = next_insn) + { + gcc_assert (important_for_bundling_p (insn)); + type = ia64_safe_type (insn); + next_insn = get_next_important_insn (NEXT_INSN (insn), tail); + insn_num++; + index_to_bundle_states [insn_num] = NULL; + for (curr_state = index_to_bundle_states [insn_num - 1]; + curr_state != NULL; + curr_state = next_state) + { + pos = curr_state->accumulated_insns_num % 3; + next_state = curr_state->next; + /* We must fill up the current bundle in order to start a + subsequent asm insn in a new bundle. Asm insn is always + placed in a separate bundle. */ + only_bundle_end_p + = (next_insn != NULL_RTX + && INSN_CODE (insn) == CODE_FOR_insn_group_barrier + && unknown_for_bundling_p (next_insn)); + /* We may fill up the current bundle if it is the cycle end + without a group barrier. */ + bundle_end_p + = (only_bundle_end_p || next_insn == NULL_RTX + || (GET_MODE (next_insn) == TImode + && INSN_CODE (insn) != CODE_FOR_insn_group_barrier)); + if (type == TYPE_F || type == TYPE_B || type == TYPE_L + || type == TYPE_S) + issue_nops_and_insn (curr_state, 2, insn, bundle_end_p, + only_bundle_end_p); + issue_nops_and_insn (curr_state, 1, insn, bundle_end_p, + only_bundle_end_p); + issue_nops_and_insn (curr_state, 0, insn, bundle_end_p, + only_bundle_end_p); + } + gcc_assert (index_to_bundle_states [insn_num]); + for (curr_state = index_to_bundle_states [insn_num]; + curr_state != NULL; + curr_state = curr_state->next) + if (verbose >= 2 && dump) + { + /* This structure is taken from generated code of the + pipeline hazard recognizer (see file insn-attrtab.cc). + Please don't forget to change the structure if a new + automaton is added to .md file. */ + struct DFA_chip + { + unsigned short one_automaton_state; + unsigned short oneb_automaton_state; + unsigned short two_automaton_state; + unsigned short twob_automaton_state; + }; + + fprintf + (dump, + "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n", + curr_state->unique_num, + (curr_state->originator == NULL + ? 
-1 : curr_state->originator->unique_num), + curr_state->cost, + curr_state->before_nops_num, curr_state->after_nops_num, + curr_state->accumulated_insns_num, curr_state->branch_deviation, + curr_state->middle_bundle_stops, + ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state, + INSN_UID (insn)); + } + } + + /* We should find a solution because the 2nd insn scheduling has + found one. */ + gcc_assert (index_to_bundle_states [insn_num]); + /* Find a state corresponding to the best insn sequence. */ + best_state = NULL; + for (curr_state = index_to_bundle_states [insn_num]; + curr_state != NULL; + curr_state = curr_state->next) + /* We are just looking at the states with fully filled up last + bundle. The first we prefer insn sequences with minimal cost + then with minimal inserted nops and finally with branch insns + placed in the 3rd slots. */ + if (curr_state->accumulated_insns_num % 3 == 0 + && (best_state == NULL || best_state->cost > curr_state->cost + || (best_state->cost == curr_state->cost + && (curr_state->accumulated_insns_num + < best_state->accumulated_insns_num + || (curr_state->accumulated_insns_num + == best_state->accumulated_insns_num + && (curr_state->branch_deviation + < best_state->branch_deviation + || (curr_state->branch_deviation + == best_state->branch_deviation + && curr_state->middle_bundle_stops + < best_state->middle_bundle_stops))))))) + best_state = curr_state; + /* Second (backward) pass: adding nops and templates. */ + gcc_assert (best_state); + insn_num = best_state->before_nops_num; + template0 = template1 = -1; + for (curr_state = best_state; + curr_state->originator != NULL; + curr_state = curr_state->originator) + { + insn = curr_state->insn; + asm_p = unknown_for_bundling_p (insn); + insn_num++; + if (verbose >= 2 && dump) + { + struct DFA_chip + { + unsigned short one_automaton_state; + unsigned short oneb_automaton_state; + unsigned short two_automaton_state; + unsigned short twob_automaton_state; + }; + + fprintf + (dump, + "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n", + curr_state->unique_num, + (curr_state->originator == NULL + ? -1 : curr_state->originator->unique_num), + curr_state->cost, + curr_state->before_nops_num, curr_state->after_nops_num, + curr_state->accumulated_insns_num, curr_state->branch_deviation, + curr_state->middle_bundle_stops, + ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state, + INSN_UID (insn)); + } + /* Find the position in the current bundle window. The window can + contain at most two bundles. Two bundle window means that + the processor will make two bundle rotation. */ + max_pos = get_max_pos (curr_state->dfa_state); + if (max_pos == 6 + /* The following (negative template number) means that the + processor did one bundle rotation. */ + || (max_pos == 3 && template0 < 0)) + { + /* We are at the end of the window -- find template(s) for + its bundle(s). */ + pos = max_pos; + if (max_pos == 3) + template0 = get_template (curr_state->dfa_state, 3); + else + { + template1 = get_template (curr_state->dfa_state, 3); + template0 = get_template (curr_state->dfa_state, 6); + } + } + if (max_pos > 3 && template1 < 0) + /* It may happen when we have the stop inside a bundle. */ + { + gcc_assert (pos <= 3); + template1 = get_template (curr_state->dfa_state, 3); + pos += 3; + } + if (!asm_p) + /* Emit nops after the current insn. 
*/ + for (i = 0; i < curr_state->after_nops_num; i++) + { + rtx nop_pat = gen_nop (); + rtx_insn *nop = emit_insn_after (nop_pat, insn); + pos--; + gcc_assert (pos >= 0); + if (pos % 3 == 0) + { + /* We are at the start of a bundle: emit the template + (it should be defined). */ + gcc_assert (template0 >= 0); + ia64_add_bundle_selector_before (template0, nop); + /* If we have two bundle window, we make one bundle + rotation. Otherwise template0 will be undefined + (negative value). */ + template0 = template1; + template1 = -1; + } + } + /* Move the position backward in the window. Group barrier has + no slot. Asm insn takes all bundle. */ + if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier + && !unknown_for_bundling_p (insn)) + pos--; + /* Long insn takes 2 slots. */ + if (ia64_safe_type (insn) == TYPE_L) + pos--; + gcc_assert (pos >= 0); + if (pos % 3 == 0 + && INSN_CODE (insn) != CODE_FOR_insn_group_barrier + && !unknown_for_bundling_p (insn)) + { + /* The current insn is at the bundle start: emit the + template. */ + gcc_assert (template0 >= 0); + ia64_add_bundle_selector_before (template0, insn); + b = PREV_INSN (insn); + insn = b; + /* See comment above in analogous place for emitting nops + after the insn. */ + template0 = template1; + template1 = -1; + } + /* Emit nops after the current insn. */ + for (i = 0; i < curr_state->before_nops_num; i++) + { + rtx nop_pat = gen_nop (); + ia64_emit_insn_before (nop_pat, insn); + rtx_insn *nop = PREV_INSN (insn); + insn = nop; + pos--; + gcc_assert (pos >= 0); + if (pos % 3 == 0) + { + /* See comment above in analogous place for emitting nops + after the insn. */ + gcc_assert (template0 >= 0); + ia64_add_bundle_selector_before (template0, insn); + b = PREV_INSN (insn); + insn = b; + template0 = template1; + template1 = -1; + } + } + } + + if (flag_checking) + { + /* Assert right calculation of middle_bundle_stops. */ + int num = best_state->middle_bundle_stops; + bool start_bundle = true, end_bundle = false; + + for (insn = NEXT_INSN (prev_head_insn); + insn && insn != tail; + insn = NEXT_INSN (insn)) + { + if (!INSN_P (insn)) + continue; + if (recog_memoized (insn) == CODE_FOR_bundle_selector) + start_bundle = true; + else + { + rtx_insn *next_insn; + + for (next_insn = NEXT_INSN (insn); + next_insn && next_insn != tail; + next_insn = NEXT_INSN (next_insn)) + if (INSN_P (next_insn) + && (ia64_safe_itanium_class (next_insn) + != ITANIUM_CLASS_IGNORE + || recog_memoized (next_insn) + == CODE_FOR_bundle_selector) + && GET_CODE (PATTERN (next_insn)) != USE + && GET_CODE (PATTERN (next_insn)) != CLOBBER) + break; + + end_bundle = next_insn == NULL_RTX + || next_insn == tail + || (INSN_P (next_insn) + && recog_memoized (next_insn) == CODE_FOR_bundle_selector); + if (recog_memoized (insn) == CODE_FOR_insn_group_barrier + && !start_bundle && !end_bundle + && next_insn + && !unknown_for_bundling_p (next_insn)) + num--; + + start_bundle = false; + } + } + + gcc_assert (num == 0); + } + + free (index_to_bundle_states); + finish_bundle_state_table (); + bundling_p = 0; + dfa_clean_insn_cache (); + } + + /* The following function is called at the end of scheduling BB or + EBB. After reload, it inserts stop bits and does insn bundling. 
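The slot bookkeeping that issue_nops_and_insn and the backward pass of bundling above rely on reduces to two small formulas: a partly filled bundle needs 3 - (accumulated % 3) trailing nops, and a branch whose issue brings the accumulated count to N sits 2 - (N - 1) % 3 slots away from the preferred third slot. A standalone sketch of just that arithmetic follows; nops_to_close_bundle and branch_slot_deviation are illustrative helpers, not GCC functions.

   #include <assert.h>

   /* Nops needed to pad a partly filled bundle out to its boundary
      (three slots per bundle).  */
   static int
   nops_to_close_bundle (int accumulated_insns_num)
   {
     int rem = accumulated_insns_num % 3;
     return rem == 0 ? 0 : 3 - rem;
   }

   /* Distance of the insn just issued (the one that brought the count to
      ACCUMULATED_INSNS_NUM) from the third slot of its bundle; this is the
      amount added to branch_deviation for a B-type insn.  */
   static int
   branch_slot_deviation (int accumulated_insns_num)
   {
     return 2 - (accumulated_insns_num - 1) % 3;
   }

   int
   main (void)
   {
     assert (nops_to_close_bundle (6) == 0);   /* already on a boundary.  */
     assert (nops_to_close_bundle (7) == 2);   /* one slot used, pad two.  */
     assert (nops_to_close_bundle (8) == 1);   /* two slots used, pad one.  */
     assert (branch_slot_deviation (3) == 0);  /* branch in the third slot.  */
     assert (branch_slot_deviation (1) == 2);  /* branch in the first slot.  */
     return 0;
   }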
*/ + + static void + ia64_sched_finish (FILE *dump, int sched_verbose) + { + if (sched_verbose) + fprintf (dump, "// Finishing schedule.\n"); + if (!reload_completed) + return; + if (reload_completed) + { + final_emit_insn_group_barriers (dump); + bundling (dump, sched_verbose, current_sched_info->prev_head, + current_sched_info->next_tail); + if (sched_verbose && dump) + fprintf (dump, "// finishing %d-%d\n", + INSN_UID (NEXT_INSN (current_sched_info->prev_head)), + INSN_UID (PREV_INSN (current_sched_info->next_tail))); + + return; + } + } + + /* The following function inserts stop bits in scheduled BB or EBB. */ + + static void + final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED) + { + rtx_insn *insn; + int need_barrier_p = 0; + int seen_good_insn = 0; + + init_insn_group_barriers (); + + for (insn = NEXT_INSN (current_sched_info->prev_head); + insn != current_sched_info->next_tail; + insn = NEXT_INSN (insn)) + { + if (BARRIER_P (insn)) + { + rtx_insn *last = prev_active_insn (insn); + + if (! last) + continue; + if (JUMP_TABLE_DATA_P (last)) + last = prev_active_insn (last); + if (recog_memoized (last) != CODE_FOR_insn_group_barrier) + emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last); + + init_insn_group_barriers (); + seen_good_insn = 0; + need_barrier_p = 0; + } + else if (NONDEBUG_INSN_P (insn)) + { + if (recog_memoized (insn) == CODE_FOR_insn_group_barrier) + { + init_insn_group_barriers (); + seen_good_insn = 0; + need_barrier_p = 0; + } + else if (need_barrier_p || group_barrier_needed (insn) + || (mflag_sched_stop_bits_after_every_cycle + && GET_MODE (insn) == TImode + && seen_good_insn)) + { + if (TARGET_EARLY_STOP_BITS) + { + rtx_insn *last; + + for (last = insn; + last != current_sched_info->prev_head; + last = PREV_INSN (last)) + if (INSN_P (last) && GET_MODE (last) == TImode + && stops_p [INSN_UID (last)]) + break; + if (last == current_sched_info->prev_head) + last = insn; + last = prev_active_insn (last); + if (last + && recog_memoized (last) != CODE_FOR_insn_group_barrier) + emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), + last); + init_insn_group_barriers (); + for (last = NEXT_INSN (last); + last != insn; + last = NEXT_INSN (last)) + if (INSN_P (last)) + { + group_barrier_needed (last); + if (recog_memoized (last) >= 0 + && important_for_bundling_p (last)) + seen_good_insn = 1; + } + } + else + { + emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), + insn); + init_insn_group_barriers (); + seen_good_insn = 0; + } + group_barrier_needed (insn); + if (recog_memoized (insn) >= 0 + && important_for_bundling_p (insn)) + seen_good_insn = 1; + } + else if (recog_memoized (insn) >= 0 + && important_for_bundling_p (insn)) + seen_good_insn = 1; + need_barrier_p = (CALL_P (insn) || unknown_for_bundling_p (insn)); + } + } + } + + + + /* If the following function returns TRUE, we will use the DFA + insn scheduler. */ + + static int + ia64_first_cycle_multipass_dfa_lookahead (void) + { + return (reload_completed ? 6 : 4); + } + + /* The following function initiates variable `dfa_pre_cycle_insn'. 
*/ + + static void + ia64_init_dfa_pre_cycle_insn (void) + { + if (temp_dfa_state == NULL) + { + dfa_state_size = state_size (); + temp_dfa_state = xmalloc (dfa_state_size); + prev_cycle_state = xmalloc (dfa_state_size); + } + dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ()); + SET_PREV_INSN (dfa_pre_cycle_insn) = SET_NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX; + recog_memoized (dfa_pre_cycle_insn); + dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3))); + SET_PREV_INSN (dfa_stop_insn) = SET_NEXT_INSN (dfa_stop_insn) = NULL_RTX; + recog_memoized (dfa_stop_insn); + } + + /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN + used by the DFA insn scheduler. */ + + static rtx + ia64_dfa_pre_cycle_insn (void) + { + return dfa_pre_cycle_insn; + } + + /* The following function returns TRUE if PRODUCER (of type ilog or + ld) produces address for CONSUMER (of type st or stf). */ + + int + ia64_st_address_bypass_p (rtx_insn *producer, rtx_insn *consumer) + { + rtx dest, reg, mem; + + gcc_assert (producer && consumer); + dest = ia64_single_set (producer); + gcc_assert (dest); + reg = SET_DEST (dest); + gcc_assert (reg); + if (GET_CODE (reg) == SUBREG) + reg = SUBREG_REG (reg); + gcc_assert (GET_CODE (reg) == REG); + + dest = ia64_single_set (consumer); + gcc_assert (dest); + mem = SET_DEST (dest); + gcc_assert (mem && GET_CODE (mem) == MEM); + return reg_mentioned_p (reg, mem); + } + + /* The following function returns TRUE if PRODUCER (of type ilog or + ld) produces address for CONSUMER (of type ld or fld). */ + + int + ia64_ld_address_bypass_p (rtx_insn *producer, rtx_insn *consumer) + { + rtx dest, src, reg, mem; + + gcc_assert (producer && consumer); + dest = ia64_single_set (producer); + gcc_assert (dest); + reg = SET_DEST (dest); + gcc_assert (reg); + if (GET_CODE (reg) == SUBREG) + reg = SUBREG_REG (reg); + gcc_assert (GET_CODE (reg) == REG); + + src = ia64_single_set (consumer); + gcc_assert (src); + mem = SET_SRC (src); + gcc_assert (mem); + + if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0) + mem = XVECEXP (mem, 0, 0); + else if (GET_CODE (mem) == IF_THEN_ELSE) + /* ??? Is this bypass necessary for ld.c? */ + { + gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR); + mem = XEXP (mem, 1); + } + + while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND) + mem = XEXP (mem, 0); + + if (GET_CODE (mem) == UNSPEC) + { + int c = XINT (mem, 1); + + gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A + || c == UNSPEC_LDSA); + mem = XVECEXP (mem, 0, 0); + } + + /* Note that LO_SUM is used for GOT loads. */ + gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM); + + return reg_mentioned_p (reg, mem); + } + + /* The following function returns TRUE if INSN produces address for a + load/store insn. We will place such insns into M slot because it + decreases its latency time. */ + + int + ia64_produce_address_p (rtx insn) + { + return insn->call; + } + + + /* Emit pseudo-ops for the assembler to describe predicate relations. + At present this assumes that we only consider predicate pairs to + be mutex, and that the assembler can deduce proper values from + straight-line code. */ + + static void + emit_predicate_relation_info (void) + { + basic_block bb; + + FOR_EACH_BB_REVERSE_FN (bb, cfun) + { + int r; + rtx_insn *head = BB_HEAD (bb); + + /* We only need such notes at code labels. */ + if (! 
LABEL_P (head)) + continue; + if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head))) + head = NEXT_INSN (head); + + /* Skip p0, which may be thought to be live due to (reg:DI p0) + grabbing the entire block of predicate registers. */ + for (r = PR_REG (2); r < PR_REG (64); r += 2) + if (REGNO_REG_SET_P (df_get_live_in (bb), r)) + { + rtx p = gen_rtx_REG (BImode, r); + rtx_insn *n = emit_insn_after (gen_pred_rel_mutex (p), head); + if (head == BB_END (bb)) + BB_END (bb) = n; + head = n; + } + } + + /* Look for conditional calls that do not return, and protect predicate + relations around them. Otherwise the assembler will assume the call + returns, and complain about uses of call-clobbered predicates after + the call. */ + FOR_EACH_BB_REVERSE_FN (bb, cfun) + { + rtx_insn *insn = BB_HEAD (bb); + + while (1) + { + if (CALL_P (insn) + && GET_CODE (PATTERN (insn)) == COND_EXEC + && find_reg_note (insn, REG_NORETURN, NULL_RTX)) + { + rtx_insn *b = + emit_insn_before (gen_safe_across_calls_all (), insn); + rtx_insn *a = emit_insn_after (gen_safe_across_calls_normal (), insn); + if (BB_HEAD (bb) == insn) + BB_HEAD (bb) = b; + if (BB_END (bb) == insn) + BB_END (bb) = a; + } + + if (insn == BB_END (bb)) + break; + insn = NEXT_INSN (insn); + } + } + } + + /* Perform machine dependent operations on the rtl chain INSNS. */ + + static void + ia64_reorg (void) + { + /* We are freeing block_for_insn in the toplev to keep compatibility + with old MDEP_REORGS that are not CFG based. Recompute it now. */ + compute_bb_for_insn (); + + /* If optimizing, we'll have split before scheduling. */ + if (optimize == 0) + split_all_insns (); + + if (optimize && flag_schedule_insns_after_reload + && dbg_cnt (ia64_sched2)) + { + basic_block bb; + timevar_push (TV_SCHED2); + ia64_final_schedule = 1; + + /* We can't let modulo-sched prevent us from scheduling any bbs, + since we need the final schedule to produce bundle information. 
*/ + FOR_EACH_BB_FN (bb, cfun) + bb->flags &= ~BB_DISABLE_SCHEDULE; + + initiate_bundle_states (); + ia64_nop = make_insn_raw (gen_nop ()); + SET_PREV_INSN (ia64_nop) = SET_NEXT_INSN (ia64_nop) = NULL_RTX; + recog_memoized (ia64_nop); + clocks_length = get_max_uid () + 1; + stops_p = XCNEWVEC (char, clocks_length); + + if (ia64_tune == PROCESSOR_ITANIUM2) + { + pos_1 = get_cpu_unit_code ("2_1"); + pos_2 = get_cpu_unit_code ("2_2"); + pos_3 = get_cpu_unit_code ("2_3"); + pos_4 = get_cpu_unit_code ("2_4"); + pos_5 = get_cpu_unit_code ("2_5"); + pos_6 = get_cpu_unit_code ("2_6"); + _0mii_ = get_cpu_unit_code ("2b_0mii."); + _0mmi_ = get_cpu_unit_code ("2b_0mmi."); + _0mfi_ = get_cpu_unit_code ("2b_0mfi."); + _0mmf_ = get_cpu_unit_code ("2b_0mmf."); + _0bbb_ = get_cpu_unit_code ("2b_0bbb."); + _0mbb_ = get_cpu_unit_code ("2b_0mbb."); + _0mib_ = get_cpu_unit_code ("2b_0mib."); + _0mmb_ = get_cpu_unit_code ("2b_0mmb."); + _0mfb_ = get_cpu_unit_code ("2b_0mfb."); + _0mlx_ = get_cpu_unit_code ("2b_0mlx."); + _1mii_ = get_cpu_unit_code ("2b_1mii."); + _1mmi_ = get_cpu_unit_code ("2b_1mmi."); + _1mfi_ = get_cpu_unit_code ("2b_1mfi."); + _1mmf_ = get_cpu_unit_code ("2b_1mmf."); + _1bbb_ = get_cpu_unit_code ("2b_1bbb."); + _1mbb_ = get_cpu_unit_code ("2b_1mbb."); + _1mib_ = get_cpu_unit_code ("2b_1mib."); + _1mmb_ = get_cpu_unit_code ("2b_1mmb."); + _1mfb_ = get_cpu_unit_code ("2b_1mfb."); + _1mlx_ = get_cpu_unit_code ("2b_1mlx."); + } + else + { + pos_1 = get_cpu_unit_code ("1_1"); + pos_2 = get_cpu_unit_code ("1_2"); + pos_3 = get_cpu_unit_code ("1_3"); + pos_4 = get_cpu_unit_code ("1_4"); + pos_5 = get_cpu_unit_code ("1_5"); + pos_6 = get_cpu_unit_code ("1_6"); + _0mii_ = get_cpu_unit_code ("1b_0mii."); + _0mmi_ = get_cpu_unit_code ("1b_0mmi."); + _0mfi_ = get_cpu_unit_code ("1b_0mfi."); + _0mmf_ = get_cpu_unit_code ("1b_0mmf."); + _0bbb_ = get_cpu_unit_code ("1b_0bbb."); + _0mbb_ = get_cpu_unit_code ("1b_0mbb."); + _0mib_ = get_cpu_unit_code ("1b_0mib."); + _0mmb_ = get_cpu_unit_code ("1b_0mmb."); + _0mfb_ = get_cpu_unit_code ("1b_0mfb."); + _0mlx_ = get_cpu_unit_code ("1b_0mlx."); + _1mii_ = get_cpu_unit_code ("1b_1mii."); + _1mmi_ = get_cpu_unit_code ("1b_1mmi."); + _1mfi_ = get_cpu_unit_code ("1b_1mfi."); + _1mmf_ = get_cpu_unit_code ("1b_1mmf."); + _1bbb_ = get_cpu_unit_code ("1b_1bbb."); + _1mbb_ = get_cpu_unit_code ("1b_1mbb."); + _1mib_ = get_cpu_unit_code ("1b_1mib."); + _1mmb_ = get_cpu_unit_code ("1b_1mmb."); + _1mfb_ = get_cpu_unit_code ("1b_1mfb."); + _1mlx_ = get_cpu_unit_code ("1b_1mlx."); + } + + if (flag_selective_scheduling2 + && !maybe_skip_selective_scheduling ()) + run_selective_scheduling (); + else + schedule_ebbs (); + + /* Redo alignment computation, as it might gone wrong. */ + compute_alignments (); + + /* We cannot reuse this one because it has been corrupted by the + evil glat. */ + finish_bundle_states (); + free (stops_p); + stops_p = NULL; + emit_insn_group_barriers (dump_file); + + ia64_final_schedule = 0; + timevar_pop (TV_SCHED2); + } + else + emit_all_insn_group_barriers (dump_file); + + df_analyze (); + + /* A call must not be the last instruction in a function, so that the + return address is still within the function, so that unwinding works + properly. Note that IA-64 differs from dwarf2 on this point. */ + if (ia64_except_unwind_info (&global_options) == UI_TARGET) + { + rtx_insn *insn; + int saw_stop = 0; + + insn = get_last_insn (); + if (! INSN_P (insn)) + insn = prev_active_insn (insn); + if (insn) + { + /* Skip over insns that expand to nothing. 
*/ + while (NONJUMP_INSN_P (insn) + && get_attr_empty (insn) == EMPTY_YES) + { + if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE + && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER) + saw_stop = 1; + insn = prev_active_insn (insn); + } + if (CALL_P (insn)) + { + if (! saw_stop) + emit_insn (gen_insn_group_barrier (GEN_INT (3))); + emit_insn (gen_break_f ()); + emit_insn (gen_insn_group_barrier (GEN_INT (3))); + } + } + } + + emit_predicate_relation_info (); + + if (flag_var_tracking) + { + timevar_push (TV_VAR_TRACKING); + variable_tracking_main (); + timevar_pop (TV_VAR_TRACKING); + } + df_finish_pass (false); + } + + /* Return true if REGNO is used by the epilogue. */ + + int + ia64_epilogue_uses (int regno) + { + switch (regno) + { + case R_GR (1): + /* With a call to a function in another module, we will write a new + value to "gp". After returning from such a call, we need to make + sure the function restores the original gp-value, even if the + function itself does not use the gp anymore. */ + return !(TARGET_AUTO_PIC || TARGET_NO_PIC); + + case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3): + case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7): + /* For functions defined with the syscall_linkage attribute, all + input registers are marked as live at all function exits. This + prevents the register allocator from using the input registers, + which in turn makes it possible to restart a system call after + an interrupt without having to save/restore the input registers. + This also prevents kernel data from leaking to application code. */ + return lookup_attribute ("syscall_linkage", + TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL; + + case R_BR (0): + /* Conditional return patterns can't represent the use of `b0' as + the return address, so we force the value live this way. */ + return 1; + + case AR_PFS_REGNUM: + /* Likewise for ar.pfs, which is used by br.ret. */ + return 1; + + default: + return 0; + } + } + + /* Return true if REGNO is used by the frame unwinder. */ + + int + ia64_eh_uses (int regno) + { + unsigned int r; + + if (! reload_completed) + return 0; + + if (regno == 0) + return 0; + + for (r = reg_save_b0; r <= reg_save_ar_lc; r++) + if (regno == current_frame_info.r[r] + || regno == emitted_frame_related_regs[r]) + return 1; + + return 0; + } + + /* Return true if this goes in small data/bss. */ + + /* ??? We could also support own long data here. Generating movl/add/ld8 + instead of addl,ld8/ld8. This makes the code bigger, but should make the + code faster because there is one less load. This also includes incomplete + types which can't go in sdata/sbss. */ + + static bool + ia64_in_small_data_p (const_tree exp) + { + if (TARGET_NO_SDATA) + return false; + + /* We want to merge strings, so we never consider them small data. */ + if (TREE_CODE (exp) == STRING_CST) + return false; + + /* Functions are never small data. 
*/ + if (TREE_CODE (exp) == FUNCTION_DECL) + return false; + + if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp)) + { + const char *section = DECL_SECTION_NAME (exp); + + if (strcmp (section, ".sdata") == 0 + || startswith (section, ".sdata.") + || startswith (section, ".gnu.linkonce.s.") + || strcmp (section, ".sbss") == 0 + || startswith (section, ".sbss.") + || startswith (section, ".gnu.linkonce.sb.")) + return true; + } + else + { + HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); + + /* If this is an incomplete type with size 0, then we can't put it + in sdata because it might be too big when completed. */ + if (size > 0 && size <= ia64_section_threshold) + return true; + } + + return false; + } + + /* Output assembly directives for prologue regions. */ + + /* The current basic block number. */ + + static bool last_block; + + /* True if we need a copy_state command at the start of the next block. */ + + static bool need_copy_state; + + #ifndef MAX_ARTIFICIAL_LABEL_BYTES + # define MAX_ARTIFICIAL_LABEL_BYTES 30 + #endif + + /* The function emits unwind directives for the start of an epilogue. */ + + static void + process_epilogue (FILE *out_file, rtx insn ATTRIBUTE_UNUSED, + bool unwind, bool frame ATTRIBUTE_UNUSED) + { + /* If this isn't the last block of the function, then we need to label the + current state, and copy it back in at the start of the next block. */ + + if (!last_block) + { + if (unwind) + fprintf (out_file, "\t.label_state %d\n", + ++cfun->machine->state_num); + need_copy_state = true; + } + + if (unwind) + fprintf (out_file, "\t.restore sp\n"); + } + + /* This function processes a SET pattern for REG_CFA_ADJUST_CFA. */ + + static void + process_cfa_adjust_cfa (FILE *out_file, rtx pat, rtx insn, + bool unwind, bool frame) + { + rtx dest = SET_DEST (pat); + rtx src = SET_SRC (pat); + + if (dest == stack_pointer_rtx) + { + if (GET_CODE (src) == PLUS) + { + rtx op0 = XEXP (src, 0); + rtx op1 = XEXP (src, 1); + + gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT); + + if (INTVAL (op1) < 0) + { + gcc_assert (!frame_pointer_needed); + if (unwind) + fprintf (out_file, + "\t.fframe " HOST_WIDE_INT_PRINT_DEC"\n", + -INTVAL (op1)); + } + else + process_epilogue (out_file, insn, unwind, frame); + } + else + { + gcc_assert (src == hard_frame_pointer_rtx); + process_epilogue (out_file, insn, unwind, frame); + } + } + else if (dest == hard_frame_pointer_rtx) + { + gcc_assert (src == stack_pointer_rtx); + gcc_assert (frame_pointer_needed); + + if (unwind) + fprintf (out_file, "\t.vframe r%d\n", + ia64_dbx_register_number (REGNO (dest))); + } + else + gcc_unreachable (); + } + + /* This function processes a SET pattern for REG_CFA_REGISTER. */ + + static void + process_cfa_register (FILE *out_file, rtx pat, bool unwind) + { + rtx dest = SET_DEST (pat); + rtx src = SET_SRC (pat); + int dest_regno = REGNO (dest); + int src_regno; + + if (src == pc_rtx) + { + /* Saving return address pointer. 
*/ + if (unwind) + fprintf (out_file, "\t.save rp, r%d\n", + ia64_dbx_register_number (dest_regno)); + return; + } + + src_regno = REGNO (src); + + switch (src_regno) + { + case PR_REG (0): + gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]); + if (unwind) + fprintf (out_file, "\t.save pr, r%d\n", + ia64_dbx_register_number (dest_regno)); + break; + + case AR_UNAT_REGNUM: + gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]); + if (unwind) + fprintf (out_file, "\t.save ar.unat, r%d\n", + ia64_dbx_register_number (dest_regno)); + break; + + case AR_LC_REGNUM: + gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]); + if (unwind) + fprintf (out_file, "\t.save ar.lc, r%d\n", + ia64_dbx_register_number (dest_regno)); + break; + + default: + /* Everything else should indicate being stored to memory. */ + gcc_unreachable (); + } + } + + /* This function processes a SET pattern for REG_CFA_OFFSET. */ + + static void + process_cfa_offset (FILE *out_file, rtx pat, bool unwind) + { + rtx dest = SET_DEST (pat); + rtx src = SET_SRC (pat); + int src_regno = REGNO (src); + const char *saveop; + HOST_WIDE_INT off; + rtx base; + + gcc_assert (MEM_P (dest)); + if (GET_CODE (XEXP (dest, 0)) == REG) + { + base = XEXP (dest, 0); + off = 0; + } + else + { + gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS + && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT); + base = XEXP (XEXP (dest, 0), 0); + off = INTVAL (XEXP (XEXP (dest, 0), 1)); + } + + if (base == hard_frame_pointer_rtx) + { + saveop = ".savepsp"; + off = - off; + } + else + { + gcc_assert (base == stack_pointer_rtx); + saveop = ".savesp"; + } + + src_regno = REGNO (src); + switch (src_regno) + { + case BR_REG (0): + gcc_assert (!current_frame_info.r[reg_save_b0]); + if (unwind) + fprintf (out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n", + saveop, off); + break; + + case PR_REG (0): + gcc_assert (!current_frame_info.r[reg_save_pr]); + if (unwind) + fprintf (out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n", + saveop, off); + break; + + case AR_LC_REGNUM: + gcc_assert (!current_frame_info.r[reg_save_ar_lc]); + if (unwind) + fprintf (out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n", + saveop, off); + break; + + case AR_PFS_REGNUM: + gcc_assert (!current_frame_info.r[reg_save_ar_pfs]); + if (unwind) + fprintf (out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n", + saveop, off); + break; + + case AR_UNAT_REGNUM: + gcc_assert (!current_frame_info.r[reg_save_ar_unat]); + if (unwind) + fprintf (out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n", + saveop, off); + break; + + case GR_REG (4): + case GR_REG (5): + case GR_REG (6): + case GR_REG (7): + if (unwind) + fprintf (out_file, "\t.save.g 0x%x\n", + 1 << (src_regno - GR_REG (4))); + break; + + case BR_REG (1): + case BR_REG (2): + case BR_REG (3): + case BR_REG (4): + case BR_REG (5): + if (unwind) + fprintf (out_file, "\t.save.b 0x%x\n", + 1 << (src_regno - BR_REG (1))); + break; + + case FR_REG (2): + case FR_REG (3): + case FR_REG (4): + case FR_REG (5): + if (unwind) + fprintf (out_file, "\t.save.f 0x%x\n", + 1 << (src_regno - FR_REG (2))); + break; + + case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19): + case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23): + case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27): + case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31): + if (unwind) + fprintf (out_file, "\t.save.gf 0x0, 0x%x\n", + 1 << (src_regno - FR_REG (12))); + break; + + 
default: + /* ??? For some reason we mark other general registers, even those + we can't represent in the unwind info. Ignore them. */ + break; + } + } + + /* This function looks at a single insn and emits any directives + required to unwind this insn. */ + + static void + ia64_asm_unwind_emit (FILE *out_file, rtx_insn *insn) + { + bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET; + bool frame = dwarf2out_do_frame (); + rtx note, pat; + bool handled_one; + + if (!unwind && !frame) + return; + + if (NOTE_INSN_BASIC_BLOCK_P (insn)) + { + last_block = NOTE_BASIC_BLOCK (insn)->next_bb + == EXIT_BLOCK_PTR_FOR_FN (cfun); + + /* Restore unwind state from immediately before the epilogue. */ + if (need_copy_state) + { + if (unwind) + { + fprintf (out_file, "\t.body\n"); + fprintf (out_file, "\t.copy_state %d\n", + cfun->machine->state_num); + } + need_copy_state = false; + } + } + + if (NOTE_P (insn) || ! RTX_FRAME_RELATED_P (insn)) + return; + + /* Look for the ALLOC insn. */ + if (INSN_CODE (insn) == CODE_FOR_alloc) + { + rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0)); + int dest_regno = REGNO (dest); + + /* If this is the final destination for ar.pfs, then this must + be the alloc in the prologue. */ + if (dest_regno == current_frame_info.r[reg_save_ar_pfs]) + { + if (unwind) + fprintf (out_file, "\t.save ar.pfs, r%d\n", + ia64_dbx_register_number (dest_regno)); + } + else + { + /* This must be an alloc before a sibcall. We must drop the + old frame info. The easiest way to drop the old frame + info is to ensure we had a ".restore sp" directive + followed by a new prologue. If the procedure doesn't + have a memory-stack frame, we'll issue a dummy ".restore + sp" now. */ + if (current_frame_info.total_size == 0 && !frame_pointer_needed) + /* if haven't done process_epilogue() yet, do it now */ + process_epilogue (out_file, insn, unwind, frame); + if (unwind) + fprintf (out_file, "\t.prologue\n"); + } + return; + } + + handled_one = false; + for (note = REG_NOTES (insn); note; note = XEXP (note, 1)) + switch (REG_NOTE_KIND (note)) + { + case REG_CFA_ADJUST_CFA: + pat = XEXP (note, 0); + if (pat == NULL) + pat = PATTERN (insn); + process_cfa_adjust_cfa (out_file, pat, insn, unwind, frame); + handled_one = true; + break; + + case REG_CFA_OFFSET: + pat = XEXP (note, 0); + if (pat == NULL) + pat = PATTERN (insn); + process_cfa_offset (out_file, pat, unwind); + handled_one = true; + break; + + case REG_CFA_REGISTER: + pat = XEXP (note, 0); + if (pat == NULL) + pat = PATTERN (insn); + process_cfa_register (out_file, pat, unwind); + handled_one = true; + break; + + case REG_FRAME_RELATED_EXPR: + case REG_CFA_DEF_CFA: + case REG_CFA_EXPRESSION: + case REG_CFA_RESTORE: + case REG_CFA_SET_VDRAP: + /* Not used in the ia64 port. */ + gcc_unreachable (); + + default: + /* Not a frame-related note. */ + break; + } + + /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the + explicit action to take. No guessing required. */ + gcc_assert (handled_one); + } + + /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */ + + static void + ia64_asm_emit_except_personality (rtx personality) + { + fputs ("\t.personality\t", asm_out_file); + output_addr_const (asm_out_file, personality); + fputc ('\n', asm_out_file); + } + + /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */ + + static void + ia64_asm_init_sections (void) + { + exception_section = get_unnamed_section (0, output_section_asm_op, + "\t.handlerdata"); + } + + /* Implement TARGET_DEBUG_UNWIND_INFO. 
*/ + + static enum unwind_info_type + ia64_debug_unwind_info (void) + { + return UI_TARGET; + } + + enum ia64_builtins + { + IA64_BUILTIN_BSP, + IA64_BUILTIN_COPYSIGNQ, + IA64_BUILTIN_FABSQ, + IA64_BUILTIN_FLUSHRS, + IA64_BUILTIN_INFQ, + IA64_BUILTIN_HUGE_VALQ, + IA64_BUILTIN_NANQ, + IA64_BUILTIN_NANSQ, + IA64_BUILTIN_max + }; + + static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max]; + + void + ia64_init_builtins (void) + { + tree fpreg_type; + tree float80_type; + tree decl; + + /* The __fpreg type. */ + fpreg_type = make_node (REAL_TYPE); + TYPE_PRECISION (fpreg_type) = 82; + layout_type (fpreg_type); + (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg"); + + /* The __float80 type. */ + if (float64x_type_node != NULL_TREE + && TYPE_MODE (float64x_type_node) == XFmode) + float80_type = float64x_type_node; + else + { + float80_type = make_node (REAL_TYPE); + TYPE_PRECISION (float80_type) = 80; + layout_type (float80_type); + } + (*lang_hooks.types.register_builtin_type) (float80_type, "__float80"); + + /* The __float128 type. */ + if (!TARGET_HPUX) + { + tree ftype; + tree const_string_type + = build_pointer_type (build_qualified_type + (char_type_node, TYPE_QUAL_CONST)); + + (*lang_hooks.types.register_builtin_type) (float128_type_node, + "__float128"); + + /* TFmode support builtins. */ + ftype = build_function_type_list (float128_type_node, NULL_TREE); + decl = add_builtin_function ("__builtin_infq", ftype, + IA64_BUILTIN_INFQ, BUILT_IN_MD, + NULL, NULL_TREE); + ia64_builtins[IA64_BUILTIN_INFQ] = decl; + + decl = add_builtin_function ("__builtin_huge_valq", ftype, + IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD, + NULL, NULL_TREE); + ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl; + + ftype = build_function_type_list (float128_type_node, + const_string_type, + NULL_TREE); + decl = add_builtin_function ("__builtin_nanq", ftype, + IA64_BUILTIN_NANQ, BUILT_IN_MD, + "nanq", NULL_TREE); + TREE_READONLY (decl) = 1; + ia64_builtins[IA64_BUILTIN_NANQ] = decl; + + decl = add_builtin_function ("__builtin_nansq", ftype, + IA64_BUILTIN_NANSQ, BUILT_IN_MD, + "nansq", NULL_TREE); + TREE_READONLY (decl) = 1; + ia64_builtins[IA64_BUILTIN_NANSQ] = decl; + + ftype = build_function_type_list (float128_type_node, + float128_type_node, + NULL_TREE); + decl = add_builtin_function ("__builtin_fabsq", ftype, + IA64_BUILTIN_FABSQ, BUILT_IN_MD, + "__fabstf2", NULL_TREE); + TREE_READONLY (decl) = 1; + ia64_builtins[IA64_BUILTIN_FABSQ] = decl; + + ftype = build_function_type_list (float128_type_node, + float128_type_node, + float128_type_node, + NULL_TREE); + decl = add_builtin_function ("__builtin_copysignq", ftype, + IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD, + "__copysigntf3", NULL_TREE); + TREE_READONLY (decl) = 1; + ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl; + } + else + /* Under HPUX, this is a synonym for "long double". */ + (*lang_hooks.types.register_builtin_type) (long_double_type_node, + "__float128"); + + /* Fwrite on VMS is non-standard. 
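A hedged usage sketch (not part of the commit; variable names illustrative) of the TFmode builtins registered above for non-HP-UX targets:

  void
  example_tfmode_builtins (void)
  {
    __float128 pos_inf = __builtin_infq ();             /* +infinity  */
    __float128 qnan = __builtin_nanq ("");              /* quiet NaN  */
    __float128 mag = __builtin_fabsq (qnan);            /* expands via __fabstf2  */
    __float128 s = __builtin_copysignq (mag, pos_inf);  /* expands via __copysigntf3  */
    (void) s;
  }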
*/ + #if TARGET_ABI_OPEN_VMS + vms_patch_builtins (); + #endif + + #define def_builtin(name, type, code) \ + add_builtin_function ((name), (type), (code), BUILT_IN_MD, \ + NULL, NULL_TREE) + + decl = def_builtin ("__builtin_ia64_bsp", + build_function_type_list (ptr_type_node, NULL_TREE), + IA64_BUILTIN_BSP); + ia64_builtins[IA64_BUILTIN_BSP] = decl; + + decl = def_builtin ("__builtin_ia64_flushrs", + build_function_type_list (void_type_node, NULL_TREE), + IA64_BUILTIN_FLUSHRS); + ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl; + + #undef def_builtin + + if (TARGET_HPUX) + { + if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE) + set_user_assembler_name (decl, "_Isfinite"); + if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE) + set_user_assembler_name (decl, "_Isfinitef"); + if ((decl = builtin_decl_explicit (BUILT_IN_FINITEL)) != NULL_TREE) + set_user_assembler_name (decl, "_Isfinitef128"); + } + } + + static tree + ia64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, + tree *args, bool ignore ATTRIBUTE_UNUSED) + { + if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD) + { + enum ia64_builtins fn_code + = (enum ia64_builtins) DECL_MD_FUNCTION_CODE (fndecl); + switch (fn_code) + { + case IA64_BUILTIN_NANQ: + case IA64_BUILTIN_NANSQ: + { + tree type = TREE_TYPE (TREE_TYPE (fndecl)); + const char *str = c_getstr (*args); + int quiet = fn_code == IA64_BUILTIN_NANQ; + REAL_VALUE_TYPE real; + + if (str && real_nan (&real, str, quiet, TYPE_MODE (type))) + return build_real (type, real); + return NULL_TREE; + } + + default: + break; + } + } + + #ifdef SUBTARGET_FOLD_BUILTIN + return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore); + #endif + + return NULL_TREE; + } + + rtx + ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, + machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) + { + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl); + + switch (fcode) + { + case IA64_BUILTIN_BSP: + if (! target || ! register_operand (target, DImode)) + target = gen_reg_rtx (DImode); + emit_insn (gen_bsp_value (target)); + #ifdef POINTERS_EXTEND_UNSIGNED + target = convert_memory_address (ptr_mode, target); + #endif + return target; + + case IA64_BUILTIN_FLUSHRS: + emit_insn (gen_flushrs ()); + return const0_rtx; + + case IA64_BUILTIN_INFQ: + case IA64_BUILTIN_HUGE_VALQ: + { + machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp)); + REAL_VALUE_TYPE inf; + rtx tmp; + + real_inf (&inf); + tmp = const_double_from_real_value (inf, target_mode); + + tmp = validize_mem (force_const_mem (target_mode, tmp)); + + if (target == 0) + target = gen_reg_rtx (target_mode); + + emit_move_insn (target, tmp); + return target; + } + + case IA64_BUILTIN_NANQ: + case IA64_BUILTIN_NANSQ: + case IA64_BUILTIN_FABSQ: + case IA64_BUILTIN_COPYSIGNQ: + return expand_call (exp, target, ignore); + + default: + gcc_unreachable (); + } + + return NULL_RTX; + } + + /* Return the ia64 builtin for CODE. */ + + static tree + ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) + { + if (code >= IA64_BUILTIN_max) + return error_mark_node; + + return ia64_builtins[code]; + } + + /* Implement TARGET_FUNCTION_ARG_PADDING. + + For the HP-UX IA64 aggregate parameters are passed stored in the + most significant bits of the stack slot. */ + + static pad_direction + ia64_function_arg_padding (machine_mode mode, const_tree type) + { + /* Exception to normal case for structures/unions/etc. 
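A small hypothetical use of the two register-stack builtins defined above (the wrapper name is illustrative):

  /* Flush dirty stacked registers to the backing store, then read the
     current backing store pointer.  */
  void *
  example_backing_store_top (void)
  {
    __builtin_ia64_flushrs ();
    return __builtin_ia64_bsp ();
  }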
*/ + if (TARGET_HPUX + && type + && AGGREGATE_TYPE_P (type) + && int_size_in_bytes (type) < UNITS_PER_WORD) + return PAD_UPWARD; + + /* Fall back to the default. */ + return default_function_arg_padding (mode, type); + } + + /* Emit text to declare externally defined variables and functions, because + the Intel assembler does not support undefined externals. */ + + void + ia64_asm_output_external (FILE *file, tree decl, const char *name) + { + /* We output the name if and only if TREE_SYMBOL_REFERENCED is + set in order to avoid putting out names that are never really + used. */ + if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl))) + { + /* maybe_assemble_visibility will return 1 if the assembler + visibility directive is output. */ + int need_visibility = ((*targetm.binds_local_p) (decl) + && maybe_assemble_visibility (decl)); + + /* GNU as does not need anything here, but the HP linker does + need something for external functions. */ + if ((TARGET_HPUX_LD || !TARGET_GNU_AS) + && TREE_CODE (decl) == FUNCTION_DECL) + (*targetm.asm_out.globalize_decl_name) (file, decl); + else if (need_visibility && !TARGET_GNU_AS) + (*targetm.asm_out.globalize_label) (file, name); + } + } + + /* Set SImode div/mod functions, init_integral_libfuncs only initializes + modes of word_mode and larger. Rename the TFmode libfuncs using the + HPUX conventions. __divtf3 is used for XFmode. We need to keep it for + backward compatibility. */ + + static void + ia64_init_libfuncs (void) + { + set_optab_libfunc (sdiv_optab, SImode, "__divsi3"); + set_optab_libfunc (udiv_optab, SImode, "__udivsi3"); + set_optab_libfunc (smod_optab, SImode, "__modsi3"); + set_optab_libfunc (umod_optab, SImode, "__umodsi3"); + + set_optab_libfunc (add_optab, TFmode, "_U_Qfadd"); + set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub"); + set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy"); + set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv"); + set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg"); + + set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad"); + set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad"); + set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad"); + set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl"); + set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl"); + set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80"); + + set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl"); + set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl"); + set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad"); + set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl"); + set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl"); + + set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad"); + set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad"); + set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad"); + /* HP-UX 11.23 libc does not have a function for unsigned + SImode-to-TFmode conversion. */ + set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad"); + } + + /* Rename all the TFmode libfuncs using the HPUX conventions. */ + + static void + ia64_hpux_init_libfuncs (void) + { + ia64_init_libfuncs (); + + /* The HP SI millicode division and mod functions expect DI arguments. 
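To make the optab-to-libfunc renaming concrete, a hedged sketch (function name illustrative): with the table above, a quad-precision divide is expected to be emitted as a call to _U_Qfdiv rather than libgcc's __divtf3.

  /* Expected to lower to a call to _U_Qfdiv under the mappings above.  */
  __float128
  example_quad_div (__float128 x, __float128 y)
  {
    return x / y;
  }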
+ By turning them off completely we avoid using both libgcc and the + non-standard millicode routines and use the HP DI millicode routines + instead. */ + + set_optab_libfunc (sdiv_optab, SImode, 0); + set_optab_libfunc (udiv_optab, SImode, 0); + set_optab_libfunc (smod_optab, SImode, 0); + set_optab_libfunc (umod_optab, SImode, 0); + + set_optab_libfunc (sdiv_optab, DImode, "__milli_divI"); + set_optab_libfunc (udiv_optab, DImode, "__milli_divU"); + set_optab_libfunc (smod_optab, DImode, "__milli_remI"); + set_optab_libfunc (umod_optab, DImode, "__milli_remU"); + + /* HP-UX libc has TF min/max/abs routines in it. */ + set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin"); + set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax"); + set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs"); + + /* ia64_expand_compare uses this. */ + cmptf_libfunc = init_one_libfunc ("_U_Qfcmp"); + + /* These should never be used. */ + set_optab_libfunc (eq_optab, TFmode, 0); + set_optab_libfunc (ne_optab, TFmode, 0); + set_optab_libfunc (gt_optab, TFmode, 0); + set_optab_libfunc (ge_optab, TFmode, 0); + set_optab_libfunc (lt_optab, TFmode, 0); + set_optab_libfunc (le_optab, TFmode, 0); + } + + /* Rename the division and modulus functions in VMS. */ + + static void + ia64_vms_init_libfuncs (void) + { + set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I"); + set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L"); + set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI"); + set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL"); + set_optab_libfunc (smod_optab, SImode, "OTS$REM_I"); + set_optab_libfunc (smod_optab, DImode, "OTS$REM_L"); + set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI"); + set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL"); + #ifdef MEM_LIBFUNCS_INIT + MEM_LIBFUNCS_INIT; + #endif + } + + /* Rename the TFmode libfuncs available from soft-fp in glibc using + the HPUX conventions. */ + + static void + ia64_sysv4_init_libfuncs (void) + { + ia64_init_libfuncs (); + + /* These functions are not part of the HPUX TFmode interface. We + use them instead of _U_Qfcmp, which doesn't work the way we + expect. */ + set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq"); + set_optab_libfunc (ne_optab, TFmode, "_U_Qfne"); + set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt"); + set_optab_libfunc (ge_optab, TFmode, "_U_Qfge"); + set_optab_libfunc (lt_optab, TFmode, "_U_Qflt"); + set_optab_libfunc (le_optab, TFmode, "_U_Qfle"); + + /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in + glibc doesn't have them. */ + } + + /* Use soft-fp. */ + + static void + ia64_soft_fp_init_libfuncs (void) + { + } + + static bool + ia64_vms_valid_pointer_mode (scalar_int_mode mode) + { + return (mode == SImode || mode == DImode); + } + + /* For HPUX, it is illegal to have relocations in shared segments. */ + + static int + ia64_hpux_reloc_rw_mask (void) + { + return 3; + } + + /* For others, relax this so that relocations to local data goes in + read-only segments, but we still cannot allow global relocations + in read-only segments. */ + + static int + ia64_reloc_rw_mask (void) + { + return flag_pic ? 3 : 2; + } + + /* Return the section to use for X. The only special thing we do here + is to honor small data. 
*/ + + static section * + ia64_select_rtx_section (machine_mode mode, rtx x, + unsigned HOST_WIDE_INT align) + { + if (GET_MODE_SIZE (mode) > 0 + && GET_MODE_SIZE (mode) <= ia64_section_threshold + && !TARGET_NO_SDATA) + return sdata_section; + else + return default_elf_select_rtx_section (mode, x, align); + } + + static unsigned int + ia64_section_type_flags (tree decl, const char *name, int reloc) + { + unsigned int flags = 0; + + if (strcmp (name, ".sdata") == 0 + || startswith (name, ".sdata.") + || startswith (name, ".gnu.linkonce.s.") + || startswith (name, ".sdata2.") + || startswith (name, ".gnu.linkonce.s2.") + || strcmp (name, ".sbss") == 0 + || startswith (name, ".sbss.") + || startswith (name, ".gnu.linkonce.sb.")) + flags = SECTION_SMALL; + + flags |= default_section_type_flags (decl, name, reloc); + return flags; + } + + /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a + structure type and that the address of that type should be passed + in out0, rather than in r8. */ + + static bool + ia64_struct_retval_addr_is_first_parm_p (tree fntype) + { + tree ret_type = TREE_TYPE (fntype); + + /* The Itanium C++ ABI requires that out0, rather than r8, be used + as the structure return address parameter, if the return value + type has a non-trivial copy constructor or destructor. It is not + clear if this same convention should be used for other + programming languages. Until G++ 3.4, we incorrectly used r8 for + these return values. */ + return (abi_version_at_least (2) + && ret_type + && TYPE_MODE (ret_type) == BLKmode + && TREE_ADDRESSABLE (ret_type) + && lang_GNU_CXX ()); + } + + /* Output the assembler code for a thunk function. THUNK_DECL is the + declaration for the thunk function itself, FUNCTION is the decl for + the target function. DELTA is an immediate constant offset to be + added to THIS. If VCALL_OFFSET is nonzero, the word at + *(*this + vcall_offset) should be added to THIS. */ + + static void + ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, + tree function) + { + const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk)); + rtx this_rtx, funexp; + rtx_insn *insn; + unsigned int this_parmno; + unsigned int this_regno; + rtx delta_rtx; + + reload_completed = 1; + epilogue_completed = 1; + + /* Set things up as ia64_expand_prologue might. */ + last_scratch_gr_reg = 15; + + memset (&current_frame_info, 0, sizeof (current_frame_info)); + current_frame_info.spill_cfa_off = -16; + current_frame_info.n_input_regs = 1; + current_frame_info.need_regstk = (TARGET_REG_NAMES != 0); + + /* Mark the end of the (empty) prologue. */ + emit_note (NOTE_INSN_PROLOGUE_END); + + /* Figure out whether "this" will be the first parameter (the + typical case) or the second parameter (as happens when the + virtual function returns certain class objects). */ + this_parmno + = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk)) + ? 1 : 0); + this_regno = IN_REG (this_parmno); + if (!TARGET_REG_NAMES) + reg_names[this_regno] = ia64_reg_numbers[this_parmno]; + + this_rtx = gen_rtx_REG (Pmode, this_regno); + + /* Apply the constant offset, if required.
*/ + delta_rtx = GEN_INT (delta); + if (TARGET_ILP32) + { + rtx tmp = gen_rtx_REG (ptr_mode, this_regno); + REG_POINTER (tmp) = 1; + if (delta && satisfies_constraint_I (delta_rtx)) + { + emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx)); + delta = 0; + } + else + emit_insn (gen_ptr_extend (this_rtx, tmp)); + } + if (delta) + { + if (!satisfies_constraint_I (delta_rtx)) + { + rtx tmp = gen_rtx_REG (Pmode, 2); + emit_move_insn (tmp, delta_rtx); + delta_rtx = tmp; + } + emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx)); + } + + /* Apply the offset from the vtable, if required. */ + if (vcall_offset) + { + rtx vcall_offset_rtx = GEN_INT (vcall_offset); + rtx tmp = gen_rtx_REG (Pmode, 2); + + if (TARGET_ILP32) + { + rtx t = gen_rtx_REG (ptr_mode, 2); + REG_POINTER (t) = 1; + emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx)); + if (satisfies_constraint_I (vcall_offset_rtx)) + { + emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx)); + vcall_offset = 0; + } + else + emit_insn (gen_ptr_extend (tmp, t)); + } + else + emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx)); + + if (vcall_offset) + { + if (!satisfies_constraint_J (vcall_offset_rtx)) + { + rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ()); + emit_move_insn (tmp2, vcall_offset_rtx); + vcall_offset_rtx = tmp2; + } + emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx)); + } + + if (TARGET_ILP32) + emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp))); + else + emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp)); + + emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp)); + } + + /* Generate a tail call to the target function. */ + if (! TREE_USED (function)) + { + assemble_external (function); + TREE_USED (function) = 1; + } + funexp = XEXP (DECL_RTL (function), 0); + funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); + ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1); + insn = get_last_insn (); + SIBLING_CALL_P (insn) = 1; + + /* Code generation for calls relies on splitting. */ + reload_completed = 1; + epilogue_completed = 1; + try_split (PATTERN (insn), insn, 0); + + emit_barrier (); + + /* Run just enough of rest_of_compilation to get the insns emitted. + There's not really enough bulk here to make other passes such as + instruction scheduling worth while. */ + + emit_all_insn_group_barriers (NULL); + insn = get_insns (); + shorten_branches (insn); + assemble_start_function (thunk, fnname); + final_start_function (insn, file, 1); + final (insn, file, 1); + final_end_function (); + assemble_end_function (thunk, fnname); + + reload_completed = 0; + epilogue_completed = 0; + } + + /* Worker function for TARGET_STRUCT_VALUE_RTX. */ + + static rtx + ia64_struct_value_rtx (tree fntype, + int incoming ATTRIBUTE_UNUSED) + { + if (TARGET_ABI_OPEN_VMS || + (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))) + return NULL_RTX; + return gen_rtx_REG (Pmode, GR_REG (8)); + } + + static bool + ia64_scalar_mode_supported_p (scalar_mode mode) + { + switch (mode) + { + case E_QImode: + case E_HImode: + case E_SImode: + case E_DImode: + case E_TImode: + return true; + + case E_SFmode: + case E_DFmode: + case E_XFmode: + case E_RFmode: + return true; + + case E_TFmode: + return true; + + default: + return false; + } + } + + static bool + ia64_vector_mode_supported_p (machine_mode mode) + { + switch (mode) + { + case E_V8QImode: + case E_V4HImode: + case E_V2SImode: + return true; + + case E_V2SFmode: + return true; + + default: + return false; + } + } + + /* Implement the FUNCTION_PROFILER macro. 
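For readers unfamiliar with DELTA/VCALL_OFFSET in the thunk emitter above, a hypothetical C++ fragment (class names illustrative) that makes the front end request such a this-adjusting thunk:

  struct A { virtual void f (); long a_field; };
  struct B { virtual void g (); long b_field; };

  /* Overriding B::g from the non-primary base requires a thunk that adds a
     constant DELTA to `this' before transferring to C::g; with virtual
     bases, a VCALL_OFFSET loaded from the vtable is added as well.  */
  struct C : A, B { void g () override; };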
*/ + + void + ia64_output_function_profiler (FILE *file, int labelno) + { + bool indirect_call; + + /* If the function needs a static chain and the static chain + register is r15, we use an indirect call so as to bypass + the PLT stub in case the executable is dynamically linked, + because the stub clobbers r15 as per 5.3.6 of the psABI. + We don't need to do that in non canonical PIC mode. */ + + if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC) + { + gcc_assert (STATIC_CHAIN_REGNUM == 15); + indirect_call = true; + } + else + indirect_call = false; + + if (TARGET_GNU_AS) + fputs ("\t.prologue 4, r40\n", file); + else + fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file); + fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file); + + if (NO_PROFILE_COUNTERS) + fputs ("\tmov out3 = r0\n", file); + else + { + char buf[20]; + ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno); + + if (TARGET_AUTO_PIC) + fputs ("\tmovl out3 = @gprel(", file); + else + fputs ("\taddl out3 = @ltoff(", file); + assemble_name (file, buf); + if (TARGET_AUTO_PIC) + fputs (")\n", file); + else + fputs ("), r1\n", file); + } + + if (indirect_call) + fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file); + fputs ("\t;;\n", file); + + fputs ("\t.save rp, r42\n", file); + fputs ("\tmov out2 = b0\n", file); + if (indirect_call) + fputs ("\tld8 r14 = [r14]\n\t;;\n", file); + fputs ("\t.body\n", file); + fputs ("\tmov out1 = r1\n", file); + if (indirect_call) + { + fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file); + fputs ("\tmov b6 = r16\n", file); + fputs ("\tld8 r1 = [r14]\n", file); + fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file); + } + else + fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file); + } + + static GTY(()) rtx mcount_func_rtx; + static rtx + gen_mcount_func_rtx (void) + { + if (!mcount_func_rtx) + mcount_func_rtx = init_one_libfunc ("_mcount"); + return mcount_func_rtx; + } + + void + ia64_profile_hook (int labelno) + { + rtx label, ip; + + if (NO_PROFILE_COUNTERS) + label = const0_rtx; + else + { + char buf[30]; + const char *label_name; + ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno); + label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf)); + label = gen_rtx_SYMBOL_REF (Pmode, label_name); + SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL; + } + ip = gen_reg_rtx (Pmode); + emit_insn (gen_ip_value (ip)); + emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL, + VOIDmode, + gen_rtx_REG (Pmode, BR_REG (0)), Pmode, + ip, Pmode, + label, Pmode); + } + + /* Return the mangling of TYPE if it is an extended fundamental type. */ + + static const char * + ia64_mangle_type (const_tree type) + { + type = TYPE_MAIN_VARIANT (type); + + if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE + && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE) + return NULL; + + /* On HP-UX, "long double" is mangled as "e" so __float128 is + mangled as "e". */ + if (!TARGET_HPUX && TYPE_MODE (type) == TFmode) + return "g"; + /* On HP-UX, "e" is not available as a mangling of __float80 so use + an extended mangling. Elsewhere, "e" is available since long + double is 80 bits. */ + if (TYPE_MODE (type) == XFmode) + return TARGET_HPUX ? "u9__float80" : "e"; + if (TYPE_MODE (type) == RFmode) + return "u7__fpreg"; + return NULL; + } + + /* Return the diagnostic message string if conversion from FROMTYPE to + TOTYPE is not allowed, NULL otherwise. 
*/ + static const char * + ia64_invalid_conversion (const_tree fromtype, const_tree totype) + { + /* Reject nontrivial conversion to or from __fpreg. */ + if (TYPE_MODE (fromtype) == RFmode + && TYPE_MODE (totype) != RFmode + && TYPE_MODE (totype) != VOIDmode) + return N_("invalid conversion from %<__fpreg%>"); + if (TYPE_MODE (totype) == RFmode + && TYPE_MODE (fromtype) != RFmode) + return N_("invalid conversion to %<__fpreg%>"); + return NULL; + } + + /* Return the diagnostic message string if the unary operation OP is + not permitted on TYPE, NULL otherwise. */ + static const char * + ia64_invalid_unary_op (int op, const_tree type) + { + /* Reject operations on __fpreg other than unary + or &. */ + if (TYPE_MODE (type) == RFmode + && op != CONVERT_EXPR + && op != ADDR_EXPR) + return N_("invalid operation on %<__fpreg%>"); + return NULL; + } + + /* Return the diagnostic message string if the binary operation OP is + not permitted on TYPE1 and TYPE2, NULL otherwise. */ + static const char * + ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2) + { + /* Reject operations on __fpreg. */ + if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode) + return N_("invalid operation on %<__fpreg%>"); + return NULL; + } + + /* HP-UX version_id attribute. + For object foo, if the version_id is set to 1234 put out an alias + of '.alias foo "foo{1234}" We can't use "foo{1234}" in anything + other than an alias statement because it is an illegal symbol name. */ + + static tree + ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED, + tree name ATTRIBUTE_UNUSED, + tree args, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) + { + tree arg = TREE_VALUE (args); + + if (TREE_CODE (arg) != STRING_CST) + { + error("version attribute is not a string"); + *no_add_attrs = true; + return NULL_TREE; + } + return NULL_TREE; + } + + /* Target hook for c_mode_for_suffix. */ + + static machine_mode + ia64_c_mode_for_suffix (char suffix) + { + if (suffix == 'q') + return TFmode; + if (suffix == 'w') + return XFmode; + + return VOIDmode; + } + + static GTY(()) rtx ia64_dconst_0_5_rtx; + + rtx + ia64_dconst_0_5 (void) + { + if (! ia64_dconst_0_5_rtx) + { + REAL_VALUE_TYPE rv; + real_from_string (&rv, "0.5"); + ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode); + } + return ia64_dconst_0_5_rtx; + } + + static GTY(()) rtx ia64_dconst_0_375_rtx; + + rtx + ia64_dconst_0_375 (void) + { + if (! ia64_dconst_0_375_rtx) + { + REAL_VALUE_TYPE rv; + real_from_string (&rv, "0.375"); + ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode); + } + return ia64_dconst_0_375_rtx; + } + + static fixed_size_mode + ia64_get_reg_raw_mode (int regno) + { + if (FR_REGNO_P (regno)) + return XFmode; + return default_get_reg_raw_mode(regno); + } + + /* Implement TARGET_MEMBER_TYPE_FORCES_BLK. ??? Might not be needed + anymore. */ + + bool + ia64_member_type_forces_blk (const_tree, machine_mode mode) + { + return TARGET_HPUX && mode == TFmode; + } + + /* Always default to .text section until HP-UX linker is fixed. */ + + ATTRIBUTE_UNUSED static section * + ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED, + enum node_frequency freq ATTRIBUTE_UNUSED, + bool startup ATTRIBUTE_UNUSED, + bool exit ATTRIBUTE_UNUSED) + { + return NULL; + } + + /* Construct (set target (vec_select op0 (parallel perm))) and + return true if that's a valid instruction in the active ISA. 
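A hypothetical snippet (identifiers illustrative) showing what the diagnostic hooks and the 'q'/'w' suffix handling above accept and reject:

  void
  example_fpreg_rules (void)
  {
    __float128 q = 1.0q;   /* 'q' suffix -> TFmode (ia64_c_mode_for_suffix).  */
    __float80 w = 2.0w;    /* 'w' suffix -> XFmode.  */
    __fpreg r;
    __fpreg *p = &r;       /* Unary & on __fpreg is allowed.  */
    /* r = r + r;             would be rejected: "invalid operation on __fpreg".  */
    (void) q; (void) w; (void) p;
  }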
*/ + + static bool + expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt) + { + rtx rperm[MAX_VECT_LEN], x; + unsigned i; + + for (i = 0; i < nelt; ++i) + rperm[i] = GEN_INT (perm[i]); + + x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm)); + x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x); + x = gen_rtx_SET (target, x); + + rtx_insn *insn = emit_insn (x); + if (recog_memoized (insn) < 0) + { + remove_insn (insn); + return false; + } + return true; + } + + /* Similar, but generate a vec_concat from op0 and op1 as well. */ + + static bool + expand_vselect_vconcat (rtx target, rtx op0, rtx op1, + const unsigned char *perm, unsigned nelt) + { + machine_mode v2mode; + rtx x; + + if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode)) + return false; + x = gen_rtx_VEC_CONCAT (v2mode, op0, op1); + return expand_vselect (target, x, perm, nelt); + } + + /* Try to expand a no-op permutation. */ + + static bool + expand_vec_perm_identity (struct expand_vec_perm_d *d) + { + unsigned i, nelt = d->nelt; + + for (i = 0; i < nelt; ++i) + if (d->perm[i] != i) + return false; + + if (!d->testing_p) + emit_move_insn (d->target, d->op0); + + return true; + } + + /* Try to expand D via a shrp instruction. */ + + static bool + expand_vec_perm_shrp (struct expand_vec_perm_d *d) + { + unsigned i, nelt = d->nelt, shift, mask; + rtx tmp, hi, lo; + + /* ??? Don't force V2SFmode into the integer registers. */ + if (d->vmode == V2SFmode) + return false; + + mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1); + + shift = d->perm[0]; + if (BYTES_BIG_ENDIAN && shift > nelt) + return false; + + for (i = 1; i < nelt; ++i) + if (d->perm[i] != ((shift + i) & mask)) + return false; + + if (d->testing_p) + return true; + + hi = shift < nelt ? d->op1 : d->op0; + lo = shift < nelt ? d->op0 : d->op1; + + shift %= nelt; + + shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT; + + /* We've eliminated the shift 0 case via expand_vec_perm_identity. */ + gcc_assert (IN_RANGE (shift, 1, 63)); + + /* Recall that big-endian elements are numbered starting at the top of + the register. Ideally we'd have a shift-left-pair. But since we + don't, convert to a shift the other direction. */ + if (BYTES_BIG_ENDIAN) + shift = 64 - shift; + + tmp = gen_reg_rtx (DImode); + hi = gen_lowpart (DImode, hi); + lo = gen_lowpart (DImode, lo); + emit_insn (gen_shrp (tmp, hi, lo, GEN_INT (shift))); + + emit_move_insn (d->target, gen_lowpart (d->vmode, tmp)); + return true; + } + + /* Try to instantiate D in a single instruction. */ + + static bool + expand_vec_perm_1 (struct expand_vec_perm_d *d) + { + unsigned i, nelt = d->nelt; + unsigned char perm2[MAX_VECT_LEN]; + + /* Try single-operand selections. */ + if (d->one_operand_p) + { + if (expand_vec_perm_identity (d)) + return true; + if (expand_vselect (d->target, d->op0, d->perm, nelt)) + return true; + } + + /* Try two operand selections. */ + if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt)) + return true; + + /* Recognize interleave style patterns with reversed operands. */ + if (!d->one_operand_p) + { + for (i = 0; i < nelt; ++i) + { + unsigned e = d->perm[i]; + if (e >= nelt) + e -= nelt; + else + e += nelt; + perm2[i] = e; + } + + if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt)) + return true; + } + + if (expand_vec_perm_shrp (d)) + return true; + + /* ??? 
Look for deposit-like permutations where most of the result + comes from one vector unchanged and the rest comes from a + sequential hunk of the other vector. */ + + return false; + } + + /* Pattern match broadcast permutations. */ + + static bool + expand_vec_perm_broadcast (struct expand_vec_perm_d *d) + { + unsigned i, elt, nelt = d->nelt; + unsigned char perm2[2]; + rtx temp; + bool ok; + + if (!d->one_operand_p) + return false; + + elt = d->perm[0]; + for (i = 1; i < nelt; ++i) + if (d->perm[i] != elt) + return false; + + switch (d->vmode) + { + case E_V2SImode: + case E_V2SFmode: + /* Implementable by interleave. */ + perm2[0] = elt; + perm2[1] = elt + 2; + ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2); + gcc_assert (ok); + break; + + case E_V8QImode: + /* Implementable by extract + broadcast. */ + if (BYTES_BIG_ENDIAN) + elt = 7 - elt; + elt *= BITS_PER_UNIT; + temp = gen_reg_rtx (DImode); + emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0), + GEN_INT (8), GEN_INT (elt))); + emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp))); + break; + + case E_V4HImode: + /* Should have been matched directly by vec_select. */ + default: + gcc_unreachable (); + } + + return true; + } + + /* A subroutine of ia64_expand_vec_perm_const_1. Try to simplify a + two vector permutation into a single vector permutation by using + an interleave operation to merge the vectors. */ + + static bool + expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d) + { + struct expand_vec_perm_d dremap, dfinal; + unsigned char remap[2 * MAX_VECT_LEN]; + unsigned contents, i, nelt, nelt2; + unsigned h0, h1, h2, h3; + rtx_insn *seq; + bool ok; + + if (d->one_operand_p) + return false; + + nelt = d->nelt; + nelt2 = nelt / 2; + + /* Examine from whence the elements come. */ + contents = 0; + for (i = 0; i < nelt; ++i) + contents |= 1u << d->perm[i]; + + memset (remap, 0xff, sizeof (remap)); + dremap = *d; + + h0 = (1u << nelt2) - 1; + h1 = h0 << nelt2; + h2 = h0 << nelt; + h3 = h0 << (nelt + nelt2); + + if ((contents & (h0 | h2)) == contents) /* punpck even halves */ + { + for (i = 0; i < nelt; ++i) + { + unsigned which = i / 2 + (i & 1 ? nelt : 0); + remap[which] = i; + dremap.perm[i] = which; + } + } + else if ((contents & (h1 | h3)) == contents) /* punpck odd halves */ + { + for (i = 0; i < nelt; ++i) + { + unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0); + remap[which] = i; + dremap.perm[i] = which; + } + } + else if ((contents & 0x5555) == contents) /* mix even elements */ + { + for (i = 0; i < nelt; ++i) + { + unsigned which = (i & ~1) + (i & 1 ? nelt : 0); + remap[which] = i; + dremap.perm[i] = which; + } + } + else if ((contents & 0xaaaa) == contents) /* mix odd elements */ + { + for (i = 0; i < nelt; ++i) + { + unsigned which = (i | 1) + (i & 1 ? nelt : 0); + remap[which] = i; + dremap.perm[i] = which; + } + } + else if (floor_log2 (contents) - ctz_hwi (contents) < (int)nelt) /* shrp */ + { + unsigned shift = ctz_hwi (contents); + for (i = 0; i < nelt; ++i) + { + unsigned which = (i + shift) & (2 * nelt - 1); + remap[which] = i; + dremap.perm[i] = which; + } + } + else + return false; + + /* Use the remapping array set up above to move the elements from their + swizzled locations into their final destinations. 
*/ + dfinal = *d; + for (i = 0; i < nelt; ++i) + { + unsigned e = remap[d->perm[i]]; + gcc_assert (e < nelt); + dfinal.perm[i] = e; + } + if (d->testing_p) + dfinal.op0 = gen_raw_REG (dfinal.vmode, LAST_VIRTUAL_REGISTER + 1); + else + dfinal.op0 = gen_reg_rtx (dfinal.vmode); + dfinal.op1 = dfinal.op0; + dfinal.one_operand_p = true; + dremap.target = dfinal.op0; + + /* Test if the final remap can be done with a single insn. For V4HImode + this *will* succeed. For V8QImode or V2SImode it may not. */ + start_sequence (); + ok = expand_vec_perm_1 (&dfinal); + seq = get_insns (); + end_sequence (); + if (!ok) + return false; + if (d->testing_p) + return true; + + ok = expand_vec_perm_1 (&dremap); + gcc_assert (ok); + + emit_insn (seq); + return true; + } + + /* A subroutine of ia64_expand_vec_perm_const_1. Emit a full V4HImode + constant permutation via two mux2 and a merge. */ + + static bool + expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d) + { + unsigned char perm2[4]; + rtx rmask[4]; + unsigned i; + rtx t0, t1, mask, x; + bool ok; + + if (d->vmode != V4HImode || d->one_operand_p) + return false; + if (d->testing_p) + return true; + + for (i = 0; i < 4; ++i) + { + perm2[i] = d->perm[i] & 3; + rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx); + } + mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask)); + mask = force_reg (V4HImode, mask); + + t0 = gen_reg_rtx (V4HImode); + t1 = gen_reg_rtx (V4HImode); + + ok = expand_vselect (t0, d->op0, perm2, 4); + gcc_assert (ok); + ok = expand_vselect (t1, d->op1, perm2, 4); + gcc_assert (ok); + + x = gen_rtx_AND (V4HImode, mask, t0); + emit_insn (gen_rtx_SET (t0, x)); + + x = gen_rtx_NOT (V4HImode, mask); + x = gen_rtx_AND (V4HImode, x, t1); + emit_insn (gen_rtx_SET (t1, x)); + + x = gen_rtx_IOR (V4HImode, t0, t1); + emit_insn (gen_rtx_SET (d->target, x)); + + return true; + } + + /* The guts of ia64_expand_vec_perm_const, also used by the ok hook. + With all of the interface bits taken care of, perform the expansion + in D and return true on success. */ + + static bool + ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) + { + if (expand_vec_perm_1 (d)) + return true; + if (expand_vec_perm_broadcast (d)) + return true; + if (expand_vec_perm_interleave_2 (d)) + return true; + if (expand_vec_perm_v4hi_5 (d)) + return true; + return false; + } + + /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ + + static bool + ia64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, + rtx op1, const vec_perm_indices &sel) + { + struct expand_vec_perm_d d; + unsigned char perm[MAX_VECT_LEN]; + unsigned int i, nelt, which; + + d.target = target; + if (op0) + { + rtx nop0 = force_reg (vmode, op0); + if (op0 == op1) + op1 = nop0; + op0 = nop0; + } + if (op1) + op1 = force_reg (vmode, op1); + d.op0 = op0; + d.op1 = op1; + + d.vmode = vmode; + gcc_assert (VECTOR_MODE_P (d.vmode)); + d.nelt = nelt = GET_MODE_NUNITS (d.vmode); + d.testing_p = !target; + + gcc_assert (sel.length () == nelt); + gcc_checking_assert (sizeof (d.perm) == sizeof (perm)); + + for (i = which = 0; i < nelt; ++i) + { + unsigned int ei = sel[i] & (2 * nelt - 1); + + which |= (ei < nelt ? 1 : 2); + d.perm[i] = ei; + perm[i] = ei; + } + + switch (which) + { + default: + gcc_unreachable(); + + case 3: + if (d.testing_p || !rtx_equal_p (d.op0, d.op1)) + { + d.one_operand_p = false; + break; + } + + /* The elements of PERM do not suggest that only the first operand + is used, but both operands are identical. 
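These expanders sit behind TARGET_VECTORIZE_VEC_PERM_CONST; a hedged GNU C sketch (types and selector illustrative) of a constant two-operand V4HImode shuffle that would be routed through them:

  typedef short v4hi __attribute__ ((vector_size (8)));

  v4hi
  example_perm (v4hi a, v4hi b)
  {
    /* Elements 0-3 of the selector pick from A, 4-7 pick from B; a constant
       selector lets the middle end query and expand the permutation via the
       hook above.  */
    const v4hi sel = { 0, 5, 2, 7 };
    return __builtin_shuffle (a, b, sel);
  }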
Allow easier matching + of the permutation by folding the permutation into the single + input vector. */ + for (i = 0; i < nelt; ++i) + if (d.perm[i] >= nelt) + d.perm[i] -= nelt; + /* FALLTHRU */ + + case 1: + d.op1 = d.op0; + d.one_operand_p = true; + break; + + case 2: + for (i = 0; i < nelt; ++i) + d.perm[i] -= nelt; + d.op0 = d.op1; + d.one_operand_p = true; + break; + } + + if (d.testing_p) + { + /* We have to go through the motions and see if we can + figure out how to generate the requested permutation. */ + d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); + d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); + if (!d.one_operand_p) + d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); + + start_sequence (); + bool ret = ia64_expand_vec_perm_const_1 (&d); + end_sequence (); + + return ret; + } + + if (ia64_expand_vec_perm_const_1 (&d)) + return true; + + /* If the mask says both arguments are needed, but they are the same, + the above tried to expand with one_operand_p true. If that didn't + work, retry with one_operand_p false, as that's what we used in _ok. */ + if (which == 3 && d.one_operand_p) + { + memcpy (d.perm, perm, sizeof (perm)); + d.one_operand_p = false; + return ia64_expand_vec_perm_const_1 (&d); + } + + return false; + } + + void + ia64_expand_vec_setv2sf (rtx operands[3]) + { + struct expand_vec_perm_d d; + unsigned int which; + bool ok; + + d.target = operands[0]; + d.op0 = operands[0]; + d.op1 = gen_reg_rtx (V2SFmode); + d.vmode = V2SFmode; + d.nelt = 2; + d.one_operand_p = false; + d.testing_p = false; + + which = INTVAL (operands[2]); + gcc_assert (which <= 1); + d.perm[0] = 1 - which; + d.perm[1] = which + 2; + + emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode))); + + ok = ia64_expand_vec_perm_const_1 (&d); + gcc_assert (ok); + } + + void + ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd) + { + struct expand_vec_perm_d d; + machine_mode vmode = GET_MODE (target); + unsigned int i, nelt = GET_MODE_NUNITS (vmode); + bool ok; + + d.target = target; + d.op0 = op0; + d.op1 = op1; + d.vmode = vmode; + d.nelt = nelt; + d.one_operand_p = false; + d.testing_p = false; + + for (i = 0; i < nelt; ++i) + d.perm[i] = i * 2 + odd; + + ok = ia64_expand_vec_perm_const_1 (&d); + gcc_assert (ok); + } + + /* Implement TARGET_CAN_CHANGE_MODE_CLASS. + + In BR regs, we can't change the DImode at all. + In FP regs, we can't change FP values to integer values and vice versa, + but we can change e.g. DImode to SImode, and V2SFmode into DImode. */ + + static bool + ia64_can_change_mode_class (machine_mode from, machine_mode to, + reg_class_t rclass) + { + if (reg_classes_intersect_p (rclass, BR_REGS)) + return from == to; + if (SCALAR_FLOAT_MODE_P (from) != SCALAR_FLOAT_MODE_P (to)) + return !reg_classes_intersect_p (rclass, FR_REGS); + return true; + } + ++static void ++ia64_linux_file_end (void) ++{ ++ int saved_trampolines_created = trampolines_created; ++ trampolines_created = 0; ++ file_end_indicate_exec_stack (); ++ trampolines_created = saved_trampolines_created; ++} ++ + #include "gt-ia64.h" diff --cc gcc/config/rs6000/rs6000.cc index 00000000000,7a4ef5e6c0a..3b21fdc8bb0 mode 000000,100644..100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@@ -1,0 -1,28925 +1,28942 @@@ + // SPDX-License-Identifier: GPL-3.0-or-later + /* Subroutines used for code generation on IBM RS/6000. + Copyright (C) 1991-2022 Free Software Foundation, Inc. 
+ Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + + #define IN_TARGET_CODE 1 + + #include "config.h" + #include "system.h" + #include "coretypes.h" + #include "backend.h" + #include "rtl.h" + #include "tree.h" + #include "memmodel.h" + #include "gimple.h" + #include "cfghooks.h" + #include "cfgloop.h" + #include "df.h" + #include "tm_p.h" + #include "stringpool.h" + #include "expmed.h" + #include "optabs.h" + #include "regs.h" + #include "ira.h" + #include "recog.h" + #include "cgraph.h" + #include "diagnostic-core.h" + #include "insn-attr.h" + #include "flags.h" + #include "alias.h" + #include "fold-const.h" + #include "attribs.h" + #include "stor-layout.h" + #include "calls.h" + #include "print-tree.h" + #include "varasm.h" + #include "explow.h" + #include "expr.h" + #include "output.h" + #include "common/common-target.h" + #include "langhooks.h" + #include "reload.h" + #include "sched-int.h" + #include "gimplify.h" + #include "gimple-fold.h" + #include "gimple-iterator.h" + #include "gimple-walk.h" + #include "ssa.h" + #include "tree-vectorizer.h" + #include "tree-ssa-propagate.h" + #include "intl.h" + #include "tm-constrs.h" + #include "target-globals.h" + #include "builtins.h" + #include "tree-vector-builder.h" + #include "context.h" + #include "tree-pass.h" + #include "symbol-summary.h" + #include "ipa-prop.h" + #include "ipa-fnsummary.h" + #include "except.h" + #if TARGET_XCOFF + #include "xcoffout.h" /* get declarations of xcoff_*_section_name */ + #endif + #include "case-cfn-macros.h" + #include "ppc-auxv.h" + #include "rs6000-internal.h" + #include "opts.h" + + /* This file should be included last. */ + #include "target-def.h" + + /* Set -mabi=ieeelongdouble on some old targets. In the future, power server + systems will also set long double to be IEEE 128-bit. AIX and Darwin + explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so + those systems will not pick up this default. This needs to be after all + of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are + properly defined. */ + #ifndef TARGET_IEEEQUAD_DEFAULT + #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD) + #define TARGET_IEEEQUAD_DEFAULT 1 + #else + #define TARGET_IEEEQUAD_DEFAULT 0 + #endif + #endif + + /* Don't enable PC-relative addressing if the target does not support it. */ + #ifndef PCREL_SUPPORTED_BY_OS + #define PCREL_SUPPORTED_BY_OS 0 + #endif + + /* Support targetm.vectorize.builtin_mask_for_load. */ + tree altivec_builtin_mask_for_load; + + #ifdef USING_ELFOS_H + /* Counter for labels which are to be placed in .fixup. */ + int fixuplabelno = 0; + #endif + + /* Whether to use variant of AIX ABI for PowerPC64 Linux. */ + int dot_symbols; + + /* Specify the machine mode that pointers have. After generation of rtl, the + compiler makes no further distinction between pointers and any other objects + of this machine mode. 
*/ + scalar_int_mode rs6000_pmode; + + #if TARGET_ELF + /* Note whether IEEE 128-bit floating point was passed or returned, either as + the __float128/_Float128 explicit type, or when long double is IEEE 128-bit + floating point. We changed the default C++ mangling for these types and we + may want to generate a weak alias of the old mangling (U10__float128) to the + new mangling (u9__ieee128). */ + bool rs6000_passes_ieee128 = false; + #endif + + /* Track use of r13 in 64bit AIX TLS. */ + static bool xcoff_tls_exec_model_detected = false; + + /* Generate the manged name (i.e. U10__float128) used in GCC 8.1, and not the + name used in current releases (i.e. u9__ieee128). */ + static bool ieee128_mangling_gcc_8_1; + + /* Width in bits of a pointer. */ + unsigned rs6000_pointer_size; + + #ifdef HAVE_AS_GNU_ATTRIBUTE + # ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE + # define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0 + # endif + /* Flag whether floating point values have been passed/returned. + Note that this doesn't say whether fprs are used, since the + Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls + should be set for soft-float values passed in gprs and ieee128 + values passed in vsx registers. */ + bool rs6000_passes_float = false; + bool rs6000_passes_long_double = false; + /* Flag whether vector values have been passed/returned. */ + bool rs6000_passes_vector = false; + /* Flag whether small (<= 8 byte) structures have been returned. */ + bool rs6000_returns_struct = false; + #endif + + /* Value is TRUE if register/mode pair is acceptable. */ + static bool rs6000_hard_regno_mode_ok_p + [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER]; + + /* Maximum number of registers needed for a given register class and mode. */ + unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES]; + + /* How many registers are needed for a given register and mode. */ + unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER]; + + /* Map register number to register class. */ + enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER]; + + static int dbg_cost_ctrl; + + /* Built in types. */ + tree rs6000_builtin_types[RS6000_BTI_MAX]; + + /* Flag to say the TOC is initialized */ + int toc_initialized, need_toc_init; + char toc_label_name[10]; + + /* Cached value of rs6000_variable_issue. This is cached in + rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */ + static short cached_can_issue_more; + + static GTY(()) section *read_only_data_section; + static GTY(()) section *private_data_section; + static GTY(()) section *tls_data_section; + static GTY(()) section *tls_private_data_section; + static GTY(()) section *read_only_private_data_section; + static GTY(()) section *sdata2_section; + + section *toc_section = 0; + + /* Describe the vector unit used for modes. */ + enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES]; + enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES]; + + /* Register classes for various constraints that are based on the target + switches. */ + enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX]; + + /* Describe the alignment of a vector. */ + int rs6000_vector_align[NUM_MACHINE_MODES]; + + /* Map selected modes to types for builtins. */ + tree builtin_mode_to_type[MAX_MACHINE_MODE][2]; + + /* What modes to automatically generate reciprocal divide estimate (fre) and + reciprocal sqrt (frsqrte) for. 
*/ + unsigned char rs6000_recip_bits[MAX_MACHINE_MODE]; + + /* Masks to determine which reciprocal esitmate instructions to generate + automatically. */ + enum rs6000_recip_mask { + RECIP_SF_DIV = 0x001, /* Use divide estimate */ + RECIP_DF_DIV = 0x002, + RECIP_V4SF_DIV = 0x004, + RECIP_V2DF_DIV = 0x008, + + RECIP_SF_RSQRT = 0x010, /* Use reciprocal sqrt estimate. */ + RECIP_DF_RSQRT = 0x020, + RECIP_V4SF_RSQRT = 0x040, + RECIP_V2DF_RSQRT = 0x080, + + /* Various combination of flags for -mrecip=xxx. */ + RECIP_NONE = 0, + RECIP_ALL = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV + | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT + | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT), + + RECIP_HIGH_PRECISION = RECIP_ALL, + + /* On low precision machines like the power5, don't enable double precision + reciprocal square root estimate, since it isn't accurate enough. */ + RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT)) + }; + + /* -mrecip options. */ + static struct + { + const char *string; /* option name */ + unsigned int mask; /* mask bits to set */ + } recip_options[] = { + { "all", RECIP_ALL }, + { "none", RECIP_NONE }, + { "div", (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV + | RECIP_V2DF_DIV) }, + { "divf", (RECIP_SF_DIV | RECIP_V4SF_DIV) }, + { "divd", (RECIP_DF_DIV | RECIP_V2DF_DIV) }, + { "rsqrt", (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT + | RECIP_V2DF_RSQRT) }, + { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) }, + { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) }, + }; + + /* On PowerPC, we have a limited number of target clones that we care about + which means we can use an array to hold the options, rather than having more + elaborate data structures to identify each possible variation. Order the + clones from the default to the highest ISA. */ + enum { + CLONE_DEFAULT = 0, /* default clone. */ + CLONE_ISA_2_05, /* ISA 2.05 (power6). */ + CLONE_ISA_2_06, /* ISA 2.06 (power7). */ + CLONE_ISA_2_07, /* ISA 2.07 (power8). */ + CLONE_ISA_3_00, /* ISA 3.0 (power9). */ + CLONE_ISA_3_1, /* ISA 3.1 (power10). */ + CLONE_MAX + }; + + /* Map compiler ISA bits into HWCAP names. */ + struct clone_map { + HOST_WIDE_INT isa_mask; /* rs6000_isa mask */ + const char *name; /* name to use in __builtin_cpu_supports. */ + }; + + static const struct clone_map rs6000_clone_map[CLONE_MAX] = { + { 0, "" }, /* Default options. */ + { OPTION_MASK_CMPB, "arch_2_05" }, /* ISA 2.05 (power6). */ + { OPTION_MASK_POPCNTD, "arch_2_06" }, /* ISA 2.06 (power7). */ + { OPTION_MASK_P8_VECTOR, "arch_2_07" }, /* ISA 2.07 (power8). */ + { OPTION_MASK_P9_VECTOR, "arch_3_00" }, /* ISA 3.0 (power9). */ + { OPTION_MASK_POWER10, "arch_3_1" }, /* ISA 3.1 (power10). */ + }; + + + /* Newer LIBCs explicitly export this symbol to declare that they provide + the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a + reference to this symbol whenever we expand a CPU builtin, so that + we never link against an old LIBC. */ + const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform"; + + /* True if we have expanded a CPU builtin. */ + bool cpu_builtin_p = false; + + /* Pointer to function (in rs6000-c.cc) that can define or undefine target + macros that have changed. Languages that don't support the preprocessor + don't link in rs6000-c.cc, so we can't call it directly. */ + void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT); + + /* Simplfy register classes into simpler classifications. 
We assume + GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range + check for standard register classes (gpr/floating/altivec/vsx) and + floating/vector classes (float/altivec/vsx). */ + + enum rs6000_reg_type { + NO_REG_TYPE, + PSEUDO_REG_TYPE, + GPR_REG_TYPE, + VSX_REG_TYPE, + ALTIVEC_REG_TYPE, + FPR_REG_TYPE, + SPR_REG_TYPE, + CR_REG_TYPE + }; + + /* Map register class to register type. */ + static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES]; + + /* First/last register type for the 'normal' register types (i.e. general + purpose, floating point, altivec, and VSX registers). */ + #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE) + + #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE) + + + /* Register classes we care about in secondary reload or go if legitimate + address. We only need to worry about GPR, FPR, and Altivec registers here, + along an ANY field that is the OR of the 3 register classes. */ + + enum rs6000_reload_reg_type { + RELOAD_REG_GPR, /* General purpose registers. */ + RELOAD_REG_FPR, /* Traditional floating point regs. */ + RELOAD_REG_VMX, /* Altivec (VMX) registers. */ + RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. */ + N_RELOAD_REG + }; + + /* For setting up register classes, loop through the 3 register classes mapping + into real registers, and skip the ANY class, which is just an OR of the + bits. */ + #define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR + #define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX + + /* Map reload register type to a register in the register class. */ + struct reload_reg_map_type { + const char *name; /* Register class name. */ + int reg; /* Register in the register class. */ + }; + + static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = { + { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */ + { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */ + { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */ + { "Any", -1 }, /* RELOAD_REG_ANY. */ + }; + + /* Mask bits for each register class, indexed per mode. Historically the + compiler has been more restrictive which types can do PRE_MODIFY instead of + PRE_INC and PRE_DEC, so keep track of sepaate bits for these two. */ + typedef unsigned char addr_mask_type; + + #define RELOAD_REG_VALID 0x01 /* Mode valid in register.. */ + #define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */ + #define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */ + #define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */ + #define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */ + #define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */ + #define RELOAD_REG_AND_M16 0x40 /* AND -16 addressing. */ + #define RELOAD_REG_QUAD_OFFSET 0x80 /* quad offset is limited. */ + + /* Register type masks based on the type, of valid addressing modes. */ + struct rs6000_reg_addr { + enum insn_code reload_load; /* INSN to reload for loading. */ + enum insn_code reload_store; /* INSN to reload for storing. */ + enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */ + enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */ + enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */ + addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */ + bool scalar_in_vmx_p; /* Scalar value can go in VMX. */ + }; + + static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES]; + + /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. 
*/ + static inline bool + mode_supports_pre_incdec_p (machine_mode mode) + { + return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC) + != 0); + } + + /* Helper function to say whether a mode supports PRE_MODIFY. */ + static inline bool + mode_supports_pre_modify_p (machine_mode mode) + { + return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY) + != 0); + } + + /* Return true if we have D-form addressing in altivec registers. */ + static inline bool + mode_supports_vmx_dform (machine_mode mode) + { + return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0); + } + + /* Return true if we have D-form addressing in VSX registers. This addressing + is more limited than normal d-form addressing in that the offset must be + aligned on a 16-byte boundary. */ + static inline bool + mode_supports_dq_form (machine_mode mode) + { + return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET) + != 0); + } + + /* Given that there exists at least one variable that is set (produced) + by OUT_INSN and read (consumed) by IN_INSN, return true iff + IN_INSN represents one or more memory store operations and none of + the variables set by OUT_INSN is used by IN_INSN as the address of a + store operation. If either IN_INSN or OUT_INSN does not represent + a "single" RTL SET expression (as loosely defined by the + implementation of the single_set function) or a PARALLEL with only + SETs, CLOBBERs, and USEs inside, this function returns false. + + This rs6000-specific version of store_data_bypass_p checks for + certain conditions that result in assertion failures (and internal + compiler errors) in the generic store_data_bypass_p function and + returns false rather than calling store_data_bypass_p if one of the + problematic conditions is detected. */ + + int + rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn) + { + rtx out_set, in_set; + rtx out_pat, in_pat; + rtx out_exp, in_exp; + int i, j; + + in_set = single_set (in_insn); + if (in_set) + { + if (MEM_P (SET_DEST (in_set))) + { + out_set = single_set (out_insn); + if (!out_set) + { + out_pat = PATTERN (out_insn); + if (GET_CODE (out_pat) == PARALLEL) + { + for (i = 0; i < XVECLEN (out_pat, 0); i++) + { + out_exp = XVECEXP (out_pat, 0, i); + if ((GET_CODE (out_exp) == CLOBBER) + || (GET_CODE (out_exp) == USE)) + continue; + else if (GET_CODE (out_exp) != SET) + return false; + } + } + } + } + } + else + { + in_pat = PATTERN (in_insn); + if (GET_CODE (in_pat) != PARALLEL) + return false; + + for (i = 0; i < XVECLEN (in_pat, 0); i++) + { + in_exp = XVECEXP (in_pat, 0, i); + if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE)) + continue; + else if (GET_CODE (in_exp) != SET) + return false; + + if (MEM_P (SET_DEST (in_exp))) + { + out_set = single_set (out_insn); + if (!out_set) + { + out_pat = PATTERN (out_insn); + if (GET_CODE (out_pat) != PARALLEL) + return false; + for (j = 0; j < XVECLEN (out_pat, 0); j++) + { + out_exp = XVECEXP (out_pat, 0, j); + if ((GET_CODE (out_exp) == CLOBBER) + || (GET_CODE (out_exp) == USE)) + continue; + else if (GET_CODE (out_exp) != SET) + return false; + } + } + } + } + } + return store_data_bypass_p (out_insn, in_insn); + } + + + /* Processor costs (relative to an add) */ + + const struct processor_costs *rs6000_cost; + + /* Instruction size costs on 32bit processors. 
*/ + static const + struct processor_costs size32_cost = { + COSTS_N_INSNS (1), /* mulsi */ + COSTS_N_INSNS (1), /* mulsi_const */ + COSTS_N_INSNS (1), /* mulsi_const9 */ + COSTS_N_INSNS (1), /* muldi */ + COSTS_N_INSNS (1), /* divsi */ + COSTS_N_INSNS (1), /* divdi */ + COSTS_N_INSNS (1), /* fp */ + COSTS_N_INSNS (1), /* dmul */ + COSTS_N_INSNS (1), /* sdiv */ + COSTS_N_INSNS (1), /* ddiv */ + 32, /* cache line size */ + 0, /* l1 cache */ + 0, /* l2 cache */ + 0, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction size costs on 64bit processors. */ + static const + struct processor_costs size64_cost = { + COSTS_N_INSNS (1), /* mulsi */ + COSTS_N_INSNS (1), /* mulsi_const */ + COSTS_N_INSNS (1), /* mulsi_const9 */ + COSTS_N_INSNS (1), /* muldi */ + COSTS_N_INSNS (1), /* divsi */ + COSTS_N_INSNS (1), /* divdi */ + COSTS_N_INSNS (1), /* fp */ + COSTS_N_INSNS (1), /* dmul */ + COSTS_N_INSNS (1), /* sdiv */ + COSTS_N_INSNS (1), /* ddiv */ + 128, /* cache line size */ + 0, /* l1 cache */ + 0, /* l2 cache */ + 0, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on RS64A processors. */ + static const + struct processor_costs rs64a_cost = { + COSTS_N_INSNS (20), /* mulsi */ + COSTS_N_INSNS (12), /* mulsi_const */ + COSTS_N_INSNS (8), /* mulsi_const9 */ + COSTS_N_INSNS (34), /* muldi */ + COSTS_N_INSNS (65), /* divsi */ + COSTS_N_INSNS (67), /* divdi */ + COSTS_N_INSNS (4), /* fp */ + COSTS_N_INSNS (4), /* dmul */ + COSTS_N_INSNS (31), /* sdiv */ + COSTS_N_INSNS (31), /* ddiv */ + 128, /* cache line size */ + 128, /* l1 cache */ + 2048, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on MPCCORE processors. */ + static const + struct processor_costs mpccore_cost = { + COSTS_N_INSNS (2), /* mulsi */ + COSTS_N_INSNS (2), /* mulsi_const */ + COSTS_N_INSNS (2), /* mulsi_const9 */ + COSTS_N_INSNS (2), /* muldi */ + COSTS_N_INSNS (6), /* divsi */ + COSTS_N_INSNS (6), /* divdi */ + COSTS_N_INSNS (4), /* fp */ + COSTS_N_INSNS (5), /* dmul */ + COSTS_N_INSNS (10), /* sdiv */ + COSTS_N_INSNS (17), /* ddiv */ + 32, /* cache line size */ + 4, /* l1 cache */ + 16, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPC403 processors. */ + static const + struct processor_costs ppc403_cost = { + COSTS_N_INSNS (4), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (4), /* mulsi_const9 */ + COSTS_N_INSNS (4), /* muldi */ + COSTS_N_INSNS (33), /* divsi */ + COSTS_N_INSNS (33), /* divdi */ + COSTS_N_INSNS (11), /* fp */ + COSTS_N_INSNS (11), /* dmul */ + COSTS_N_INSNS (11), /* sdiv */ + COSTS_N_INSNS (11), /* ddiv */ + 32, /* cache line size */ + 4, /* l1 cache */ + 16, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPC405 processors. */ + static const + struct processor_costs ppc405_cost = { + COSTS_N_INSNS (5), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (3), /* mulsi_const9 */ + COSTS_N_INSNS (5), /* muldi */ + COSTS_N_INSNS (35), /* divsi */ + COSTS_N_INSNS (35), /* divdi */ + COSTS_N_INSNS (11), /* fp */ + COSTS_N_INSNS (11), /* dmul */ + COSTS_N_INSNS (11), /* sdiv */ + COSTS_N_INSNS (11), /* ddiv */ + 32, /* cache line size */ + 16, /* l1 cache */ + 128, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPC440 processors. 
*/ + static const + struct processor_costs ppc440_cost = { + COSTS_N_INSNS (3), /* mulsi */ + COSTS_N_INSNS (2), /* mulsi_const */ + COSTS_N_INSNS (2), /* mulsi_const9 */ + COSTS_N_INSNS (3), /* muldi */ + COSTS_N_INSNS (34), /* divsi */ + COSTS_N_INSNS (34), /* divdi */ + COSTS_N_INSNS (5), /* fp */ + COSTS_N_INSNS (5), /* dmul */ + COSTS_N_INSNS (19), /* sdiv */ + COSTS_N_INSNS (33), /* ddiv */ + 32, /* cache line size */ + 32, /* l1 cache */ + 256, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPC476 processors. */ + static const + struct processor_costs ppc476_cost = { + COSTS_N_INSNS (4), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (4), /* mulsi_const9 */ + COSTS_N_INSNS (4), /* muldi */ + COSTS_N_INSNS (11), /* divsi */ + COSTS_N_INSNS (11), /* divdi */ + COSTS_N_INSNS (6), /* fp */ + COSTS_N_INSNS (6), /* dmul */ + COSTS_N_INSNS (19), /* sdiv */ + COSTS_N_INSNS (33), /* ddiv */ + 32, /* l1 cache line size */ + 32, /* l1 cache */ + 512, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPC601 processors. */ + static const + struct processor_costs ppc601_cost = { + COSTS_N_INSNS (5), /* mulsi */ + COSTS_N_INSNS (5), /* mulsi_const */ + COSTS_N_INSNS (5), /* mulsi_const9 */ + COSTS_N_INSNS (5), /* muldi */ + COSTS_N_INSNS (36), /* divsi */ + COSTS_N_INSNS (36), /* divdi */ + COSTS_N_INSNS (4), /* fp */ + COSTS_N_INSNS (5), /* dmul */ + COSTS_N_INSNS (17), /* sdiv */ + COSTS_N_INSNS (31), /* ddiv */ + 32, /* cache line size */ + 32, /* l1 cache */ + 256, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPC603 processors. */ + static const + struct processor_costs ppc603_cost = { + COSTS_N_INSNS (5), /* mulsi */ + COSTS_N_INSNS (3), /* mulsi_const */ + COSTS_N_INSNS (2), /* mulsi_const9 */ + COSTS_N_INSNS (5), /* muldi */ + COSTS_N_INSNS (37), /* divsi */ + COSTS_N_INSNS (37), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (4), /* dmul */ + COSTS_N_INSNS (18), /* sdiv */ + COSTS_N_INSNS (33), /* ddiv */ + 32, /* cache line size */ + 8, /* l1 cache */ + 64, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPC604 processors. */ + static const + struct processor_costs ppc604_cost = { + COSTS_N_INSNS (4), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (4), /* mulsi_const9 */ + COSTS_N_INSNS (4), /* muldi */ + COSTS_N_INSNS (20), /* divsi */ + COSTS_N_INSNS (20), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (18), /* sdiv */ + COSTS_N_INSNS (32), /* ddiv */ + 32, /* cache line size */ + 16, /* l1 cache */ + 512, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPC604e processors. */ + static const + struct processor_costs ppc604e_cost = { + COSTS_N_INSNS (2), /* mulsi */ + COSTS_N_INSNS (2), /* mulsi_const */ + COSTS_N_INSNS (2), /* mulsi_const9 */ + COSTS_N_INSNS (2), /* muldi */ + COSTS_N_INSNS (20), /* divsi */ + COSTS_N_INSNS (20), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (18), /* sdiv */ + COSTS_N_INSNS (32), /* ddiv */ + 32, /* cache line size */ + 32, /* l1 cache */ + 1024, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPC620 processors. 
*/ + static const + struct processor_costs ppc620_cost = { + COSTS_N_INSNS (5), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (3), /* mulsi_const9 */ + COSTS_N_INSNS (7), /* muldi */ + COSTS_N_INSNS (21), /* divsi */ + COSTS_N_INSNS (37), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (18), /* sdiv */ + COSTS_N_INSNS (32), /* ddiv */ + 128, /* cache line size */ + 32, /* l1 cache */ + 1024, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPC630 processors. */ + static const + struct processor_costs ppc630_cost = { + COSTS_N_INSNS (5), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (3), /* mulsi_const9 */ + COSTS_N_INSNS (7), /* muldi */ + COSTS_N_INSNS (21), /* divsi */ + COSTS_N_INSNS (37), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (17), /* sdiv */ + COSTS_N_INSNS (21), /* ddiv */ + 128, /* cache line size */ + 64, /* l1 cache */ + 1024, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on Cell processor. */ + /* COSTS_N_INSNS (1) ~ one add. */ + static const + struct processor_costs ppccell_cost = { + COSTS_N_INSNS (9/2)+2, /* mulsi */ + COSTS_N_INSNS (6/2), /* mulsi_const */ + COSTS_N_INSNS (6/2), /* mulsi_const9 */ + COSTS_N_INSNS (15/2)+2, /* muldi */ + COSTS_N_INSNS (38/2), /* divsi */ + COSTS_N_INSNS (70/2), /* divdi */ + COSTS_N_INSNS (10/2), /* fp */ + COSTS_N_INSNS (10/2), /* dmul */ + COSTS_N_INSNS (74/2), /* sdiv */ + COSTS_N_INSNS (74/2), /* ddiv */ + 128, /* cache line size */ + 32, /* l1 cache */ + 512, /* l2 cache */ + 6, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPC750 and PPC7400 processors. */ + static const + struct processor_costs ppc750_cost = { + COSTS_N_INSNS (5), /* mulsi */ + COSTS_N_INSNS (3), /* mulsi_const */ + COSTS_N_INSNS (2), /* mulsi_const9 */ + COSTS_N_INSNS (5), /* muldi */ + COSTS_N_INSNS (17), /* divsi */ + COSTS_N_INSNS (17), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (17), /* sdiv */ + COSTS_N_INSNS (31), /* ddiv */ + 32, /* cache line size */ + 32, /* l1 cache */ + 512, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPC7450 processors. */ + static const + struct processor_costs ppc7450_cost = { + COSTS_N_INSNS (4), /* mulsi */ + COSTS_N_INSNS (3), /* mulsi_const */ + COSTS_N_INSNS (3), /* mulsi_const9 */ + COSTS_N_INSNS (4), /* muldi */ + COSTS_N_INSNS (23), /* divsi */ + COSTS_N_INSNS (23), /* divdi */ + COSTS_N_INSNS (5), /* fp */ + COSTS_N_INSNS (5), /* dmul */ + COSTS_N_INSNS (21), /* sdiv */ + COSTS_N_INSNS (35), /* ddiv */ + 32, /* cache line size */ + 32, /* l1 cache */ + 1024, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPC8540 processors. */ + static const + struct processor_costs ppc8540_cost = { + COSTS_N_INSNS (4), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (4), /* mulsi_const9 */ + COSTS_N_INSNS (4), /* muldi */ + COSTS_N_INSNS (19), /* divsi */ + COSTS_N_INSNS (19), /* divdi */ + COSTS_N_INSNS (4), /* fp */ + COSTS_N_INSNS (4), /* dmul */ + COSTS_N_INSNS (29), /* sdiv */ + COSTS_N_INSNS (29), /* ddiv */ + 32, /* cache line size */ + 32, /* l1 cache */ + 256, /* l2 cache */ + 1, /* prefetch streams /*/ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on E300C2 and E300C3 cores. 
*/ + static const + struct processor_costs ppce300c2c3_cost = { + COSTS_N_INSNS (4), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (4), /* mulsi_const9 */ + COSTS_N_INSNS (4), /* muldi */ + COSTS_N_INSNS (19), /* divsi */ + COSTS_N_INSNS (19), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (4), /* dmul */ + COSTS_N_INSNS (18), /* sdiv */ + COSTS_N_INSNS (33), /* ddiv */ + 32, + 16, /* l1 cache */ + 16, /* l2 cache */ + 1, /* prefetch streams /*/ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPCE500MC processors. */ + static const + struct processor_costs ppce500mc_cost = { + COSTS_N_INSNS (4), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (4), /* mulsi_const9 */ + COSTS_N_INSNS (4), /* muldi */ + COSTS_N_INSNS (14), /* divsi */ + COSTS_N_INSNS (14), /* divdi */ + COSTS_N_INSNS (8), /* fp */ + COSTS_N_INSNS (10), /* dmul */ + COSTS_N_INSNS (36), /* sdiv */ + COSTS_N_INSNS (66), /* ddiv */ + 64, /* cache line size */ + 32, /* l1 cache */ + 128, /* l2 cache */ + 1, /* prefetch streams /*/ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPCE500MC64 processors. */ + static const + struct processor_costs ppce500mc64_cost = { + COSTS_N_INSNS (4), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (4), /* mulsi_const9 */ + COSTS_N_INSNS (4), /* muldi */ + COSTS_N_INSNS (14), /* divsi */ + COSTS_N_INSNS (14), /* divdi */ + COSTS_N_INSNS (4), /* fp */ + COSTS_N_INSNS (10), /* dmul */ + COSTS_N_INSNS (36), /* sdiv */ + COSTS_N_INSNS (66), /* ddiv */ + 64, /* cache line size */ + 32, /* l1 cache */ + 128, /* l2 cache */ + 1, /* prefetch streams /*/ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPCE5500 processors. */ + static const + struct processor_costs ppce5500_cost = { + COSTS_N_INSNS (5), /* mulsi */ + COSTS_N_INSNS (5), /* mulsi_const */ + COSTS_N_INSNS (4), /* mulsi_const9 */ + COSTS_N_INSNS (5), /* muldi */ + COSTS_N_INSNS (14), /* divsi */ + COSTS_N_INSNS (14), /* divdi */ + COSTS_N_INSNS (7), /* fp */ + COSTS_N_INSNS (10), /* dmul */ + COSTS_N_INSNS (36), /* sdiv */ + COSTS_N_INSNS (66), /* ddiv */ + 64, /* cache line size */ + 32, /* l1 cache */ + 128, /* l2 cache */ + 1, /* prefetch streams /*/ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPCE6500 processors. */ + static const + struct processor_costs ppce6500_cost = { + COSTS_N_INSNS (5), /* mulsi */ + COSTS_N_INSNS (5), /* mulsi_const */ + COSTS_N_INSNS (4), /* mulsi_const9 */ + COSTS_N_INSNS (5), /* muldi */ + COSTS_N_INSNS (14), /* divsi */ + COSTS_N_INSNS (14), /* divdi */ + COSTS_N_INSNS (7), /* fp */ + COSTS_N_INSNS (10), /* dmul */ + COSTS_N_INSNS (36), /* sdiv */ + COSTS_N_INSNS (66), /* ddiv */ + 64, /* cache line size */ + 32, /* l1 cache */ + 128, /* l2 cache */ + 1, /* prefetch streams /*/ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on AppliedMicro Titan processors. */ + static const + struct processor_costs titan_cost = { + COSTS_N_INSNS (5), /* mulsi */ + COSTS_N_INSNS (5), /* mulsi_const */ + COSTS_N_INSNS (5), /* mulsi_const9 */ + COSTS_N_INSNS (5), /* muldi */ + COSTS_N_INSNS (18), /* divsi */ + COSTS_N_INSNS (18), /* divdi */ + COSTS_N_INSNS (10), /* fp */ + COSTS_N_INSNS (10), /* dmul */ + COSTS_N_INSNS (46), /* sdiv */ + COSTS_N_INSNS (72), /* ddiv */ + 32, /* cache line size */ + 32, /* l1 cache */ + 512, /* l2 cache */ + 1, /* prefetch streams /*/ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on POWER4 and POWER5 processors. 
*/ + static const + struct processor_costs power4_cost = { + COSTS_N_INSNS (3), /* mulsi */ + COSTS_N_INSNS (2), /* mulsi_const */ + COSTS_N_INSNS (2), /* mulsi_const9 */ + COSTS_N_INSNS (4), /* muldi */ + COSTS_N_INSNS (18), /* divsi */ + COSTS_N_INSNS (34), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (17), /* sdiv */ + COSTS_N_INSNS (17), /* ddiv */ + 128, /* cache line size */ + 32, /* l1 cache */ + 1024, /* l2 cache */ + 8, /* prefetch streams /*/ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on POWER6 processors. */ + static const + struct processor_costs power6_cost = { + COSTS_N_INSNS (8), /* mulsi */ + COSTS_N_INSNS (8), /* mulsi_const */ + COSTS_N_INSNS (8), /* mulsi_const9 */ + COSTS_N_INSNS (8), /* muldi */ + COSTS_N_INSNS (22), /* divsi */ + COSTS_N_INSNS (28), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (13), /* sdiv */ + COSTS_N_INSNS (16), /* ddiv */ + 128, /* cache line size */ + 64, /* l1 cache */ + 2048, /* l2 cache */ + 16, /* prefetch streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on POWER7 processors. */ + static const + struct processor_costs power7_cost = { + COSTS_N_INSNS (2), /* mulsi */ + COSTS_N_INSNS (2), /* mulsi_const */ + COSTS_N_INSNS (2), /* mulsi_const9 */ + COSTS_N_INSNS (2), /* muldi */ + COSTS_N_INSNS (18), /* divsi */ + COSTS_N_INSNS (34), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (13), /* sdiv */ + COSTS_N_INSNS (16), /* ddiv */ + 128, /* cache line size */ + 32, /* l1 cache */ + 256, /* l2 cache */ + 12, /* prefetch streams */ + COSTS_N_INSNS (3), /* SF->DF convert */ + }; + + /* Instruction costs on POWER8 processors. */ + static const + struct processor_costs power8_cost = { + COSTS_N_INSNS (3), /* mulsi */ + COSTS_N_INSNS (3), /* mulsi_const */ + COSTS_N_INSNS (3), /* mulsi_const9 */ + COSTS_N_INSNS (3), /* muldi */ + COSTS_N_INSNS (19), /* divsi */ + COSTS_N_INSNS (35), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (14), /* sdiv */ + COSTS_N_INSNS (17), /* ddiv */ + 128, /* cache line size */ + 32, /* l1 cache */ + 512, /* l2 cache */ + 12, /* prefetch streams */ + COSTS_N_INSNS (3), /* SF->DF convert */ + }; + + /* Instruction costs on POWER9 processors. */ + static const + struct processor_costs power9_cost = { + COSTS_N_INSNS (3), /* mulsi */ + COSTS_N_INSNS (3), /* mulsi_const */ + COSTS_N_INSNS (3), /* mulsi_const9 */ + COSTS_N_INSNS (3), /* muldi */ + COSTS_N_INSNS (8), /* divsi */ + COSTS_N_INSNS (12), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (13), /* sdiv */ + COSTS_N_INSNS (18), /* ddiv */ + 128, /* cache line size */ + 32, /* l1 cache */ + 512, /* l2 cache */ + 8, /* prefetch streams */ + COSTS_N_INSNS (3), /* SF->DF convert */ + }; + + /* Instruction costs on POWER10 processors. */ + static const + struct processor_costs power10_cost = { + COSTS_N_INSNS (2), /* mulsi */ + COSTS_N_INSNS (2), /* mulsi_const */ + COSTS_N_INSNS (2), /* mulsi_const9 */ + COSTS_N_INSNS (2), /* muldi */ + COSTS_N_INSNS (6), /* divsi */ + COSTS_N_INSNS (6), /* divdi */ + COSTS_N_INSNS (2), /* fp */ + COSTS_N_INSNS (2), /* dmul */ + COSTS_N_INSNS (11), /* sdiv */ + COSTS_N_INSNS (13), /* ddiv */ + 128, /* cache line size */ + 32, /* l1 cache */ + 512, /* l2 cache */ + 16, /* prefetch streams */ + COSTS_N_INSNS (2), /* SF->DF convert */ + }; + + /* Instruction costs on POWER A2 processors. 
*/ + static const + struct processor_costs ppca2_cost = { + COSTS_N_INSNS (16), /* mulsi */ + COSTS_N_INSNS (16), /* mulsi_const */ + COSTS_N_INSNS (16), /* mulsi_const9 */ + COSTS_N_INSNS (16), /* muldi */ + COST[...] [diff truncated at 524288 bytes]
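
The visible portion of this diff is dominated by the per-CPU processor_costs tables in rs6000.cc, each field expressed relative to a single integer add via COSTS_N_INSNS and selected at option-override time through the rs6000_cost pointer declared earlier in the diff (the rtx-costs hook that reads it lies beyond the truncation point). The sketch below is not part of the commit and is not GCC's own code: it is a minimal, self-contained mirror of that pattern, with hypothetical names (example_costs, example_power10, example_cost) and an assumed COSTS_N_INSNS scaling of 4 cost units per instruction, matching the convention in GCC's rtl.h.

/* Illustrative sketch only, not from rs6000.cc.  Mirrors the
   processor_costs pattern above: a per-CPU latency table scaled
   relative to one integer add, chosen once and then consulted
   through a pointer, as the file does with rs6000_cost.  */

#include <cstdio>

/* Assumed scaling: one instruction == 4 cost units.  */
#define COSTS_N_INSNS(n) ((n) * 4)

struct example_costs
{
  int mulsi;   /* 32-bit multiply.  */
  int divsi;   /* 32-bit divide.    */
  int sdiv;    /* float divide.     */
};

/* Values taken from the power10_cost entry shown in the diff.  */
static const example_costs example_power10 = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (11),   /* sdiv  */
};

/* Plays the role of the rs6000_cost pointer: set once for the
   selected CPU, then read by every cost query.  */
static const example_costs *example_cost = &example_power10;

int
main ()
{
  std::printf ("mulsi is %d times the cost of an add\n",
               example_cost->mulsi / COSTS_N_INSNS (1));
  return 0;
}

Writing every entry as COSTS_N_INSNS (n) rather than a raw latency is what keeps the unit "one add" regardless of which table the -mcpu= option selects, so a cost query only has to return the field matching the operation and mode.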