From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path:
Received: by sourceware.org (Postfix, from userid 2153)
	id EF84D3857C51; Tue, 18 Jan 2022 12:42:24 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org EF84D3857C51
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
From: Jakub Jelinek
To: gcc-cvs@gcc.gnu.org, libstdc++-cvs@gcc.gnu.org
Subject: [gcc(refs/vendors/redhat/heads/gcc-12-branch)] Merge commit 'r12-6669-g38ec23fafb167ddfe840d7bb22b3e943d8a7d29e' into redhat/gcc-12-branch
X-Act-Checkin: gcc
X-Git-Author: Jakub Jelinek
X-Git-Refname: refs/vendors/redhat/heads/gcc-12-branch
X-Git-Oldrev: d8c9e50646a688fa39fd228289164868692b3474
X-Git-Newrev: 880787aef7a985a80f88a14f830fb554a33b1a87
Message-Id: <20220118124224.EF84D3857C51@sourceware.org>
Date: Tue, 18 Jan 2022 12:42:24 +0000 (GMT)
X-BeenThere: libstdc++-cvs@gcc.gnu.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Libstdc++-cvs mailing list
List-Unsubscribe:
List-Archive:
List-Help:
List-Subscribe:
X-List-Received-Date: Tue, 18 Jan 2022 12:42:25 -0000

https://gcc.gnu.org/g:880787aef7a985a80f88a14f830fb554a33b1a87

commit 880787aef7a985a80f88a14f830fb554a33b1a87
Merge: d8c9e50646a 38ec23fafb1
Author: Jakub Jelinek
Date:   Tue Jan 18 13:41:24 2022 +0100

    Merge commit 'r12-6669-g38ec23fafb167ddfe840d7bb22b3e943d8a7d29e' into redhat/gcc-12-branch

Diff:
 ChangeLog | 4 + MAINTAINERS | 14 +- contrib/ChangeLog | 14 + contrib/filter-clang-warnings.py | 14 +- contrib/gcc_update | 6 +- contrib/git-backport.py | 48 + contrib/header-tools/ChangeLog | 4 + contrib/header-tools/README | 34 +- contrib/maintainers-verify.sh | 45 - contrib/paranoia.cc | 2 +- fixincludes/ChangeLog | 18 + fixincludes/fixincl.x | 180 +- fixincludes/inclhack.def | 123 + fixincludes/tests/base/fcntl.h | 33 + fixincludes/tests/base/math.h | 34 + fixincludes/tests/base/time.h | 15 + gcc/BASE-VER | 2 +- gcc/ChangeLog | 2645 + gcc/DATESTAMP | 2 +- gcc/Makefile.in | 322 +- gcc/ada/ChangeLog | 123 + gcc/ada/gcc-interface/Make-lang.in | 6 +- gcc/ada/gcc-interface/config-lang.in | 2 +- gcc/ada/gcc-interface/{cuintp.c => cuintp.cc} | 0 gcc/ada/gcc-interface/decl.c | 10661 ---- gcc/ada/gcc-interface/decl.cc | 10661 ++++ gcc/ada/gcc-interface/gigi.h | 16 +- gcc/ada/gcc-interface/lang-specs.h | 2 +- gcc/ada/gcc-interface/{misc.c => misc.cc} | 0 gcc/ada/gcc-interface/{targtyps.c => targtyps.cc} | 0 gcc/ada/gcc-interface/{trans.c => trans.cc} | 0 gcc/ada/gcc-interface/utils.c | 7156 --- gcc/ada/gcc-interface/utils.cc | 7156 +++ gcc/ada/gcc-interface/utils2.c | 3050 -- gcc/ada/gcc-interface/utils2.cc | 3050 ++ gcc/ada/init.c | 2 +- gcc/ada/set_targ.ads | 4 +- gcc/{adjust-alignment.c => adjust-alignment.cc} | 0 gcc/{alias.c => alias.cc} | 0 gcc/alias.h | 2 +- gcc/{alloc-pool.c => alloc-pool.cc} | 0 gcc/analyzer/ChangeLog | 39 + gcc/analyzer/analyzer.cc | 4 +- gcc/analyzer/region-model-asm.cc | 2 +- gcc/analyzer/region.cc | 2 +- gcc/analyzer/sm-malloc.cc | 2 +- gcc/analyzer/supergraph.cc | 2 +- gcc/asan.c | 4692 -- gcc/asan.cc | 4692 ++ gcc/{attribs.c => attribs.cc} | 0 gcc/{auto-inc-dec.c => auto-inc-dec.cc} | 0 gcc/{auto-profile.c => auto-profile.cc} | 0 gcc/auto-profile.h | 2 +- gcc/basic-block.h | 2 +- gcc/{bb-reorder.c => bb-reorder.cc} | 0 gcc/{bitmap.c => bitmap.cc} | 0 gcc/btfout.c | 1133 - gcc/btfout.cc | 1133 + gcc/builtins.c | 11184 ----- gcc/builtins.cc | 11184 +++++ gcc/c-family/ChangeLog | 86 + gcc/c-family/c-ada-spec.c | 3528 -- gcc/c-family/c-ada-spec.cc | 3528 ++ gcc/c-family/c-ada-spec.h | 2 +- 
gcc/c-family/{c-attribs.c => c-attribs.cc} | 0 gcc/c-family/c-common.c | 9466 ---- gcc/c-family/c-common.cc | 9466 ++++ gcc/c-family/c-common.h | 30 +- gcc/c-family/c-cppbuiltin.c | 2009 - gcc/c-family/c-cppbuiltin.cc | 2009 + gcc/c-family/{c-dump.c => c-dump.cc} | 0 gcc/c-family/c-format.c | 5439 --- gcc/c-family/c-format.cc | 5439 +++ gcc/c-family/c-gimplify.c | 738 - gcc/c-family/c-gimplify.cc | 738 + gcc/c-family/{c-indentation.c => c-indentation.cc} | 0 gcc/c-family/c-indentation.h | 2 +- gcc/c-family/{c-lex.c => c-lex.cc} | 0 gcc/c-family/c-objc.h | 2 +- gcc/c-family/c-omp.c | 3265 -- gcc/c-family/c-omp.cc | 3265 ++ gcc/c-family/c-opts.c | 1842 - gcc/c-family/c-opts.cc | 1842 + gcc/c-family/{c-pch.c => c-pch.cc} | 0 gcc/c-family/{c-ppoutput.c => c-ppoutput.cc} | 0 gcc/c-family/c-pragma.c | 1656 - gcc/c-family/c-pragma.cc | 1656 + .../{c-pretty-print.c => c-pretty-print.cc} | 0 gcc/c-family/c-pretty-print.h | 2 +- gcc/c-family/{c-semantics.c => c-semantics.cc} | 0 gcc/c-family/{c-ubsan.c => c-ubsan.cc} | 0 gcc/c-family/{c-warn.c => c-warn.cc} | 0 gcc/c-family/c.opt | 20 + gcc/c-family/{cppspec.c => cppspec.cc} | 0 gcc/c-family/{stub-objc.c => stub-objc.cc} | 0 gcc/c/ChangeLog | 55 + gcc/c/Make-lang.in | 10 +- gcc/c/{c-aux-info.c => c-aux-info.cc} | 0 gcc/c/c-convert.c | 207 - gcc/c/c-convert.cc | 207 + gcc/c/c-decl.c | 12469 ----- gcc/c/c-decl.cc | 12469 +++++ gcc/c/{c-errors.c => c-errors.cc} | 0 gcc/c/{c-fold.c => c-fold.cc} | 0 gcc/c/{c-lang.c => c-lang.cc} | 0 gcc/c/{c-objc-common.c => c-objc-common.cc} | 0 gcc/c/c-objc-common.h | 2 +- gcc/c/c-parser.c | 23404 --------- gcc/c/c-parser.cc | 23404 +++++++++ gcc/c/c-parser.h | 2 +- gcc/c/c-tree.h | 22 +- gcc/c/c-typeck.c | 16079 ------ gcc/c/c-typeck.cc | 16079 ++++++ gcc/c/config-lang.in | 2 +- gcc/c/{gccspec.c => gccspec.cc} | 0 gcc/c/{gimple-parser.c => gimple-parser.cc} | 0 gcc/caller-save.c | 1400 - gcc/caller-save.cc | 1400 + gcc/calls.c | 5254 -- gcc/calls.cc | 5254 ++ gcc/{ccmp.c => ccmp.cc} | 0 gcc/cfg-flags.def | 6 +- gcc/{cfg.c => cfg.cc} | 0 gcc/cfganal.c | 1934 - gcc/cfganal.cc | 1934 + gcc/{cfgbuild.c => cfgbuild.cc} | 0 gcc/cfgcleanup.c | 3339 -- gcc/cfgcleanup.cc | 3339 ++ gcc/cfgexpand.c | 7030 --- gcc/cfgexpand.cc | 7030 +++ gcc/cfghooks.c | 1560 - gcc/cfghooks.cc | 1560 + gcc/cfghooks.h | 4 +- gcc/{cfgloop.c => cfgloop.cc} | 0 gcc/{cfgloopanal.c => cfgloopanal.cc} | 0 gcc/{cfgloopmanip.c => cfgloopmanip.cc} | 0 gcc/cfgrtl.c | 5366 -- gcc/cfgrtl.cc | 5366 ++ gcc/cgraph.c | 4273 -- gcc/cgraph.cc | 4273 ++ gcc/cgraph.h | 28 +- gcc/{cgraphbuild.c => cgraphbuild.cc} | 0 gcc/cgraphclones.c | 1160 - gcc/cgraphclones.cc | 1160 + gcc/cgraphunit.c | 2595 - gcc/cgraphunit.cc | 2595 + gcc/{collect-utils.c => collect-utils.cc} | 0 gcc/collect-utils.h | 2 +- gcc/{collect2-aix.c => collect2-aix.cc} | 0 gcc/collect2-aix.h | 2 +- gcc/collect2.c | 3078 -- gcc/collect2.cc | 3078 ++ gcc/combine-stack-adj.c | 854 - gcc/combine-stack-adj.cc | 854 + gcc/combine.c | 14960 ------ gcc/combine.cc | 14960 ++++++ gcc/common.opt | 10 +- .../{common-targhooks.c => common-targhooks.cc} | 0 gcc/common/config/aarch64/aarch64-common.c | 551 - gcc/common/config/aarch64/aarch64-common.cc | 551 + .../alpha/{alpha-common.c => alpha-common.cc} | 0 .../config/arc/{arc-common.c => arc-common.cc} | 0 gcc/common/config/arm/arm-common.c | 1116 - gcc/common/config/arm/arm-common.cc | 1116 + gcc/common/config/avr/avr-common.c | 153 - gcc/common/config/avr/avr-common.cc | 153 + .../config/bfin/{bfin-common.c => bfin-common.cc} | 0 .../config/bpf/{bpf-common.c => 
bpf-common.cc} | 0 .../config/c6x/{c6x-common.c => c6x-common.cc} | 0 .../config/cr16/{cr16-common.c => cr16-common.cc} | 0 .../config/cris/{cris-common.c => cris-common.cc} | 0 .../config/csky/{csky-common.c => csky-common.cc} | 0 .../config/{default-common.c => default-common.cc} | 0 .../{epiphany-common.c => epiphany-common.cc} | 0 .../config/fr30/{fr30-common.c => fr30-common.cc} | 0 .../config/frv/{frv-common.c => frv-common.cc} | 0 .../config/gcn/{gcn-common.c => gcn-common.cc} | 0 .../h8300/{h8300-common.c => h8300-common.cc} | 0 .../config/i386/{i386-common.c => i386-common.cc} | 0 gcc/common/config/i386/i386-isas.h | 4 +- .../config/ia64/{ia64-common.c => ia64-common.cc} | 0 .../iq2000/{iq2000-common.c => iq2000-common.cc} | 0 .../config/lm32/{lm32-common.c => lm32-common.cc} | 0 .../config/m32r/{m32r-common.c => m32r-common.cc} | 0 .../config/m68k/{m68k-common.c => m68k-common.cc} | 0 .../mcore/{mcore-common.c => mcore-common.cc} | 0 .../{microblaze-common.c => microblaze-common.cc} | 0 .../config/mips/{mips-common.c => mips-common.cc} | 0 .../config/mmix/{mmix-common.c => mmix-common.cc} | 0 .../{mn10300-common.c => mn10300-common.cc} | 0 .../msp430/{msp430-common.c => msp430-common.cc} | 0 .../nds32/{nds32-common.c => nds32-common.cc} | 0 .../nios2/{nios2-common.c => nios2-common.cc} | 0 .../nvptx/{nvptx-common.c => nvptx-common.cc} | 0 .../config/or1k/{or1k-common.c => or1k-common.cc} | 0 gcc/common/config/pa/{pa-common.c => pa-common.cc} | 0 .../pdp11/{pdp11-common.c => pdp11-common.cc} | 0 .../config/pru/{pru-common.c => pru-common.cc} | 0 .../riscv/{riscv-common.c => riscv-common.cc} | 0 .../rs6000/{rs6000-common.c => rs6000-common.cc} | 0 gcc/common/config/rx/{rx-common.c => rx-common.cc} | 0 .../config/s390/{s390-common.c => s390-common.cc} | 0 gcc/common/config/sh/{sh-common.c => sh-common.cc} | 0 .../sparc/{sparc-common.c => sparc-common.cc} | 0 .../tilegx/{tilegx-common.c => tilegx-common.cc} | 0 .../{tilepro-common.c => tilepro-common.cc} | 0 .../config/v850/{v850-common.c => v850-common.cc} | 0 .../config/vax/{vax-common.c => vax-common.cc} | 0 .../visium/{visium-common.c => visium-common.cc} | 0 .../{xstormy16-common.c => xstormy16-common.cc} | 0 .../xtensa/{xtensa-common.c => xtensa-common.cc} | 0 gcc/{compare-elim.c => compare-elim.cc} | 0 gcc/conditions.h | 2 +- gcc/config.gcc | 42 +- ...{aarch64-bti-insert.c => aarch64-bti-insert.cc} | 0 gcc/config/aarch64/aarch64-builtins.c | 3214 -- gcc/config/aarch64/aarch64-builtins.cc | 3214 ++ gcc/config/aarch64/{aarch64-c.c => aarch64-c.cc} | 0 gcc/config/aarch64/{aarch64-d.c => aarch64-d.cc} | 0 gcc/config/aarch64/aarch64-protos.h | 4 +- gcc/config/aarch64/aarch64-sve-builtins.cc | 2 +- gcc/config/aarch64/aarch64.c | 26861 ----------- gcc/config/aarch64/aarch64.cc | 26862 +++++++++++ gcc/config/aarch64/aarch64.h | 6 +- gcc/config/aarch64/cortex-a57-fma-steering.c | 1096 - gcc/config/aarch64/cortex-a57-fma-steering.cc | 1096 + gcc/config/aarch64/driver-aarch64.c | 470 - gcc/config/aarch64/driver-aarch64.cc | 470 + ...oidance.c => falkor-tag-collision-avoidance.cc} | 0 ...ost-aarch64-darwin.c => host-aarch64-darwin.cc} | 0 gcc/config/aarch64/t-aarch64 | 26 +- gcc/config/aarch64/x-aarch64 | 2 +- gcc/config/aarch64/x-darwin | 2 +- gcc/config/alpha/alpha-protos.h | 2 +- gcc/config/alpha/alpha.c | 10058 ---- gcc/config/alpha/alpha.cc | 10058 ++++ gcc/config/alpha/alpha.h | 8 +- gcc/config/alpha/alpha.md | 4 +- gcc/config/alpha/driver-alpha.c | 101 - gcc/config/alpha/driver-alpha.cc | 101 + gcc/config/alpha/x-alpha | 2 +- 
gcc/config/arc/{arc-c.c => arc-c.cc} | 0 gcc/config/arc/arc-protos.h | 2 +- gcc/config/arc/arc.c | 11769 ----- gcc/config/arc/arc.cc | 11769 +++++ gcc/config/arc/arc.md | 8 +- gcc/config/arc/builtins.def | 4 +- gcc/config/arc/{driver-arc.c => driver-arc.cc} | 0 gcc/config/arc/t-arc | 6 +- gcc/config/arm/{aarch-common.c => aarch-common.cc} | 0 gcc/config/arm/{arm-builtins.c => arm-builtins.cc} | 0 gcc/config/arm/arm-c.c | 505 - gcc/config/arm/arm-c.cc | 505 + gcc/config/arm/{arm-d.c => arm-d.cc} | 0 gcc/config/arm/arm-protos.h | 8 +- gcc/config/arm/arm.c | 34143 ------------- gcc/config/arm/arm.cc | 34143 +++++++++++++ gcc/config/arm/arm.h | 8 +- gcc/config/arm/arm.md | 2 +- gcc/config/arm/driver-arm.c | 137 - gcc/config/arm/driver-arm.cc | 137 + gcc/config/arm/symbian.h | 2 +- gcc/config/arm/t-arm | 16 +- gcc/config/arm/thumb1.md | 10 +- gcc/config/arm/x-arm | 2 +- gcc/config/avr/avr-c.c | 509 - gcc/config/avr/avr-c.cc | 509 + gcc/config/avr/{avr-devices.c => avr-devices.cc} | 0 gcc/config/avr/avr-fixed.md | 2 +- gcc/config/avr/avr-log.c | 325 - gcc/config/avr/avr-log.cc | 325 + gcc/config/avr/avr-mcus.def | 2 +- gcc/config/avr/avr-modes.def | 2 +- gcc/config/avr/avr-passes.def | 2 +- gcc/config/avr/avr-protos.h | 4 +- gcc/config/avr/avr.c | 14717 ------ gcc/config/avr/avr.cc | 14717 ++++++ gcc/config/avr/avr.h | 4 +- gcc/config/avr/avr.md | 6 +- gcc/config/avr/builtins.def | 4 +- gcc/config/avr/{driver-avr.c => driver-avr.cc} | 0 gcc/config/avr/gen-avr-mmcu-specs.c | 323 - gcc/config/avr/gen-avr-mmcu-specs.cc | 323 + gcc/config/avr/gen-avr-mmcu-texi.c | 202 - gcc/config/avr/gen-avr-mmcu-texi.cc | 202 + gcc/config/avr/t-avr | 18 +- gcc/config/bfin/bfin.c | 5883 --- gcc/config/bfin/bfin.cc | 5883 +++ gcc/config/bpf/bpf-protos.h | 2 +- gcc/config/bpf/{bpf.c => bpf.cc} | 0 gcc/config/bpf/bpf.h | 2 +- gcc/config/bpf/{coreout.c => coreout.cc} | 0 gcc/config/bpf/t-bpf | 2 +- gcc/config/c6x/c6x-protos.h | 4 +- gcc/config/c6x/{c6x.c => c6x.cc} | 0 gcc/config/cr16/cr16-protos.h | 2 +- gcc/config/cr16/{cr16.c => cr16.cc} | 0 gcc/config/cris/cris.c | 3729 -- gcc/config/cris/cris.cc | 3729 ++ gcc/config/cris/cris.h | 8 +- gcc/config/cris/cris.opt | 2 +- gcc/config/cris/sync.md | 2 +- gcc/config/csky/csky.c | 7329 --- gcc/config/csky/csky.cc | 7329 +++ gcc/config/darwin-c.c | 889 - gcc/config/darwin-c.cc | 889 + gcc/config/{darwin-d.c => darwin-d.cc} | 0 gcc/config/{darwin-driver.c => darwin-driver.cc} | 0 gcc/config/darwin-f.c | 60 - gcc/config/darwin-f.cc | 60 + gcc/config/darwin-sections.def | 8 +- gcc/config/darwin.c | 3886 -- gcc/config/darwin.cc | 3886 ++ gcc/config/darwin.h | 4 +- gcc/config/{default-c.c => default-c.cc} | 0 gcc/config/{default-d.c => default-d.cc} | 0 gcc/config/{dragonfly-d.c => dragonfly-d.cc} | 0 gcc/config/elfos.h | 2 +- gcc/config/epiphany/epiphany-sched.md | 2 +- gcc/config/epiphany/epiphany.c | 3047 -- gcc/config/epiphany/epiphany.cc | 3047 ++ gcc/config/epiphany/epiphany.h | 6 +- gcc/config/epiphany/mode-switch-use.c | 107 - gcc/config/epiphany/mode-switch-use.cc | 107 + gcc/config/epiphany/predicates.md | 2 +- .../{resolve-sw-modes.c => resolve-sw-modes.cc} | 0 gcc/config/epiphany/t-epiphany | 4 +- gcc/config/fr30/fr30-protos.h | 2 +- gcc/config/fr30/{fr30.c => fr30.cc} | 0 gcc/config/{freebsd-d.c => freebsd-d.cc} | 0 gcc/config/frv/frv-protos.h | 2 +- gcc/config/frv/frv.c | 9451 ---- gcc/config/frv/frv.cc | 9451 ++++ gcc/config/frv/frv.h | 2 +- gcc/config/ft32/ft32-protos.h | 2 +- gcc/config/ft32/{ft32.c => ft32.cc} | 0 gcc/config/gcn/{driver-gcn.c => driver-gcn.cc} | 
0 gcc/config/gcn/gcn-hsa.h | 2 +- gcc/config/gcn/{gcn-run.c => gcn-run.cc} | 0 gcc/config/gcn/{gcn-tree.c => gcn-tree.cc} | 0 gcc/config/gcn/gcn.c | 6672 --- gcc/config/gcn/gcn.cc | 6672 +++ gcc/config/gcn/mkoffload.c | 1174 - gcc/config/gcn/mkoffload.cc | 1174 + gcc/config/gcn/t-gcn-hsa | 8 +- gcc/config/gcn/t-omp-device | 2 +- gcc/config/{glibc-c.c => glibc-c.cc} | 0 gcc/config/{glibc-d.c => glibc-d.cc} | 0 gcc/config/h8300/h8300-protos.h | 4 +- gcc/config/h8300/h8300.c | 5632 --- gcc/config/h8300/h8300.cc | 5632 +++ gcc/config/h8300/h8300.h | 6 +- gcc/config/{host-darwin.c => host-darwin.cc} | 0 gcc/config/{host-hpux.c => host-hpux.cc} | 0 gcc/config/{host-linux.c => host-linux.cc} | 0 gcc/config/{host-netbsd.c => host-netbsd.cc} | 0 gcc/config/{host-openbsd.c => host-openbsd.cc} | 0 gcc/config/{host-solaris.c => host-solaris.cc} | 0 gcc/config/i386/cygming.h | 4 +- gcc/config/i386/{djgpp.c => djgpp.cc} | 0 gcc/config/i386/djgpp.h | 2 +- gcc/config/i386/dragonfly.h | 2 +- gcc/config/i386/driver-i386.c | 841 - gcc/config/i386/driver-i386.cc | 841 + .../i386/{driver-mingw32.c => driver-mingw32.cc} | 0 gcc/config/i386/freebsd.h | 2 +- .../i386/{gnu-property.c => gnu-property.cc} | 0 gcc/config/i386/{host-cygwin.c => host-cygwin.cc} | 0 .../{host-i386-darwin.c => host-i386-darwin.cc} | 0 .../i386/{host-mingw32.c => host-mingw32.cc} | 0 .../i386/{i386-builtins.c => i386-builtins.cc} | 0 gcc/config/i386/i386-c.c | 817 - gcc/config/i386/i386-c.cc | 817 + gcc/config/i386/{i386-d.c => i386-d.cc} | 0 gcc/config/i386/i386-expand.c | 23247 --------- gcc/config/i386/i386-expand.cc | 23280 +++++++++ .../i386/{i386-features.c => i386-features.cc} | 0 gcc/config/i386/i386-options.c | 3863 -- gcc/config/i386/i386-options.cc | 3863 ++ gcc/config/i386/i386-protos.h | 13 +- gcc/config/i386/i386.c | 24709 ---------- gcc/config/i386/i386.cc | 24709 ++++++++++ gcc/config/i386/i386.h | 18 +- gcc/config/i386/i386.md | 28 +- ...{intelmic-mkoffload.c => intelmic-mkoffload.cc} | 0 gcc/config/i386/lynx.h | 2 +- gcc/config/i386/mmx.md | 8 +- gcc/config/i386/{msformat-c.c => msformat-c.cc} | 0 gcc/config/i386/sse.md | 87 +- gcc/config/i386/subst.md | 7 + gcc/config/i386/t-cygming | 18 +- gcc/config/i386/t-djgpp | 4 +- gcc/config/i386/t-gnu-property | 2 +- gcc/config/i386/t-i386 | 20 +- gcc/config/i386/t-intelmic | 2 +- gcc/config/i386/t-omp-device | 4 +- gcc/config/i386/winnt-cxx.c | 177 - gcc/config/i386/winnt-cxx.cc | 177 + gcc/config/i386/{winnt-d.c => winnt-d.cc} | 0 gcc/config/i386/{winnt-stubs.c => winnt-stubs.cc} | 0 gcc/config/i386/winnt.c | 1377 - gcc/config/i386/winnt.cc | 1377 + gcc/config/i386/x-cygwin | 4 +- gcc/config/i386/x-darwin | 2 +- gcc/config/i386/x-i386 | 2 +- gcc/config/i386/x-mingw32 | 6 +- ...86-tune-sched-atom.c => x86-tune-sched-atom.cc} | 0 .../{x86-tune-sched-bd.c => x86-tune-sched-bd.cc} | 0 gcc/config/i386/x86-tune-sched-core.c | 257 - gcc/config/i386/x86-tune-sched-core.cc | 257 + .../i386/{x86-tune-sched.c => x86-tune-sched.cc} | 0 gcc/config/i386/x86-tune.def | 10 +- gcc/config/i386/xm-djgpp.h | 4 +- gcc/config/ia64/freebsd.h | 4 +- gcc/config/ia64/hpux.h | 2 +- gcc/config/ia64/{ia64-c.c => ia64-c.cc} | 0 gcc/config/ia64/ia64-protos.h | 2 +- gcc/config/ia64/ia64.c | 11927 ----- gcc/config/ia64/ia64.cc | 11927 +++++ gcc/config/ia64/ia64.h | 2 +- gcc/config/ia64/ia64.md | 4 +- gcc/config/ia64/predicates.md | 2 +- gcc/config/ia64/sysv4.h | 4 +- gcc/config/ia64/t-ia64 | 6 +- gcc/config/iq2000/{iq2000.c => iq2000.cc} | 0 gcc/config/iq2000/iq2000.h | 2 +- gcc/config/iq2000/iq2000.md | 4 +- 
gcc/config/{linux.c => linux.cc} | 0 gcc/config/linux.h | 4 +- gcc/config/lm32/{lm32.c => lm32.cc} | 0 gcc/config/m32c/{m32c-pragma.c => m32c-pragma.cc} | 0 gcc/config/m32c/m32c.c | 4506 -- gcc/config/m32c/m32c.cc | 4506 ++ gcc/config/m32c/m32c.h | 2 +- gcc/config/m32c/t-m32c | 2 +- gcc/config/m32r/m32r-protos.h | 2 +- gcc/config/m32r/m32r.c | 2959 -- gcc/config/m32r/m32r.cc | 2959 ++ gcc/config/m32r/m32r.h | 4 +- gcc/config/m32r/m32r.md | 2 +- gcc/config/m68k/m68k-isas.def | 2 +- gcc/config/m68k/m68k-microarchs.def | 2 +- gcc/config/m68k/m68k-protos.h | 6 +- gcc/config/m68k/m68k.c | 7154 --- gcc/config/m68k/m68k.cc | 7154 +++ gcc/config/m68k/m68k.h | 4 +- gcc/config/m68k/m68k.md | 4 +- gcc/config/m68k/m68kemb.h | 2 +- gcc/config/m68k/uclinux.h | 2 +- gcc/config/mcore/mcore-protos.h | 2 +- gcc/config/mcore/mcore.c | 3290 -- gcc/config/mcore/mcore.cc | 3290 ++ gcc/config/mcore/mcore.h | 4 +- gcc/config/mcore/mcore.md | 2 +- .../microblaze/{microblaze-c.c => microblaze-c.cc} | 0 gcc/config/microblaze/microblaze-protos.h | 2 +- gcc/config/microblaze/microblaze.c | 4070 -- gcc/config/microblaze/microblaze.cc | 4070 ++ gcc/config/microblaze/microblaze.h | 2 +- gcc/config/microblaze/microblaze.md | 4 +- gcc/config/microblaze/t-microblaze | 4 +- gcc/config/mips/driver-native.c | 91 - gcc/config/mips/driver-native.cc | 91 + .../{frame-header-opt.c => frame-header-opt.cc} | 0 gcc/config/mips/loongson2ef.md | 2 +- gcc/config/mips/{mips-d.c => mips-d.cc} | 0 gcc/config/mips/mips-protos.h | 2 +- gcc/config/mips/mips.c | 22925 --------- gcc/config/mips/mips.cc | 22925 +++++++++ gcc/config/mips/mips.h | 2 +- gcc/config/mips/mips.md | 2 +- gcc/config/mips/t-mips | 4 +- gcc/config/mips/x-native | 2 +- gcc/config/mmix/mmix-protos.h | 2 +- gcc/config/mmix/mmix.c | 2872 -- gcc/config/mmix/mmix.cc | 2872 ++ gcc/config/mmix/mmix.h | 2 +- gcc/config/mmix/mmix.md | 4 +- gcc/config/mmix/predicates.md | 2 +- gcc/config/mn10300/mn10300.c | 3423 -- gcc/config/mn10300/mn10300.cc | 3423 ++ gcc/config/mn10300/mn10300.h | 6 +- gcc/config/moxie/moxie-protos.h | 2 +- gcc/config/moxie/{moxie.c => moxie.cc} | 0 gcc/config/moxie/uclinux.h | 2 +- .../msp430/{driver-msp430.c => driver-msp430.cc} | 0 gcc/config/msp430/{msp430-c.c => msp430-c.cc} | 0 gcc/config/msp430/msp430-devices.c | 1110 - gcc/config/msp430/msp430-devices.cc | 1110 + gcc/config/msp430/msp430.c | 4521 -- gcc/config/msp430/msp430.cc | 4521 ++ gcc/config/msp430/msp430.h | 4 +- gcc/config/msp430/t-msp430 | 6 +- gcc/config/nds32/nds32-cost.c | 726 - gcc/config/nds32/nds32-cost.cc | 726 + gcc/config/nds32/nds32-doubleword.md | 2 +- .../nds32/{nds32-fp-as-gp.c => nds32-fp-as-gp.cc} | 0 .../{nds32-intrinsic.c => nds32-intrinsic.cc} | 0 gcc/config/nds32/{nds32-isr.c => nds32-isr.cc} | 0 ...{nds32-md-auxiliary.c => nds32-md-auxiliary.cc} | 0 ...manipulation.c => nds32-memory-manipulation.cc} | 0 ...es-auxiliary.c => nds32-pipelines-auxiliary.cc} | 0 .../{nds32-predicates.c => nds32-predicates.cc} | 0 .../{nds32-relax-opt.c => nds32-relax-opt.cc} | 0 gcc/config/nds32/{nds32-utils.c => nds32-utils.cc} | 0 gcc/config/nds32/nds32.c | 5895 --- gcc/config/nds32/nds32.cc | 5895 +++ gcc/config/nds32/nds32.h | 10 +- gcc/config/nds32/predicates.md | 12 +- gcc/config/nds32/t-nds32 | 40 +- gcc/config/{netbsd-d.c => netbsd-d.cc} | 0 gcc/config/{netbsd.c => netbsd.cc} | 0 gcc/config/nios2/nios2.c | 5624 --- gcc/config/nios2/nios2.cc | 5624 +++ gcc/config/nvptx/{mkoffload.c => mkoffload.cc} | 0 gcc/config/nvptx/{nvptx-c.c => nvptx-c.cc} | 0 gcc/config/nvptx/nvptx-protos.h | 2 +- 
gcc/config/nvptx/nvptx.c | 7011 --- gcc/config/nvptx/nvptx.cc | 7011 +++ gcc/config/nvptx/nvptx.h | 2 +- gcc/config/nvptx/t-nvptx | 4 +- gcc/config/nvptx/t-omp-device | 2 +- gcc/config/{openbsd-d.c => openbsd-d.cc} | 0 gcc/config/or1k/{or1k.c => or1k.cc} | 0 gcc/config/pa/elf.h | 2 +- gcc/config/pa/{pa-d.c => pa-d.cc} | 0 gcc/config/pa/pa-linux.h | 2 +- gcc/config/pa/pa-netbsd.h | 2 +- gcc/config/pa/pa-openbsd.h | 2 +- gcc/config/pa/pa-protos.h | 10 +- gcc/config/pa/pa.c | 11080 ----- gcc/config/pa/pa.cc | 11080 +++++ gcc/config/pa/pa.h | 8 +- gcc/config/pa/pa.md | 8 +- gcc/config/pa/som.h | 2 +- gcc/config/pa/t-pa | 2 +- gcc/config/pdp11/pdp11.c | 2417 - gcc/config/pdp11/pdp11.cc | 2417 + gcc/config/pdp11/pdp11.h | 2 +- gcc/config/pdp11/pdp11.md | 2 +- gcc/config/pdp11/t-pdp11 | 2 +- gcc/config/pru/{pru-passes.c => pru-passes.cc} | 0 gcc/config/pru/{pru-pragma.c => pru-pragma.cc} | 0 gcc/config/pru/{pru.c => pru.cc} | 0 gcc/config/pru/pru.md | 2 +- gcc/config/pru/t-pru | 4 +- .../riscv/{riscv-builtins.c => riscv-builtins.cc} | 0 gcc/config/riscv/{riscv-c.c => riscv-c.cc} | 0 gcc/config/riscv/{riscv-d.c => riscv-d.cc} | 0 gcc/config/riscv/riscv-protos.h | 10 +- ...-shorten-memrefs.c => riscv-shorten-memrefs.cc} | 0 gcc/config/riscv/{riscv-sr.c => riscv-sr.cc} | 0 gcc/config/riscv/riscv.c | 5783 --- gcc/config/riscv/riscv.cc | 5783 +++ gcc/config/riscv/riscv.h | 4 +- gcc/config/riscv/t-riscv | 16 +- gcc/config/rl78/{rl78-c.c => rl78-c.cc} | 0 gcc/config/rl78/rl78.c | 4977 -- gcc/config/rl78/rl78.cc | 4977 ++ gcc/config/rl78/t-rl78 | 2 +- gcc/config/rs6000/aix.h | 4 +- gcc/config/rs6000/aix71.h | 2 +- gcc/config/rs6000/aix72.h | 2 +- gcc/config/rs6000/aix73.h | 2 +- gcc/config/rs6000/altivec.md | 25 - gcc/config/rs6000/darwin.h | 6 +- gcc/config/rs6000/driver-rs6000.c | 638 - gcc/config/rs6000/driver-rs6000.cc | 638 + gcc/config/rs6000/freebsd.h | 4 +- gcc/config/rs6000/freebsd64.h | 4 +- .../rs6000/{host-darwin.c => host-darwin.cc} | 0 .../{host-ppc64-darwin.c => host-ppc64-darwin.cc} | 0 gcc/config/rs6000/lynx.h | 2 +- gcc/config/rs6000/rbtree.c | 242 - gcc/config/rs6000/rbtree.cc | 242 + gcc/config/rs6000/rbtree.h | 2 +- gcc/config/rs6000/rs6000-c.c | 2076 - gcc/config/rs6000/rs6000-c.cc | 2076 + gcc/config/rs6000/rs6000-call.c | 6521 --- gcc/config/rs6000/rs6000-call.cc | 6521 +++ gcc/config/rs6000/rs6000-cpus.def | 2 +- gcc/config/rs6000/{rs6000-d.c => rs6000-d.cc} | 0 gcc/config/rs6000/rs6000-gen-builtins.c | 3076 -- gcc/config/rs6000/rs6000-gen-builtins.cc | 3076 ++ gcc/config/rs6000/rs6000-internal.h | 10 +- .../rs6000/{rs6000-linux.c => rs6000-linux.cc} | 0 gcc/config/rs6000/rs6000-logue.c | 5689 --- gcc/config/rs6000/rs6000-logue.cc | 5689 +++ gcc/config/rs6000/rs6000-overload.def | 10 +- gcc/config/rs6000/rs6000-p8swap.c | 2788 -- gcc/config/rs6000/rs6000-p8swap.cc | 2788 ++ .../{rs6000-pcrel-opt.c => rs6000-pcrel-opt.cc} | 0 gcc/config/rs6000/rs6000-protos.h | 6 +- .../rs6000/{rs6000-string.c => rs6000-string.cc} | 0 gcc/config/rs6000/rs6000.c | 28942 ----------- gcc/config/rs6000/rs6000.cc | 28942 +++++++++++ gcc/config/rs6000/rs6000.h | 16 +- gcc/config/rs6000/rs6000.md | 2 +- gcc/config/rs6000/sysv4.h | 2 +- gcc/config/rs6000/t-linux | 2 +- gcc/config/rs6000/t-linux64 | 2 +- gcc/config/rs6000/t-rs6000 | 34 +- gcc/config/rs6000/vsx.md | 32 +- gcc/config/rs6000/x-darwin | 2 +- gcc/config/rs6000/x-darwin64 | 2 +- gcc/config/rs6000/x-rs6000 | 2 +- gcc/config/rs6000/xcoff.h | 4 +- gcc/config/rx/rx.c | 3812 -- gcc/config/rx/rx.cc | 3812 ++ gcc/config/s390/constraints.md | 8 +- 
gcc/config/s390/driver-native.c | 189 - gcc/config/s390/driver-native.cc | 189 + gcc/config/s390/htmxlintrin.h | 2 +- gcc/config/s390/s390-builtins.def | 8 +- gcc/config/s390/s390-builtins.h | 2 +- gcc/config/s390/s390-c.c | 1100 - gcc/config/s390/s390-c.cc | 1100 + gcc/config/s390/{s390-d.c => s390-d.cc} | 0 gcc/config/s390/s390-opts.h | 2 +- gcc/config/s390/s390-protos.h | 10 +- gcc/config/s390/s390.c | 17478 ------- gcc/config/s390/s390.cc | 17478 +++++++ gcc/config/s390/s390.h | 8 +- gcc/config/s390/s390.md | 8 +- gcc/config/s390/t-s390 | 6 +- gcc/config/s390/vx-builtins.md | 2 +- gcc/config/s390/x-native | 2 +- gcc/config/sh/divtab-sh4-300.c | 77 - gcc/config/sh/divtab-sh4-300.cc | 77 + gcc/config/sh/divtab-sh4.c | 85 - gcc/config/sh/divtab-sh4.cc | 85 + gcc/config/sh/divtab.c | 203 - gcc/config/sh/divtab.cc | 203 + gcc/config/sh/elf.h | 2 +- gcc/config/sh/{sh-c.c => sh-c.cc} | 0 gcc/config/sh/sh-protos.h | 2 +- gcc/config/sh/sh.c | 12630 ----- gcc/config/sh/sh.cc | 12630 +++++ gcc/config/sh/sh.h | 8 +- gcc/config/sh/t-sh | 4 +- gcc/config/{sol2-c.c => sol2-c.cc} | 0 gcc/config/{sol2-cxx.c => sol2-cxx.cc} | 0 gcc/config/{sol2-d.c => sol2-d.cc} | 0 gcc/config/sol2-protos.h | 6 +- gcc/config/{sol2-stubs.c => sol2-stubs.cc} | 0 gcc/config/{sol2.c => sol2.cc} | 0 gcc/config/sol2.h | 4 +- gcc/config/sparc/driver-sparc.c | 169 - gcc/config/sparc/driver-sparc.cc | 169 + gcc/config/sparc/freebsd.h | 4 +- gcc/config/sparc/{sparc-c.c => sparc-c.cc} | 0 gcc/config/sparc/{sparc-d.c => sparc-d.cc} | 0 gcc/config/sparc/sparc-protos.h | 2 +- gcc/config/sparc/sparc.c | 13958 ------ gcc/config/sparc/sparc.cc | 13958 ++++++ gcc/config/sparc/sparc.h | 6 +- gcc/config/sparc/sparc.md | 4 +- gcc/config/sparc/t-sparc | 4 +- gcc/config/sparc/x-sparc | 2 +- gcc/config/stormy16/stormy16.c | 2749 -- gcc/config/stormy16/stormy16.cc | 2749 ++ gcc/config/t-darwin | 10 +- gcc/config/t-dragonfly | 2 +- gcc/config/t-freebsd | 2 +- gcc/config/t-glibc | 4 +- gcc/config/t-linux | 2 +- gcc/config/t-netbsd | 4 +- gcc/config/t-openbsd | 2 +- gcc/config/t-pnt16-warn | 2 +- gcc/config/t-sol2 | 10 +- gcc/config/t-vxworks | 4 +- gcc/config/t-winnt | 2 +- gcc/config/tilegx/{mul-tables.c => mul-tables.cc} | 0 gcc/config/tilegx/t-tilegx | 6 +- gcc/config/tilegx/tilegx-c.c | 55 - gcc/config/tilegx/tilegx-c.cc | 55 + gcc/config/tilegx/tilegx-protos.h | 2 +- gcc/config/tilegx/{tilegx.c => tilegx.cc} | 0 gcc/config/tilegx/tilegx.md | 2 +- gcc/config/tilepro/{mul-tables.c => mul-tables.cc} | 0 gcc/config/tilepro/t-tilepro | 6 +- gcc/config/tilepro/tilepro-c.c | 54 - gcc/config/tilepro/tilepro-c.cc | 54 + gcc/config/tilepro/{tilepro.c => tilepro.cc} | 0 gcc/config/v850/t-v850 | 4 +- gcc/config/v850/{v850-c.c => v850-c.cc} | 0 gcc/config/v850/v850-protos.h | 2 +- gcc/config/v850/v850.c | 3343 -- gcc/config/v850/v850.cc | 3343 ++ gcc/config/v850/v850.h | 6 +- gcc/config/vax/vax.c | 2212 - gcc/config/vax/vax.cc | 2212 + gcc/config/vax/vax.h | 2 +- gcc/config/vax/vax.md | 2 +- gcc/config/visium/visium.c | 4332 -- gcc/config/visium/visium.cc | 4332 ++ gcc/config/visium/visium.h | 6 +- gcc/config/vms/t-vms | 6 +- gcc/config/vms/{vms-c.c => vms-c.cc} | 0 gcc/config/vms/vms-crtlmap.map | 2 +- gcc/config/vms/{vms-f.c => vms-f.cc} | 0 gcc/config/vms/vms-protos.h | 4 +- gcc/config/vms/{vms.c => vms.cc} | 0 gcc/config/vx-common.h | 2 +- gcc/config/{vxworks-c.c => vxworks-c.cc} | 0 gcc/config/{vxworks.c => vxworks.cc} | 0 gcc/config/{winnt-c.c => winnt-c.cc} | 0 gcc/config/x-darwin | 2 +- gcc/config/x-hpux | 4 +- gcc/config/x-linux | 2 +- 
gcc/config/x-netbsd | 4 +- gcc/config/x-openbsd | 4 +- gcc/config/x-solaris | 2 +- gcc/config/xtensa/xtensa-protos.h | 2 +- gcc/config/xtensa/xtensa.c | 4489 -- gcc/config/xtensa/xtensa.cc | 4489 ++ gcc/config/xtensa/xtensa.h | 2 +- gcc/configure | 14 +- gcc/configure.ac | 14 +- gcc/context.c | 44 - gcc/context.cc | 44 + gcc/{convert.c => convert.cc} | 0 gcc/convert.h | 2 +- gcc/coretypes.h | 2 +- gcc/coverage.c | 1392 - gcc/coverage.cc | 1392 + gcc/coverage.h | 2 +- gcc/cp/ChangeLog | 177 + gcc/cp/Make-lang.in | 8 +- gcc/cp/{call.c => call.cc} | 0 gcc/cp/{class.c => class.cc} | 0 gcc/cp/config-lang.in | 34 +- gcc/cp/constexpr.c | 9561 ---- gcc/cp/constexpr.cc | 9561 ++++ gcc/cp/coroutines.cc | 2 +- gcc/cp/cp-gimplify.c | 3280 -- gcc/cp/cp-gimplify.cc | 3285 ++ gcc/cp/cp-lang.c | 291 - gcc/cp/cp-lang.cc | 291 + gcc/cp/cp-objcp-common.c | 594 - gcc/cp/cp-objcp-common.cc | 594 + gcc/cp/cp-objcp-common.h | 4 +- gcc/cp/cp-tree.h | 84 +- gcc/cp/{cp-ubsan.c => cp-ubsan.cc} | 0 gcc/cp/cvt.c | 2153 - gcc/cp/cvt.cc | 2153 + gcc/cp/{cxx-pretty-print.c => cxx-pretty-print.cc} | 0 gcc/cp/decl.c | 18355 ------- gcc/cp/decl.cc | 18355 +++++++ gcc/cp/decl2.c | 5992 --- gcc/cp/decl2.cc | 5992 +++ gcc/cp/{dump.c => dump.cc} | 0 gcc/cp/error.c | 4659 -- gcc/cp/error.cc | 4659 ++ gcc/cp/{except.c => except.cc} | 0 gcc/cp/{expr.c => expr.cc} | 0 gcc/cp/{friend.c => friend.cc} | 0 gcc/cp/{g++spec.c => g++spec.cc} | 0 gcc/cp/init.c | 5354 -- gcc/cp/init.cc | 5354 ++ gcc/cp/{lambda.c => lambda.cc} | 0 gcc/cp/lang-specs.h | 2 +- gcc/cp/{lex.c => lex.cc} | 0 gcc/cp/{mangle.c => mangle.cc} | 0 gcc/cp/method.c | 3506 -- gcc/cp/method.cc | 3506 ++ gcc/cp/module.cc | 4 +- gcc/cp/name-lookup.c | 8944 ---- gcc/cp/name-lookup.cc | 8944 ++++ gcc/cp/name-lookup.h | 2 +- gcc/cp/{optimize.c => optimize.cc} | 0 gcc/cp/parser.c | 48319 ------------------- gcc/cp/parser.cc | 48319 +++++++++++++++++++ gcc/cp/parser.h | 2 +- gcc/cp/pt.c | 30596 ------------ gcc/cp/pt.cc | 30596 ++++++++++++ gcc/cp/{ptree.c => ptree.cc} | 0 gcc/cp/{rtti.c => rtti.cc} | 0 gcc/cp/{search.c => search.cc} | 0 gcc/cp/semantics.c | 12340 ----- gcc/cp/semantics.cc | 12340 +++++ gcc/cp/tree.c | 6180 --- gcc/cp/tree.cc | 6180 +++ gcc/cp/typeck.c | 11350 ----- gcc/cp/typeck.cc | 11350 +++++ gcc/cp/{typeck2.c => typeck2.cc} | 0 gcc/cp/vtable-class-hierarchy.c | 1346 - gcc/cp/vtable-class-hierarchy.cc | 1346 + gcc/{cppbuiltin.c => cppbuiltin.cc} | 0 gcc/{cppdefault.c => cppdefault.cc} | 0 gcc/cppdefault.h | 2 +- gcc/cprop.c | 1974 - gcc/cprop.cc | 1974 + gcc/cse.c | 7736 --- gcc/cse.cc | 7736 +++ gcc/{cselib.c => cselib.cc} | 0 gcc/{ctfc.c => ctfc.cc} | 0 gcc/ctfc.h | 4 +- gcc/{ctfout.c => ctfout.cc} | 0 gcc/d/ChangeLog | 6 + gcc/d/d-gimplify.cc | 2 +- gcc/d/d-incpath.cc | 2 +- gcc/d/lang-specs.h | 2 +- gcc/{data-streamer-in.c => data-streamer-in.cc} | 0 gcc/{data-streamer-out.c => data-streamer-out.cc} | 0 gcc/{data-streamer.c => data-streamer.cc} | 0 gcc/data-streamer.h | 6 +- gcc/{dbgcnt.c => dbgcnt.cc} | 0 gcc/dbgcnt.def | 2 +- gcc/dbxout.c | 3936 -- gcc/dbxout.cc | 3936 ++ gcc/dbxout.h | 2 +- gcc/{dce.c => dce.cc} | 0 gcc/{ddg.c => ddg.cc} | 0 gcc/{debug.c => debug.cc} | 0 gcc/debug.h | 8 +- gcc/df-core.c | 2472 - gcc/df-core.cc | 2472 + gcc/{df-problems.c => df-problems.cc} | 0 gcc/df-scan.c | 4252 -- gcc/df-scan.cc | 4252 ++ gcc/df.h | 8 +- gcc/dfp.c | 745 - gcc/dfp.cc | 745 + gcc/diagnostic-color.c | 348 - gcc/diagnostic-color.cc | 348 + gcc/diagnostic-event-id.h | 2 +- gcc/diagnostic-show-locus.c | 5703 --- gcc/diagnostic-show-locus.cc | 5703 +++ 
gcc/diagnostic-spec.c | 196 - gcc/diagnostic-spec.cc | 205 + gcc/diagnostic-spec.h | 6 +- gcc/diagnostic.c | 2456 - gcc/diagnostic.cc | 2488 + gcc/diagnostic.h | 4 + gcc/doc/avr-mmcu.texi | 4 +- gcc/doc/cfg.texi | 2 +- gcc/doc/contrib.texi | 2 +- gcc/doc/cppinternals.texi | 6 +- gcc/doc/extend.texi | 2 +- gcc/doc/generic.texi | 8 +- gcc/doc/gimple.texi | 8 +- gcc/doc/gty.texi | 8 +- gcc/doc/install.texi | 2 +- gcc/doc/invoke.texi | 128 +- gcc/doc/loop.texi | 2 +- gcc/doc/lto.texi | 40 +- gcc/doc/match-and-simplify.texi | 2 +- gcc/doc/md.texi | 15 +- gcc/doc/optinfo.texi | 4 +- gcc/doc/options.texi | 2 +- gcc/doc/passes.texi | 288 +- gcc/doc/plugins.texi | 4 +- gcc/doc/rtl.texi | 10 +- gcc/doc/sourcebuild.texi | 6 +- gcc/doc/tm.texi | 46 +- gcc/doc/tm.texi.in | 28 +- gcc/doc/tree-ssa.texi | 6 +- gcc/dojump.c | 1300 - gcc/dojump.cc | 1300 + gcc/dojump.h | 2 +- gcc/{dominance.c => dominance.cc} | 0 gcc/{domwalk.c => domwalk.cc} | 0 gcc/{double-int.c => double-int.cc} | 0 gcc/{dse.c => dse.cc} | 0 gcc/dumpfile.c | 2778 -- gcc/dumpfile.cc | 2778 ++ gcc/dumpfile.h | 18 +- gcc/dwarf2asm.c | 1162 - gcc/dwarf2asm.cc | 1162 + gcc/{dwarf2cfi.c => dwarf2cfi.cc} | 0 gcc/{dwarf2ctf.c => dwarf2ctf.cc} | 0 gcc/dwarf2ctf.h | 4 +- gcc/dwarf2out.c | 33147 ------------- gcc/dwarf2out.cc | 33147 +++++++++++++ gcc/dwarf2out.h | 8 +- gcc/{early-remat.c => early-remat.cc} | 0 gcc/{edit-context.c => edit-context.cc} | 0 gcc/emit-rtl.c | 6596 --- gcc/emit-rtl.cc | 6596 +++ gcc/emit-rtl.h | 6 +- gcc/errors.c | 134 - gcc/errors.cc | 134 + gcc/et-forest.c | 884 - gcc/et-forest.cc | 884 + gcc/except.c | 3522 -- gcc/except.cc | 3522 ++ gcc/explow.c | 2321 - gcc/explow.cc | 2321 + gcc/explow.h | 2 +- gcc/{expmed.c => expmed.cc} | 0 gcc/expmed.h | 2 +- gcc/expr.c | 13145 ----- gcc/expr.cc | 13145 +++++ gcc/{fibonacci_heap.c => fibonacci_heap.cc} | 0 gcc/{file-find.c => file-find.cc} | 0 gcc/file-prefix-map.c | 149 - gcc/file-prefix-map.cc | 149 + gcc/final.c | 4662 -- gcc/final.cc | 4662 ++ gcc/{fixed-value.c => fixed-value.cc} | 0 gcc/fixed-value.h | 2 +- gcc/flag-types.h | 20 +- gcc/{fold-const-call.c => fold-const-call.cc} | 0 gcc/fold-const.c | 16787 ------- gcc/fold-const.cc | 16787 +++++++ gcc/fortran/ChangeLog | 254 + gcc/fortran/{arith.c => arith.cc} | 0 gcc/fortran/{array.c => array.cc} | 0 gcc/fortran/{bbt.c => bbt.cc} | 0 gcc/fortran/check.c | 7523 --- gcc/fortran/check.cc | 7523 +++ gcc/fortran/class.c | 3073 -- gcc/fortran/class.cc | 3073 ++ gcc/fortran/config-lang.in | 2 +- gcc/fortran/{constructor.c => constructor.cc} | 0 gcc/fortran/{convert.c => convert.cc} | 0 gcc/fortran/cpp.c | 1203 - gcc/fortran/cpp.cc | 1203 + gcc/fortran/data.c | 848 - gcc/fortran/data.cc | 848 + gcc/fortran/decl.c | 11910 ----- gcc/fortran/decl.cc | 11910 +++++ gcc/fortran/dependency.c | 2336 - gcc/fortran/dependency.cc | 2336 + .../{dump-parse-tree.c => dump-parse-tree.cc} | 0 gcc/fortran/error.c | 1656 - gcc/fortran/error.cc | 1656 + gcc/fortran/expr.c | 6507 --- gcc/fortran/expr.cc | 6507 +++ gcc/fortran/f95-lang.c | 1306 - gcc/fortran/f95-lang.cc | 1306 + .../{frontend-passes.c => frontend-passes.cc} | 0 gcc/fortran/gfc-internals.texi | 12 +- gcc/fortran/gfortran.h | 86 +- gcc/fortran/gfortranspec.c | 450 - gcc/fortran/gfortranspec.cc | 450 + gcc/fortran/interface.c | 5589 --- gcc/fortran/interface.cc | 5589 +++ gcc/fortran/intrinsic.c | 5503 --- gcc/fortran/intrinsic.cc | 5503 +++ gcc/fortran/{io.c => io.cc} | 0 gcc/fortran/iresolve.c | 4050 -- gcc/fortran/iresolve.cc | 4050 ++ gcc/fortran/iso-c-binding.def | 2 +- 
gcc/fortran/lang-specs.h | 4 +- gcc/fortran/libgfortran.h | 2 +- gcc/fortran/match.c | 7264 --- gcc/fortran/match.cc | 7264 +++ gcc/fortran/match.h | 28 +- gcc/fortran/matchexp.c | 903 - gcc/fortran/matchexp.cc | 903 + gcc/fortran/mathbuiltins.def | 2 +- gcc/fortran/{misc.c => misc.cc} | 0 gcc/fortran/module.c | 7581 --- gcc/fortran/module.cc | 7581 +++ gcc/fortran/openmp.c | 9410 ---- gcc/fortran/openmp.cc | 9411 ++++ gcc/fortran/{options.c => options.cc} | 0 gcc/fortran/parse.c | 6987 --- gcc/fortran/parse.cc | 6987 +++ gcc/fortran/{primary.c => primary.cc} | 0 gcc/fortran/resolve.c | 17582 ------- gcc/fortran/resolve.cc | 17582 +++++++ gcc/fortran/{scanner.c => scanner.cc} | 0 gcc/fortran/simplify.c | 8966 ---- gcc/fortran/simplify.cc | 8966 ++++ gcc/fortran/{st.c => st.cc} | 0 gcc/fortran/symbol.c | 5251 -- gcc/fortran/symbol.cc | 5251 ++ gcc/fortran/target-memory.c | 806 - gcc/fortran/target-memory.cc | 806 + gcc/fortran/target-memory.h | 2 +- gcc/fortran/trans-array.c | 11714 ----- gcc/fortran/trans-array.cc | 11714 +++++ gcc/fortran/{trans-common.c => trans-common.cc} | 0 gcc/fortran/trans-const.c | 430 - gcc/fortran/trans-const.cc | 430 + gcc/fortran/trans-decl.c | 7956 --- gcc/fortran/trans-decl.cc | 7956 +++ gcc/fortran/trans-expr.c | 12125 ----- gcc/fortran/trans-expr.cc | 12125 +++++ gcc/fortran/trans-intrinsic.c | 12446 ----- gcc/fortran/trans-intrinsic.cc | 12457 +++++ gcc/fortran/trans-io.c | 2686 -- gcc/fortran/trans-io.cc | 2686 ++ gcc/fortran/trans-openmp.c | 7701 --- gcc/fortran/trans-openmp.cc | 7701 +++ gcc/fortran/trans-stmt.c | 7468 --- gcc/fortran/trans-stmt.cc | 7468 +++ gcc/fortran/trans-stmt.h | 8 +- gcc/fortran/trans-types.c | 3838 -- gcc/fortran/trans-types.cc | 3838 ++ gcc/fortran/trans-types.h | 2 +- gcc/fortran/trans.c | 2452 - gcc/fortran/trans.cc | 2452 + gcc/fortran/trans.h | 14 +- gcc/fp-test.c | 251 - gcc/fp-test.cc | 251 + gcc/{function-tests.c => function-tests.cc} | 0 gcc/function.c | 6964 --- gcc/function.cc | 6964 +++ gcc/function.h | 16 +- gcc/fwprop.c | 1079 - gcc/fwprop.cc | 1079 + gcc/{gcc-ar.c => gcc-ar.cc} | 0 gcc/gcc-main.c | 48 - gcc/gcc-main.cc | 48 + gcc/{gcc-rich-location.c => gcc-rich-location.cc} | 0 gcc/gcc-rich-location.h | 2 +- gcc/gcc-symtab.h | 2 +- gcc/gcc.c | 11276 ----- gcc/gcc.cc | 11276 +++++ gcc/gcc.h | 6 +- gcc/gcov-dump.c | 479 - gcc/gcov-dump.cc | 479 + gcc/{gcov-io.c => gcov-io.cc} | 0 gcc/{gcov-tool.c => gcov-tool.cc} | 0 gcc/gcov.c | 3262 -- gcc/gcov.cc | 3262 ++ gcc/gcse-common.c | 222 - gcc/gcse-common.cc | 222 + gcc/gcse.c | 4136 -- gcc/gcse.cc | 4136 ++ gcc/genattr-common.c | 112 - gcc/genattr-common.cc | 112 + gcc/{genattr.c => genattr.cc} | 0 gcc/genattrtab.c | 5417 --- gcc/genattrtab.cc | 5417 +++ gcc/genautomata.c | 9685 ---- gcc/genautomata.cc | 9685 ++++ gcc/{gencfn-macros.c => gencfn-macros.cc} | 0 gcc/{gencheck.c => gencheck.cc} | 0 gcc/{genchecksum.c => genchecksum.cc} | 0 gcc/{gencodes.c => gencodes.cc} | 0 gcc/genconditions.c | 252 - gcc/genconditions.cc | 252 + gcc/{genconfig.c => genconfig.cc} | 0 gcc/genconstants.c | 105 - gcc/genconstants.cc | 105 + gcc/genemit.c | 952 - gcc/genemit.cc | 952 + gcc/{genenums.c => genenums.cc} | 0 gcc/generic-match-head.c | 101 - gcc/generic-match-head.cc | 101 + gcc/genextract.c | 507 - gcc/genextract.cc | 507 + gcc/{genflags.c => genflags.cc} | 0 gcc/gengenrtl.c | 358 - gcc/gengenrtl.cc | 358 + gcc/gengtype-parse.c | 1179 - gcc/gengtype-parse.cc | 1179 + gcc/gengtype-state.c | 2661 - gcc/gengtype-state.cc | 2661 + gcc/gengtype.c | 5405 --- gcc/gengtype.cc | 5399 +++ 
gcc/gengtype.h | 8 +- gcc/{genhooks.c => genhooks.cc} | 0 gcc/genmatch.c | 5257 -- gcc/genmatch.cc | 5257 ++ gcc/genmddeps.c | 71 - gcc/genmddeps.cc | 71 + gcc/{genmddump.c => genmddump.cc} | 0 gcc/genmodes.c | 2068 - gcc/genmodes.cc | 2068 + gcc/{genopinit.c => genopinit.cc} | 0 gcc/{genoutput.c => genoutput.cc} | 0 gcc/genpeep.c | 418 - gcc/genpeep.cc | 418 + gcc/genpreds.c | 1682 - gcc/genpreds.cc | 1682 + gcc/genrecog.c | 5447 --- gcc/genrecog.cc | 5447 +++ gcc/gensupport.c | 3316 -- gcc/gensupport.cc | 3316 ++ gcc/gensupport.h | 12 +- gcc/{gentarget-def.c => gentarget-def.cc} | 0 gcc/{genversion.c => genversion.cc} | 0 gcc/{ggc-common.c => ggc-common.cc} | 0 gcc/ggc-internal.h | 2 +- gcc/{ggc-none.c => ggc-none.cc} | 0 gcc/{ggc-page.c => ggc-page.cc} | 0 gcc/{ggc-tests.c => ggc-tests.cc} | 0 gcc/{gimple-builder.c => gimple-builder.cc} | 0 gcc/{gimple-expr.c => gimple-expr.cc} | 0 gcc/gimple-fold.c | 9123 ---- gcc/gimple-fold.cc | 9123 ++++ gcc/gimple-fold.h | 2 +- gcc/{gimple-iterator.c => gimple-iterator.cc} | 0 gcc/{gimple-laddress.c => gimple-laddress.cc} | 0 gcc/{gimple-loop-jam.c => gimple-loop-jam.cc} | 0 gcc/gimple-low.c | 947 - gcc/gimple-low.cc | 947 + gcc/gimple-match-head.c | 1394 - gcc/gimple-match-head.cc | 1394 + ...imple-pretty-print.c => gimple-pretty-print.cc} | 0 gcc/gimple-pretty-print.h | 2 +- ...imple-ssa-backprop.c => gimple-ssa-backprop.cc} | 0 ...a-evrp-analyze.c => gimple-ssa-evrp-analyze.cc} | 0 gcc/{gimple-ssa-evrp.c => gimple-ssa-evrp.cc} | 0 ...isolate-paths.c => gimple-ssa-isolate-paths.cc} | 0 ...ull-compare.c => gimple-ssa-nonnull-compare.cc} | 0 ...ssa-split-paths.c => gimple-ssa-split-paths.cc} | 0 gcc/gimple-ssa-sprintf.c | 4728 -- gcc/gimple-ssa-sprintf.cc | 4728 ++ ...store-merging.c => gimple-ssa-store-merging.cc} | 0 ...eduction.c => gimple-ssa-strength-reduction.cc} | 0 gcc/gimple-ssa-warn-access.cc | 992 +- ...ssa-warn-alloca.c => gimple-ssa-warn-alloca.cc} | 0 ...warn-restrict.c => gimple-ssa-warn-restrict.cc} | 0 ...{gimple-streamer-in.c => gimple-streamer-in.cc} | 0 ...imple-streamer-out.c => gimple-streamer-out.cc} | 0 gcc/gimple-streamer.h | 4 +- gcc/{gimple-walk.c => gimple-walk.cc} | 0 ...e-warn-recursion.c => gimple-warn-recursion.cc} | 0 gcc/{gimple.c => gimple.cc} | 0 gcc/gimple.h | 4 +- gcc/{gimplify-me.c => gimplify-me.cc} | 0 gcc/gimplify.c | 16582 ------- gcc/gimplify.cc | 16582 +++++++ gcc/go/ChangeLog | 17 + gcc/go/config-lang.in | 2 +- gcc/go/go-backend.c | 194 - gcc/go/go-backend.cc | 194 + gcc/go/go-lang.c | 638 - gcc/go/go-lang.cc | 638 + gcc/go/gospec.c | 466 - gcc/go/gospec.cc | 466 + gcc/go/lang-specs.h | 2 +- gcc/{godump.c => godump.cc} | 0 gcc/{graph.c => graph.cc} | 0 gcc/{graphds.c => graphds.cc} | 0 ...phite-dependences.c => graphite-dependences.cc} | 0 gcc/graphite-isl-ast-to-gimple.c | 1556 - gcc/graphite-isl-ast-to-gimple.cc | 1556 + ...ite-optimize-isl.c => graphite-optimize-isl.cc} | 0 gcc/{graphite-poly.c => graphite-poly.cc} | 0 ...scop-detection.c => graphite-scop-detection.cc} | 0 ...ite-sese-to-poly.c => graphite-sese-to-poly.cc} | 0 gcc/{graphite.c => graphite.cc} | 0 gcc/haifa-sched.c | 9263 ---- gcc/haifa-sched.cc | 9263 ++++ gcc/{hash-map-tests.c => hash-map-tests.cc} | 0 gcc/{hash-set-tests.c => hash-set-tests.cc} | 0 gcc/{hash-table.c => hash-table.cc} | 0 gcc/{hooks.c => hooks.cc} | 0 gcc/{host-default.c => host-default.cc} | 0 gcc/{hw-doloop.c => hw-doloop.cc} | 0 gcc/{hwint.c => hwint.cc} | 0 gcc/ifcvt.c | 5694 --- gcc/ifcvt.cc | 5694 +++ gcc/{inchash.c => inchash.cc} | 0 gcc/inchash.h | 2 +- 
gcc/incpath.c | 536 - gcc/incpath.cc | 536 + gcc/{init-regs.c => init-regs.cc} | 0 gcc/input.c | 3932 -- gcc/input.cc | 3932 ++ gcc/input.h | 4 +- gcc/internal-fn.c | 4429 -- gcc/internal-fn.cc | 4451 ++ gcc/internal-fn.def | 3 + gcc/internal-fn.h | 1 + gcc/{intl.c => intl.cc} | 0 gcc/{ipa-comdats.c => ipa-comdats.cc} | 0 gcc/ipa-cp.c | 6639 --- gcc/ipa-cp.cc | 6639 +++ gcc/{ipa-devirt.c => ipa-devirt.cc} | 0 gcc/ipa-fnsummary.c | 4972 -- gcc/ipa-fnsummary.cc | 4972 ++ gcc/ipa-fnsummary.h | 6 +- gcc/ipa-free-lang-data.cc | 4 +- gcc/{ipa-icf-gimple.c => ipa-icf-gimple.cc} | 0 gcc/{ipa-icf.c => ipa-icf.cc} | 0 ...pa-inline-analysis.c => ipa-inline-analysis.cc} | 0 ...-inline-transform.c => ipa-inline-transform.cc} | 0 gcc/ipa-inline.c | 3158 -- gcc/ipa-inline.cc | 3158 ++ gcc/ipa-inline.h | 6 +- gcc/{ipa-modref-tree.c => ipa-modref-tree.cc} | 0 gcc/ipa-modref.c | 5509 --- gcc/ipa-modref.cc | 5509 +++ gcc/ipa-param-manipulation.c | 2401 - gcc/ipa-param-manipulation.cc | 2401 + ...-polymorphic-call.c => ipa-polymorphic-call.cc} | 0 gcc/{ipa-predicate.c => ipa-predicate.cc} | 0 gcc/{ipa-profile.c => ipa-profile.cc} | 0 gcc/ipa-prop.c | 6088 --- gcc/ipa-prop.cc | 6088 +++ gcc/ipa-prop.h | 8 +- gcc/{ipa-pure-const.c => ipa-pure-const.cc} | 0 gcc/{ipa-ref.c => ipa-ref.cc} | 0 gcc/ipa-reference.c | 1341 - gcc/ipa-reference.cc | 1341 + gcc/ipa-reference.h | 2 +- gcc/ipa-split.c | 1982 - gcc/ipa-split.cc | 2000 + gcc/ipa-sra.c | 4148 -- gcc/ipa-sra.cc | 4148 ++ gcc/{ipa-utils.c => ipa-utils.cc} | 0 gcc/ipa-utils.h | 8 +- gcc/{ipa-visibility.c => ipa-visibility.cc} | 0 gcc/{ipa.c => ipa.cc} | 0 gcc/ira-build.c | 3568 -- gcc/ira-build.cc | 3568 ++ gcc/ira-color.c | 5274 -- gcc/ira-color.cc | 5274 ++ gcc/ira-conflicts.c | 895 - gcc/ira-conflicts.cc | 895 + gcc/{ira-costs.c => ira-costs.cc} | 0 gcc/{ira-emit.c => ira-emit.cc} | 0 gcc/ira-int.h | 20 +- gcc/ira-lives.c | 1765 - gcc/ira-lives.cc | 1765 + gcc/ira.c | 6132 --- gcc/ira.cc | 6132 +++ gcc/ira.h | 4 +- gcc/jit/ChangeLog | 156 + gcc/jit/config-lang.in | 2 +- gcc/jit/docs/_build/texinfo/libgccjit.texi | 18 +- gcc/jit/docs/internals/index.rst | 12 +- gcc/jit/{dummy-frontend.c => dummy-frontend.cc} | 0 gcc/jit/jit-builtins.c | 707 - gcc/jit/jit-builtins.cc | 707 + gcc/jit/{jit-logging.c => jit-logging.cc} | 0 gcc/jit/jit-playback.c | 3618 -- gcc/jit/jit-playback.cc | 3618 ++ gcc/jit/jit-recording.c | 7537 --- gcc/jit/jit-recording.cc | 7537 +++ gcc/jit/jit-recording.h | 2 +- gcc/jit/{jit-result.c => jit-result.cc} | 0 gcc/jit/{jit-spec.c => jit-spec.cc} | 0 gcc/jit/{jit-tempdir.c => jit-tempdir.cc} | 0 gcc/jit/{jit-w32.c => jit-w32.cc} | 0 gcc/jit/libgccjit.c | 4172 -- gcc/jit/libgccjit.cc | 4172 ++ gcc/jit/notes.txt | 6 +- gcc/{jump.c => jump.cc} | 0 gcc/langhooks.c | 952 - gcc/langhooks.cc | 952 + gcc/langhooks.h | 12 +- gcc/{lcm.c => lcm.cc} | 0 gcc/libfuncs.h | 2 +- gcc/{lists.c => lists.cc} | 0 gcc/loop-doloop.c | 807 - gcc/loop-doloop.cc | 807 + gcc/loop-init.c | 653 - gcc/loop-init.cc | 653 + gcc/loop-invariant.c | 2322 - gcc/loop-invariant.cc | 2322 + gcc/{loop-iv.c => loop-iv.cc} | 0 gcc/{loop-unroll.c => loop-unroll.cc} | 0 gcc/{lower-subreg.c => lower-subreg.cc} | 0 gcc/lower-subreg.h | 2 +- gcc/{lra-assigns.c => lra-assigns.cc} | 0 gcc/{lra-coalesce.c => lra-coalesce.cc} | 0 gcc/lra-constraints.c | 7380 --- gcc/lra-constraints.cc | 7380 +++ gcc/{lra-eliminations.c => lra-eliminations.cc} | 0 gcc/lra-int.h | 24 +- gcc/{lra-lives.c => lra-lives.cc} | 0 gcc/{lra-remat.c => lra-remat.cc} | 0 gcc/lra-spills.c | 880 - gcc/lra-spills.cc | 880 + 
gcc/lra.c | 2523 - gcc/lra.cc | 2523 + gcc/{lto-cgraph.c => lto-cgraph.cc} | 0 gcc/{lto-compress.c => lto-compress.cc} | 0 gcc/lto-compress.h | 2 +- gcc/{lto-opts.c => lto-opts.cc} | 0 gcc/{lto-section-in.c => lto-section-in.cc} | 0 gcc/{lto-section-out.c => lto-section-out.cc} | 0 gcc/{lto-streamer-in.c => lto-streamer-in.cc} | 0 gcc/lto-streamer-out.c | 3363 -- gcc/lto-streamer-out.cc | 3363 ++ gcc/{lto-streamer.c => lto-streamer.cc} | 0 gcc/lto-streamer.h | 22 +- gcc/lto-wrapper.c | 2146 - gcc/lto-wrapper.cc | 2146 + gcc/lto/ChangeLog | 32 + gcc/lto/{common.c => common.cc} | 0 gcc/lto/config-lang.in | 2 +- gcc/lto/lang-specs.h | 2 +- gcc/lto/lto-common.c | 3106 -- gcc/lto/lto-common.cc | 3106 ++ gcc/lto/lto-common.h | 2 +- gcc/lto/lto-dump.c | 379 - gcc/lto/lto-dump.cc | 379 + gcc/lto/lto-lang.c | 1490 - gcc/lto/lto-lang.cc | 1490 + gcc/lto/{lto-object.c => lto-object.cc} | 0 gcc/lto/{lto-partition.c => lto-partition.cc} | 0 gcc/lto/{lto-symtab.c => lto-symtab.cc} | 0 gcc/lto/lto.c | 670 - gcc/lto/lto.cc | 670 + gcc/lto/lto.h | 4 +- gcc/machmode.def | 2 +- gcc/machmode.h | 4 +- gcc/main.c | 45 - gcc/main.cc | 45 + gcc/match.pd | 16 +- gcc/{mcf.c => mcf.cc} | 0 gcc/{mode-switching.c => mode-switching.cc} | 0 gcc/{modulo-sched.c => modulo-sched.cc} | 0 gcc/{multiple_target.c => multiple_target.cc} | 0 gcc/objc/ChangeLog | 36 + gcc/objc/Make-lang.in | 2 +- gcc/objc/config-lang.in | 2 +- gcc/objc/lang-specs.h | 2 +- gcc/objc/objc-act.c | 10365 ---- gcc/objc/objc-act.cc | 10365 ++++ gcc/objc/objc-act.h | 4 +- gcc/objc/{objc-encoding.c => objc-encoding.cc} | 0 gcc/objc/objc-gnu-runtime-abi-01.c | 2262 - gcc/objc/objc-gnu-runtime-abi-01.cc | 2262 + gcc/objc/objc-lang.c | 56 - gcc/objc/objc-lang.cc | 56 + gcc/objc/objc-map.c | 160 - gcc/objc/objc-map.cc | 160 + gcc/objc/objc-next-runtime-abi-01.c | 2957 -- gcc/objc/objc-next-runtime-abi-01.cc | 2957 ++ ...untime-abi-02.c => objc-next-runtime-abi-02.cc} | 0 gcc/objc/objc-runtime-shared-support.c | 718 - gcc/objc/objc-runtime-shared-support.cc | 718 + gcc/objc/objc-runtime-shared-support.h | 2 +- gcc/objcp/ChangeLog | 15 + gcc/objcp/Make-lang.in | 16 +- gcc/objcp/config-lang.in | 16 +- gcc/objcp/lang-specs.h | 2 +- gcc/objcp/objcp-decl.c | 115 - gcc/objcp/objcp-decl.cc | 115 + gcc/objcp/objcp-lang.c | 92 - gcc/objcp/objcp-lang.cc | 92 + gcc/omp-builtins.def | 4 +- gcc/omp-expand.c | 10808 ----- gcc/omp-expand.cc | 10808 +++++ gcc/{omp-general.c => omp-general.cc} | 0 gcc/omp-low.c | 14777 ------ gcc/omp-low.cc | 14777 ++++++ gcc/omp-oacc-neuter-broadcast.cc | 12 +- gcc/omp-offload.c | 2823 -- gcc/omp-offload.cc | 2823 ++ gcc/{omp-simd-clone.c => omp-simd-clone.cc} | 0 gcc/{opt-suggestions.c => opt-suggestions.cc} | 0 gcc/{optabs-libfuncs.c => optabs-libfuncs.cc} | 0 gcc/optabs-query.c | 765 - gcc/optabs-query.cc | 765 + gcc/{optabs-tree.c => optabs-tree.cc} | 0 gcc/{optabs.c => optabs.cc} | 0 gcc/optabs.def | 1 + gcc/optc-gen.awk | 2 +- gcc/optc-save-gen.awk | 2 +- gcc/optinfo-emit-json.cc | 4 +- gcc/opts-common.c | 1857 - gcc/opts-common.cc | 1857 + gcc/{opts-global.c => opts-global.cc} | 0 gcc/{opts.c => opts.cc} | 0 gcc/output.h | 16 +- gcc/pass_manager.h | 2 +- gcc/{passes.c => passes.cc} | 0 gcc/passes.def | 5 +- gcc/plugin.c | 1011 - gcc/plugin.cc | 1011 + gcc/plugin.def | 2 +- gcc/plugin.h | 2 +- gcc/po/ChangeLog | 4 + gcc/po/EXCLUDES | 44 +- gcc/pointer-query.cc | 2 +- gcc/postreload-gcse.c | 1466 - gcc/postreload-gcse.cc | 1466 + gcc/{postreload.c => postreload.cc} | 0 gcc/{predict.c => predict.cc} | 0 gcc/predict.h | 4 +- gcc/{prefix.c 
=> prefix.cc} | 0 gcc/prefix.h | 2 +- gcc/{pretty-print.c => pretty-print.cc} | 0 ...{print-rtl-function.c => print-rtl-function.cc} | 0 gcc/{print-rtl.c => print-rtl.cc} | 0 gcc/{print-tree.c => print-tree.cc} | 0 gcc/{profile-count.c => profile-count.cc} | 0 gcc/{profile.c => profile.cc} | 0 gcc/profile.h | 2 +- gcc/read-md.c | 1363 - gcc/read-md.cc | 1363 + gcc/read-md.h | 10 +- gcc/read-rtl-function.c | 2230 - gcc/read-rtl-function.cc | 2230 + gcc/read-rtl.c | 2092 - gcc/read-rtl.cc | 2092 + gcc/real.c | 5560 --- gcc/real.cc | 5560 +++ gcc/real.h | 8 +- gcc/{realmpfr.c => realmpfr.cc} | 0 gcc/recog.c | 4625 -- gcc/recog.cc | 4625 ++ gcc/recog.h | 8 +- gcc/{ree.c => ree.cc} | 0 gcc/reg-notes.def | 2 +- gcc/reg-stack.c | 3484 -- gcc/reg-stack.cc | 3484 ++ gcc/{regcprop.c => regcprop.cc} | 0 gcc/{reginfo.c => reginfo.cc} | 0 gcc/{regrename.c => regrename.cc} | 0 gcc/regs.h | 2 +- gcc/regset.h | 2 +- gcc/{regstat.c => regstat.cc} | 0 gcc/reload.c | 7364 --- gcc/reload.cc | 7364 +++ gcc/reload.h | 8 +- gcc/reload1.c | 9069 ---- gcc/reload1.cc | 9069 ++++ gcc/reorg.c | 3939 -- gcc/reorg.cc | 3939 ++ gcc/{resource.c => resource.cc} | 0 gcc/{rtl-error.c => rtl-error.cc} | 0 gcc/{rtl-tests.c => rtl-tests.cc} | 0 gcc/{rtl.c => rtl.cc} | 0 gcc/rtl.def | 12 +- gcc/rtl.h | 146 +- gcc/rtlanal.c | 6992 --- gcc/rtlanal.cc | 6992 +++ gcc/rtlanal.h | 4 +- gcc/{rtlhash.c => rtlhash.cc} | 0 gcc/{rtlhooks.c => rtlhooks.cc} | 0 ...{rtx-vector-builder.c => rtx-vector-builder.cc} | 0 gcc/run-rtl-passes.c | 79 - gcc/run-rtl-passes.cc | 79 + gcc/{sancov.c => sancov.cc} | 0 gcc/sanitizer.def | 6 +- gcc/{sanopt.c => sanopt.cc} | 0 gcc/{sbitmap.c => sbitmap.cc} | 0 gcc/sched-deps.c | 4954 -- gcc/sched-deps.cc | 4954 ++ gcc/sched-ebb.c | 734 - gcc/sched-ebb.cc | 734 + gcc/sched-int.h | 28 +- gcc/sched-rgn.c | 3956 -- gcc/sched-rgn.cc | 3956 ++ gcc/sel-sched-dump.c | 1027 - gcc/sel-sched-dump.cc | 1027 + gcc/sel-sched-dump.h | 2 +- gcc/sel-sched-ir.c | 6461 --- gcc/sel-sched-ir.cc | 6461 +++ gcc/sel-sched-ir.h | 8 +- gcc/sel-sched.c | 7725 --- gcc/sel-sched.cc | 7725 +++ ...elftest-diagnostic.c => selftest-diagnostic.cc} | 0 gcc/{selftest-rtl.c => selftest-rtl.cc} | 0 gcc/selftest-run-tests.c | 135 - gcc/selftest-run-tests.cc | 135 + gcc/{selftest.c => selftest.cc} | 0 gcc/selftest.h | 4 +- gcc/{sese.c => sese.cc} | 0 gcc/shrink-wrap.c | 1890 - gcc/shrink-wrap.cc | 1890 + gcc/shrink-wrap.h | 2 +- gcc/simplify-rtx.c | 8471 ---- gcc/simplify-rtx.cc | 8471 ++++ gcc/{sparseset.c => sparseset.cc} | 0 gcc/{spellcheck-tree.c => spellcheck-tree.cc} | 0 gcc/spellcheck-tree.h | 2 +- gcc/{spellcheck.c => spellcheck.cc} | 0 gcc/spellcheck.h | 2 +- gcc/{sreal.c => sreal.cc} | 0 gcc/{stack-ptr-mod.c => stack-ptr-mod.cc} | 0 gcc/{statistics.c => statistics.cc} | 0 gcc/statistics.h | 2 +- gcc/stmt.c | 1119 - gcc/stmt.cc | 1119 + gcc/stmt.h | 2 +- gcc/{stor-layout.c => stor-layout.cc} | 0 gcc/stor-layout.h | 2 +- gcc/{store-motion.c => store-motion.cc} | 0 gcc/{streamer-hooks.c => streamer-hooks.cc} | 0 gcc/streamer-hooks.h | 2 +- gcc/{stringpool.c => stringpool.cc} | 0 gcc/stringpool.h | 2 +- ...ubstring-locations.c => substring-locations.cc} | 0 gcc/symtab.c | 2586 - gcc/symtab.cc | 2586 + gcc/{target-globals.c => target-globals.cc} | 0 gcc/target.def | 24 +- gcc/target.h | 6 +- gcc/targhooks.c | 2593 - gcc/targhooks.cc | 2593 + gcc/testsuite/ChangeLog | 325 + gcc/testsuite/c-c++-common/Wdangling-pointer-2.c | 437 + gcc/testsuite/c-c++-common/Wdangling-pointer-3.c | 64 + gcc/testsuite/c-c++-common/Wdangling-pointer-4.c | 73 + 
gcc/testsuite/c-c++-common/Wdangling-pointer-5.c | 90 + gcc/testsuite/c-c++-common/Wdangling-pointer-6.c | 32 + gcc/testsuite/c-c++-common/Wdangling-pointer.c | 434 + gcc/testsuite/c-c++-common/Wuse-after-free-2.c | 169 + gcc/testsuite/c-c++-common/Wuse-after-free-3.c | 83 + gcc/testsuite/c-c++-common/Wuse-after-free-4.c | 102 + gcc/testsuite/c-c++-common/Wuse-after-free-5.c | 103 + gcc/testsuite/c-c++-common/Wuse-after-free-6.c | 105 + gcc/testsuite/c-c++-common/Wuse-after-free-7.c | 103 + gcc/testsuite/c-c++-common/Wuse-after-free.c | 167 + gcc/testsuite/c-c++-common/cpp/line-2.c | 2 +- gcc/testsuite/c-c++-common/cpp/line-3.c | 2 +- gcc/testsuite/g++.dg/cpp1y/pr104031.C | 23 + gcc/testsuite/g++.dg/torture/pr57993-2.C | 22 +- ...nit-pred-loop-1_a.cc => uninit-pred-loop-1_a.C} | 0 gcc/testsuite/g++.dg/uninit-pred-loop-1_b.C | 21 + gcc/testsuite/g++.dg/uninit-pred-loop-1_b.cc | 21 - gcc/testsuite/g++.dg/uninit-pred-loop-1_c.C | 23 + gcc/testsuite/g++.dg/uninit-pred-loop-1_c.cc | 23 - ...{uninit-pred-loop_1.cc => uninit-pred-loop_1.C} | 0 gcc/testsuite/g++.dg/warn/Wdangling-pointer-2.C | 23 + gcc/testsuite/g++.dg/warn/Wdangling-pointer.C | 74 + gcc/testsuite/g++.dg/warn/Wfree-nonheap-object-6.C | 4 +- gcc/testsuite/g++.dg/warn/Wmismatched-dealloc-3.C | 70 + gcc/testsuite/g++.dg/warn/Wuse-after-free.C | 158 + gcc/testsuite/g++.dg/warn/ref-temp1.C | 3 + gcc/testsuite/g++.old-deja/g++.robertl/eb43.C | 1 + gcc/testsuite/g++.target/i386/pr103973-1.C | 71 + gcc/testsuite/g++.target/i386/pr103973-10.C | 7 + gcc/testsuite/g++.target/i386/pr103973-11.C | 8 + gcc/testsuite/g++.target/i386/pr103973-12.C | 8 + gcc/testsuite/g++.target/i386/pr103973-13.C | 62 + gcc/testsuite/g++.target/i386/pr103973-14.C | 7 + gcc/testsuite/g++.target/i386/pr103973-15.C | 8 + gcc/testsuite/g++.target/i386/pr103973-16.C | 8 + gcc/testsuite/g++.target/i386/pr103973-17.C | 8 + gcc/testsuite/g++.target/i386/pr103973-18.C | 8 + gcc/testsuite/g++.target/i386/pr103973-19.C | 8 + gcc/testsuite/g++.target/i386/pr103973-2.C | 7 + gcc/testsuite/g++.target/i386/pr103973-20.C | 8 + gcc/testsuite/g++.target/i386/pr103973-3.C | 8 + gcc/testsuite/g++.target/i386/pr103973-4.C | 8 + gcc/testsuite/g++.target/i386/pr103973-5.C | 66 + gcc/testsuite/g++.target/i386/pr103973-6.C | 7 + gcc/testsuite/g++.target/i386/pr103973-7.C | 8 + gcc/testsuite/g++.target/i386/pr103973-8.C | 8 + gcc/testsuite/g++.target/i386/pr103973-9.C | 67 + gcc/testsuite/gcc.c-torture/compile/pr101941-1.c | 50 + gcc/testsuite/gcc.dg/Wdangling-pointer-2.c | 82 + gcc/testsuite/gcc.dg/Wdangling-pointer.c | 75 + gcc/testsuite/gcc.dg/Wmismatched-dealloc-2.c | 13 +- gcc/testsuite/gcc.dg/Wmismatched-dealloc-3.c | 5 + gcc/testsuite/gcc.dg/analyzer/file-1.c | 3 + gcc/testsuite/gcc.dg/analyzer/file-2.c | 3 + gcc/testsuite/gcc.dg/attr-alloc_size-6.c | 2 +- gcc/testsuite/gcc.dg/attr-alloc_size-7.c | 2 +- gcc/testsuite/gcc.dg/auto-init-uninit-16.c | 4 +- gcc/testsuite/gcc.dg/auto-init-uninit-34.c | 8 +- gcc/testsuite/gcc.dg/auto-init-uninit-37.c | 44 +- gcc/testsuite/gcc.dg/auto-init-uninit-B.c | 4 +- gcc/testsuite/gcc.dg/tree-ssa/pr101941-1.c | 53 + gcc/testsuite/gcc.dg/uninit-pr50476.c | 2 +- gcc/testsuite/gcc.misc-tests/options.exp | 2 +- gcc/testsuite/gcc.src/maintainers.exp | 24 +- gcc/testsuite/gcc.target/aarch64/pr104005.c | 17 + .../gcc.target/i386/avx2-dest-false-dep-for-glc.c | 24 + .../i386/avx512dq-dest-false-dep-for-glc.c | 73 + .../i386/avx512f-dest-false-dep-for-glc.c | 103 + .../i386/avx512fp16-dest-false-dep-for-glc.c | 45 + 
.../i386/avx512fp16vl-dest-false-dep-for-glc.c | 24 + .../i386/avx512vl-dest-false-dep-for-glc.c | 76 + gcc/testsuite/gcc.target/i386/pr103973-1.c | 98 + gcc/testsuite/gcc.target/i386/pr103973-10.c | 7 + gcc/testsuite/gcc.target/i386/pr103973-11.c | 8 + gcc/testsuite/gcc.target/i386/pr103973-12.c | 8 + gcc/testsuite/gcc.target/i386/pr103973-13.c | 76 + gcc/testsuite/gcc.target/i386/pr103973-14.c | 7 + gcc/testsuite/gcc.target/i386/pr103973-15.c | 8 + gcc/testsuite/gcc.target/i386/pr103973-16.c | 8 + gcc/testsuite/gcc.target/i386/pr103973-17.c | 8 + gcc/testsuite/gcc.target/i386/pr103973-18.c | 8 + gcc/testsuite/gcc.target/i386/pr103973-19.c | 8 + gcc/testsuite/gcc.target/i386/pr103973-2.c | 7 + gcc/testsuite/gcc.target/i386/pr103973-20.c | 8 + gcc/testsuite/gcc.target/i386/pr103973-3.c | 8 + gcc/testsuite/gcc.target/i386/pr103973-4.c | 8 + gcc/testsuite/gcc.target/i386/pr103973-5.c | 85 + gcc/testsuite/gcc.target/i386/pr103973-6.c | 7 + gcc/testsuite/gcc.target/i386/pr103973-7.c | 8 + gcc/testsuite/gcc.target/i386/pr103973-8.c | 8 + gcc/testsuite/gcc.target/i386/pr103973-9.c | 89 + gcc/testsuite/gcc.target/powerpc/pr103124.c | 12 + gcc/testsuite/gfortran.dg/gomp/allocate-2.f90 | 2 +- gcc/testsuite/gfortran.dg/gomp/requires-8.f90 | 2 +- gcc/testsuite/gfortran.dg/ieee/ieee_10.f90 | 12 +- gcc/testsuite/gfortran.dg/ieee/signaling_1.f90 | 6 +- gcc/testsuite/gfortran.dg/ieee/signaling_2.f90 | 72 + gcc/testsuite/gfortran.dg/ieee/signaling_2_c.c | 8 + gcc/testsuite/gfortran.dg/index_5.f90 | 2 + gcc/testsuite/gfortran.dg/len_trim.f90 | 6 + gcc/testsuite/gfortran.dg/maskl_1.f90 | 3 +- gcc/testsuite/gfortran.dg/maskr_1.f90 | 3 +- gcc/testsuite/gfortran.dg/scan_3.f90 | 5 +- gcc/testsuite/gfortran.dg/transfer_char_kind4.f90 | 115 + gcc/testsuite/gfortran.dg/verify_3.f90 | 5 +- gcc/testsuite/lib/gcov.exp | 9 +- gcc/testsuite/lib/target-supports.exp | 36 + gcc/{timevar.c => timevar.cc} | 0 gcc/timevar.def | 2 +- gcc/timevar.h | 2 +- gcc/{toplev.c => toplev.cc} | 0 gcc/toplev.h | 10 +- gcc/{tracer.c => tracer.cc} | 0 gcc/trans-mem.c | 5683 --- gcc/trans-mem.cc | 5683 +++ gcc/{tree-affine.c => tree-affine.cc} | 0 gcc/tree-call-cdce.c | 1240 - gcc/tree-call-cdce.cc | 1240 + gcc/tree-cfg.c | 10214 ---- gcc/tree-cfg.cc | 10239 ++++ gcc/tree-cfg.h | 1 + gcc/{tree-cfgcleanup.c => tree-cfgcleanup.cc} | 0 gcc/tree-cfgcleanup.h | 2 +- gcc/{tree-chrec.c => tree-chrec.cc} | 0 gcc/tree-complex.c | 1956 - gcc/tree-complex.cc | 1956 + gcc/tree-core.h | 6 +- gcc/{tree-data-ref.c => tree-data-ref.cc} | 0 gcc/{tree-dfa.c => tree-dfa.cc} | 0 gcc/{tree-diagnostic.c => tree-diagnostic.cc} | 0 gcc/{tree-dump.c => tree-dump.cc} | 0 gcc/tree-eh.c | 5052 -- gcc/tree-eh.cc | 5052 ++ gcc/{tree-emutls.c => tree-emutls.cc} | 0 gcc/tree-if-conv.c | 3510 -- gcc/tree-if-conv.cc | 3510 ++ gcc/tree-inline.c | 6634 --- gcc/tree-inline.cc | 6634 +++ gcc/tree-inline.h | 2 +- gcc/{tree-into-ssa.c => tree-into-ssa.cc} | 0 gcc/{tree-iterator.c => tree-iterator.cc} | 0 ...op-distribution.c => tree-loop-distribution.cc} | 0 gcc/tree-nested.c | 3755 -- gcc/tree-nested.cc | 3755 ++ gcc/{tree-nrv.c => tree-nrv.cc} | 0 gcc/{tree-object-size.c => tree-object-size.cc} | 0 gcc/tree-object-size.h | 2 +- gcc/tree-outof-ssa.c | 1329 - gcc/tree-outof-ssa.cc | 1329 + gcc/tree-parloops.c | 4241 -- gcc/tree-parloops.cc | 4241 ++ gcc/{tree-phinodes.c => tree-phinodes.cc} | 0 gcc/{tree-predcom.c => tree-predcom.cc} | 0 gcc/tree-pretty-print.c | 4623 -- gcc/tree-pretty-print.cc | 4623 ++ gcc/tree-profile.c | 911 - gcc/tree-profile.cc | 911 + 
...scalar-evolution.c => tree-scalar-evolution.cc} | 0 gcc/tree-sra.c | 4794 -- gcc/tree-sra.cc | 4794 ++ gcc/tree-ssa-address.c | 1213 - gcc/tree-ssa-address.cc | 1213 + gcc/tree-ssa-alias.c | 4336 -- gcc/tree-ssa-alias.cc | 4336 ++ gcc/tree-ssa-alias.h | 4 +- gcc/tree-ssa-ccp.c | 4640 -- gcc/tree-ssa-ccp.cc | 4640 ++ gcc/{tree-ssa-coalesce.c => tree-ssa-coalesce.cc} | 0 gcc/tree-ssa-coalesce.h | 2 +- gcc/{tree-ssa-copy.c => tree-ssa-copy.cc} | 0 gcc/{tree-ssa-dce.c => tree-ssa-dce.cc} | 0 gcc/{tree-ssa-dom.c => tree-ssa-dom.cc} | 0 gcc/{tree-ssa-dse.c => tree-ssa-dse.cc} | 0 gcc/{tree-ssa-forwprop.c => tree-ssa-forwprop.cc} | 0 ...{tree-ssa-ifcombine.c => tree-ssa-ifcombine.cc} | 0 gcc/tree-ssa-live.c | 1633 - gcc/tree-ssa-live.cc | 1633 + gcc/{tree-ssa-loop-ch.c => tree-ssa-loop-ch.cc} | 0 gcc/{tree-ssa-loop-im.c => tree-ssa-loop-im.cc} | 0 ...ssa-loop-ivcanon.c => tree-ssa-loop-ivcanon.cc} | 0 ...e-ssa-loop-ivopts.c => tree-ssa-loop-ivopts.cc} | 0 gcc/tree-ssa-loop-manip.c | 1677 - gcc/tree-ssa-loop-manip.cc | 1677 + ...ree-ssa-loop-niter.c => tree-ssa-loop-niter.cc} | 0 ...a-loop-prefetch.c => tree-ssa-loop-prefetch.cc} | 0 ...ree-ssa-loop-split.c => tree-ssa-loop-split.cc} | 0 gcc/tree-ssa-loop-unswitch.c | 1042 - gcc/tree-ssa-loop-unswitch.cc | 1042 + gcc/{tree-ssa-loop.c => tree-ssa-loop.cc} | 0 gcc/tree-ssa-math-opts.c | 4847 -- gcc/tree-ssa-math-opts.cc | 5070 ++ gcc/tree-ssa-operands.c | 1415 - gcc/tree-ssa-operands.cc | 1415 + gcc/tree-ssa-phiopt.c | 3879 -- gcc/tree-ssa-phiopt.cc | 3854 ++ gcc/{tree-ssa-phiprop.c => tree-ssa-phiprop.cc} | 0 gcc/tree-ssa-pre.c | 4481 -- gcc/tree-ssa-pre.cc | 4481 ++ ...{tree-ssa-propagate.c => tree-ssa-propagate.cc} | 0 gcc/tree-ssa-reassoc.c | 7052 --- gcc/tree-ssa-reassoc.cc | 7052 +++ gcc/tree-ssa-sccvn.c | 8250 ---- gcc/tree-ssa-sccvn.cc | 8250 ++++ gcc/tree-ssa-sccvn.h | 2 +- gcc/tree-ssa-scopedtables.c | 1201 - gcc/tree-ssa-scopedtables.cc | 1201 + gcc/{tree-ssa-sink.c => tree-ssa-sink.cc} | 0 gcc/tree-ssa-strlen.c | 5970 --- gcc/tree-ssa-strlen.cc | 5970 +++ gcc/tree-ssa-strlen.h | 2 +- ...e-ssa-structalias.c => tree-ssa-structalias.cc} | 0 gcc/tree-ssa-tail-merge.c | 1818 - gcc/tree-ssa-tail-merge.cc | 1818 + gcc/{tree-ssa-ter.c => tree-ssa-ter.cc} | 0 gcc/tree-ssa-ter.h | 2 +- ...threadbackward.c => tree-ssa-threadbackward.cc} | 0 ...ree-ssa-threadedge.c => tree-ssa-threadedge.cc} | 0 ...ssa-threadupdate.c => tree-ssa-threadupdate.cc} | 0 gcc/tree-ssa-threadupdate.h | 2 +- gcc/{tree-ssa-uncprop.c => tree-ssa-uncprop.cc} | 0 gcc/tree-ssa-uninit.c | 1358 - gcc/tree-ssa-uninit.cc | 1447 + gcc/{tree-ssa.c => tree-ssa.cc} | 0 gcc/{tree-ssanames.c => tree-ssanames.cc} | 0 gcc/{tree-stdarg.c => tree-stdarg.cc} | 0 gcc/tree-streamer-in.c | 1130 - gcc/tree-streamer-in.cc | 1130 + gcc/tree-streamer-out.c | 1045 - gcc/tree-streamer-out.cc | 1045 + gcc/{tree-streamer.c => tree-streamer.cc} | 0 gcc/tree-streamer.h | 6 +- ...itch-conversion.c => tree-switch-conversion.cc} | 0 gcc/{tree-tailcall.c => tree-tailcall.cc} | 0 gcc/tree-vect-data-refs.c | 6814 --- gcc/tree-vect-data-refs.cc | 6817 +++ gcc/{tree-vect-generic.c => tree-vect-generic.cc} | 0 ...e-vect-loop-manip.c => tree-vect-loop-manip.cc} | 0 gcc/{tree-vect-loop.c => tree-vect-loop.cc} | 0 gcc/tree-vect-patterns.c | 5856 --- gcc/tree-vect-patterns.cc | 5856 +++ ...ct-slp-patterns.c => tree-vect-slp-patterns.cc} | 0 gcc/{tree-vect-slp.c => tree-vect-slp.cc} | 0 gcc/tree-vect-stmts.c | 12484 ----- gcc/tree-vect-stmts.cc | 12484 +++++ ...ree-vector-builder.c => tree-vector-builder.cc} | 0 
gcc/tree-vectorizer.c | 2026 - gcc/tree-vectorizer.cc | 2026 + gcc/tree-vectorizer.h | 20 +- gcc/{tree-vrp.c => tree-vrp.cc} | 0 gcc/tree.c | 15221 ------ gcc/tree.cc | 15221 ++++++ gcc/tree.def | 2 +- gcc/tree.h | 10 +- gcc/{tsan.c => tsan.cc} | 0 gcc/{typed-splay-tree.c => typed-splay-tree.cc} | 0 gcc/{ubsan.c => ubsan.cc} | 0 gcc/{valtrack.c => valtrack.cc} | 0 gcc/value-prof.c | 1966 - gcc/value-prof.cc | 1966 + gcc/value-prof.h | 4 +- gcc/value-range.cc | 2 +- gcc/value-range.h | 2 +- gcc/var-tracking.c | 10613 ---- gcc/var-tracking.cc | 10613 ++++ gcc/varasm.c | 8509 ---- gcc/varasm.cc | 8509 ++++ gcc/{varpool.c => varpool.cc} | 0 gcc/{vec-perm-indices.c => vec-perm-indices.cc} | 0 gcc/{vec.c => vec.cc} | 0 gcc/vec.h | 2 +- gcc/vmsdbgout.c | 1851 - gcc/vmsdbgout.cc | 1851 + gcc/{vr-values.c => vr-values.cc} | 0 gcc/vtable-verify.c | 850 - gcc/vtable-verify.cc | 850 + gcc/vtable-verify.h | 2 +- gcc/warning-control.cc | 3 +- gcc/{web.c => web.cc} | 0 gcc/xcoffout.c | 494 - gcc/xcoffout.cc | 494 + gcc/xcoffout.h | 8 +- libcpp/ChangeLog | 80 + libcpp/Makefile.in | 10 +- libcpp/charset.c | 2510 - libcpp/charset.cc | 2510 + libcpp/directives.c | 2799 -- libcpp/directives.cc | 2799 ++ libcpp/{errors.c => errors.cc} | 0 libcpp/{expr.c => expr.cc} | 0 libcpp/files.c | 2175 - libcpp/files.cc | 2174 + libcpp/{identifiers.c => identifiers.cc} | 0 libcpp/include/cpplib.h | 22 +- libcpp/include/line-map.h | 6 +- libcpp/include/mkdeps.h | 2 +- libcpp/init.c | 914 - libcpp/init.cc | 914 + libcpp/internal.h | 32 +- libcpp/{lex.c => lex.cc} | 0 libcpp/line-map.c | 2555 - libcpp/line-map.cc | 2555 + libcpp/{macro.c => macro.cc} | 0 libcpp/{makeucnid.c => makeucnid.cc} | 0 libcpp/{mkdeps.c => mkdeps.cc} | 0 libcpp/{pch.c => pch.cc} | 0 libcpp/{symtab.c => symtab.cc} | 0 libcpp/traditional.c | 1321 - libcpp/traditional.cc | 1321 + libgcc/ChangeLog | 16 + libgcc/config/i386/gthr-win32.c | 2 - libgcc/config/i386/gthr-win32.h | 23 +- libgcc/libgcov-driver.c | 2 +- libgfortran/ChangeLog | 30 + libgfortran/ieee/ieee_arithmetic.F90 | 284 +- libgfortran/ieee/ieee_helper.c | 74 + libgfortran/mk-kinds-h.sh | 7 + libgfortran/runtime/environ.c | 111 +- libgomp/ChangeLog | 21 + libgomp/plugin/plugin-gcn.c | 82 +- .../testsuite/libgomp.oacc-c++/privatized-ref-2.C | 178 + .../testsuite/libgomp.oacc-c++/privatized-ref-3.C | 212 + .../libgomp.oacc-fortran/privatized-ref-1.f95 | 163 + libiberty/ChangeLog | 4 + libiberty/regex.c | 4 + libstdc++-v3/ChangeLog | 397 + libstdc++-v3/Makefile.in | 7 + libstdc++-v3/acinclude.m4 | 155 +- libstdc++-v3/config.h.in | 10 + libstdc++-v3/configure | 321 +- libstdc++-v3/configure.ac | 3 + libstdc++-v3/doc/Makefile.in | 7 + libstdc++-v3/doc/html/manual/status.html | 36 +- libstdc++-v3/doc/xml/manual/status_cxx2017.xml | 8 +- libstdc++-v3/doc/xml/manual/status_cxx2020.xml | 24 +- libstdc++-v3/include/Makefile.am | 3 + libstdc++-v3/include/Makefile.in | 10 + libstdc++-v3/include/bits/shared_ptr_atomic.h | 455 + libstdc++-v3/include/bits/shared_ptr_base.h | 17 + libstdc++-v3/include/experimental/bits/simd.h | 389 +- .../include/experimental/bits/simd_builtin.h | 41 +- .../include/experimental/bits/simd_detail.h | 40 + .../include/experimental/bits/simd_fixed_size.h | 39 +- libstdc++-v3/include/experimental/bits/simd_math.h | 45 +- libstdc++-v3/include/experimental/bits/simd_neon.h | 4 +- libstdc++-v3/include/experimental/bits/simd_ppc.h | 4 +- .../include/experimental/bits/simd_scalar.h | 71 +- libstdc++-v3/include/experimental/bits/simd_x86.h | 4 +- libstdc++-v3/include/std/stacktrace | 
672 + libstdc++-v3/include/std/version | 4 + libstdc++-v3/libsupc++/Makefile.in | 7 + libstdc++-v3/po/Makefile.in | 7 + libstdc++-v3/python/Makefile.in | 7 + libstdc++-v3/src/Makefile.am | 11 +- libstdc++-v3/src/Makefile.in | 14 +- libstdc++-v3/src/c++11/Makefile.in | 7 + libstdc++-v3/src/c++17/Makefile.in | 7 + libstdc++-v3/src/c++17/fast_float/LOCAL_PATCHES | 4 + libstdc++-v3/src/c++17/fast_float/MERGE | 4 + libstdc++-v3/src/c++17/fast_float/README.md | 218 + libstdc++-v3/src/c++17/fast_float/fast_float.h | 2905 ++ libstdc++-v3/src/c++17/floating_from_chars.cc | 397 + libstdc++-v3/src/c++20/Makefile.in | 7 + libstdc++-v3/src/c++98/Makefile.in | 7 + libstdc++-v3/src/filesystem/Makefile.in | 7 + libstdc++-v3/src/libbacktrace/Makefile.am | 101 + libstdc++-v3/src/libbacktrace/Makefile.in | 860 + libstdc++-v3/src/libbacktrace/backtrace-rename.h | 41 + .../src/libbacktrace/backtrace-supported.h.in | 61 + libstdc++-v3/src/libbacktrace/config.h.in | 184 + .../testsuite/17_intro/headers/c++1998/103650.cc | 13 + libstdc++-v3/testsuite/20_util/from_chars/7.cc | 152 + .../20_util/shared_ptr/atomic/atomic_shared_ptr.cc | 150 + libstdc++-v3/testsuite/20_util/stacktrace/entry.cc | 53 + .../testsuite/20_util/stacktrace/synopsis.cc | 46 + .../testsuite/20_util/stacktrace/version.cc | 11 + .../testsuite/20_util/weak_ptr/atomic_weak_ptr.cc | 95 + libstdc++-v3/testsuite/Makefile.in | 7 + libstdc++-v3/testsuite/lib/libstdc++.exp | 8 + 1867 files changed, 1663961 insertions(+), 1645562 deletions(-) diff --cc gcc/config/ia64/ia64.cc index 00000000000,f9fb681a36c..c68b2ff69bd mode 000000,100644..100644 --- a/gcc/config/ia64/ia64.cc +++ b/gcc/config/ia64/ia64.cc @@@ -1,0 -1,11923 +1,11927 @@@ + /* Definitions of target machine for GNU compiler. + Copyright (C) 1999-2022 Free Software Foundation, Inc. + Contributed by James E. Wilson and + David Mosberger . + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + + #define IN_TARGET_CODE 1 + + #include "config.h" + #include "system.h" + #include "coretypes.h" + #include "backend.h" + #include "target.h" + #include "rtl.h" + #include "tree.h" + #include "memmodel.h" + #include "cfghooks.h" + #include "df.h" + #include "tm_p.h" + #include "stringpool.h" + #include "attribs.h" + #include "optabs.h" + #include "regs.h" + #include "emit-rtl.h" + #include "recog.h" + #include "diagnostic-core.h" + #include "alias.h" + #include "fold-const.h" + #include "stor-layout.h" + #include "calls.h" + #include "varasm.h" + #include "output.h" + #include "insn-attr.h" + #include "flags.h" + #include "explow.h" + #include "expr.h" + #include "cfgrtl.h" + #include "libfuncs.h" + #include "sched-int.h" + #include "common/common-target.h" + #include "langhooks.h" + #include "gimplify.h" + #include "intl.h" + #include "debug.h" + #include "dbgcnt.h" + #include "tm-constrs.h" + #include "sel-sched.h" + #include "reload.h" + #include "opts.h" + #include "dumpfile.h" + #include "builtins.h" + + /* This file should be included last. 
*/ + #include "target-def.h" + + /* This is used for communication between ASM_OUTPUT_LABEL and + ASM_OUTPUT_LABELREF. */ + int ia64_asm_output_label = 0; + + /* Register names for ia64_expand_prologue. */ + static const char * const ia64_reg_numbers[96] = + { "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", + "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47", + "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55", + "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63", + "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71", + "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79", + "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87", + "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95", + "r96", "r97", "r98", "r99", "r100","r101","r102","r103", + "r104","r105","r106","r107","r108","r109","r110","r111", + "r112","r113","r114","r115","r116","r117","r118","r119", + "r120","r121","r122","r123","r124","r125","r126","r127"}; + + /* ??? These strings could be shared with REGISTER_NAMES. */ + static const char * const ia64_input_reg_names[8] = + { "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" }; + + /* ??? These strings could be shared with REGISTER_NAMES. */ + static const char * const ia64_local_reg_names[80] = + { "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7", + "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15", + "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23", + "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31", + "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39", + "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47", + "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55", + "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63", + "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71", + "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" }; + + /* ??? These strings could be shared with REGISTER_NAMES. */ + static const char * const ia64_output_reg_names[8] = + { "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" }; + + /* Variables which are this size or smaller are put in the sdata/sbss + sections. */ + + unsigned int ia64_section_threshold; + + /* The following variable is used by the DFA insn scheduler. The value is + TRUE if we do insn bundling instead of insn scheduling. */ + int bundling_p = 0; + + enum ia64_frame_regs + { + reg_fp, + reg_save_b0, + reg_save_pr, + reg_save_ar_pfs, + reg_save_ar_unat, + reg_save_ar_lc, + reg_save_gp, + number_of_ia64_frame_regs + }; + + /* Structure to be filled in by ia64_compute_frame_size with register + save masks and offsets for the current function. */ + + struct ia64_frame_info + { + HOST_WIDE_INT total_size; /* size of the stack frame, not including + the caller's scratch area. */ + HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */ + HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */ + HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */ + HARD_REG_SET mask; /* mask of saved registers. */ + unsigned int gr_used_mask; /* mask of registers in use as gr spill + registers or long-term scratches. */ + int n_spilled; /* number of spilled registers. */ + int r[number_of_ia64_frame_regs]; /* Frame related registers. */ + int n_input_regs; /* number of input registers used. */ + int n_local_regs; /* number of local registers used. */ + int n_output_regs; /* number of output registers used. 
*/ + int n_rotate_regs; /* number of rotating registers used. */ + + char need_regstk; /* true if a .regstk directive needed. */ + char initialized; /* true if the data is finalized. */ + }; + + /* Current frame information calculated by ia64_compute_frame_size. */ + static struct ia64_frame_info current_frame_info; + /* The actual registers that are emitted. */ + static int emitted_frame_related_regs[number_of_ia64_frame_regs]; + + static int ia64_first_cycle_multipass_dfa_lookahead (void); + static void ia64_dependencies_evaluation_hook (rtx_insn *, rtx_insn *); + static void ia64_init_dfa_pre_cycle_insn (void); + static rtx ia64_dfa_pre_cycle_insn (void); + static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int); + static int ia64_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *); + static void ia64_h_i_d_extended (void); + static void * ia64_alloc_sched_context (void); + static void ia64_init_sched_context (void *, bool); + static void ia64_set_sched_context (void *); + static void ia64_clear_sched_context (void *); + static void ia64_free_sched_context (void *); + static int ia64_mode_to_int (machine_mode); + static void ia64_set_sched_flags (spec_info_t); + static ds_t ia64_get_insn_spec_ds (rtx_insn *); + static ds_t ia64_get_insn_checked_ds (rtx_insn *); + static bool ia64_skip_rtx_p (const_rtx); + static int ia64_speculate_insn (rtx_insn *, ds_t, rtx *); + static bool ia64_needs_block_p (ds_t); + static rtx ia64_gen_spec_check (rtx_insn *, rtx_insn *, ds_t); + static int ia64_spec_check_p (rtx); + static int ia64_spec_check_src_p (rtx); + static rtx gen_tls_get_addr (void); + static rtx gen_thread_pointer (void); + static int find_gr_spill (enum ia64_frame_regs, int); + static int next_scratch_gr_reg (void); + static void mark_reg_gr_used_mask (rtx, void *); + static void ia64_compute_frame_size (HOST_WIDE_INT); + static void setup_spill_pointers (int, rtx, HOST_WIDE_INT); + static void finish_spill_pointers (void); + static rtx spill_restore_mem (rtx, HOST_WIDE_INT); + static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx); + static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT); + static rtx gen_movdi_x (rtx, rtx, rtx); + static rtx gen_fr_spill_x (rtx, rtx, rtx); + static rtx gen_fr_restore_x (rtx, rtx, rtx); + + static void ia64_option_override (void); + static bool ia64_can_eliminate (const int, const int); + static machine_mode hfa_element_mode (const_tree, bool); + static void ia64_setup_incoming_varargs (cumulative_args_t, + const function_arg_info &, + int *, int); + static int ia64_arg_partial_bytes (cumulative_args_t, + const function_arg_info &); + static rtx ia64_function_arg (cumulative_args_t, const function_arg_info &); + static rtx ia64_function_incoming_arg (cumulative_args_t, + const function_arg_info &); + static void ia64_function_arg_advance (cumulative_args_t, + const function_arg_info &); + static pad_direction ia64_function_arg_padding (machine_mode, const_tree); + static unsigned int ia64_function_arg_boundary (machine_mode, + const_tree); + static bool ia64_function_ok_for_sibcall (tree, tree); + static bool ia64_return_in_memory (const_tree, const_tree); + static rtx ia64_function_value (const_tree, const_tree, bool); + static rtx ia64_libcall_value (machine_mode, const_rtx); + static bool ia64_function_value_regno_p (const unsigned int); + static int ia64_register_move_cost (machine_mode, reg_class_t, + reg_class_t); + static int ia64_memory_move_cost (machine_mode mode, reg_class_t, + bool); + 
static bool ia64_rtx_costs (rtx, machine_mode, int, int, int *, bool); + static int ia64_unspec_may_trap_p (const_rtx, unsigned); + static void fix_range (const char *); + static struct machine_function * ia64_init_machine_status (void); + static void emit_insn_group_barriers (FILE *); + static void emit_all_insn_group_barriers (FILE *); + static void final_emit_insn_group_barriers (FILE *); + static void emit_predicate_relation_info (void); + static void ia64_reorg (void); + static bool ia64_in_small_data_p (const_tree); + static void process_epilogue (FILE *, rtx, bool, bool); + + static bool ia64_assemble_integer (rtx, unsigned int, int); + static void ia64_output_function_prologue (FILE *); + static void ia64_output_function_epilogue (FILE *); + static void ia64_output_function_end_prologue (FILE *); + + static void ia64_print_operand (FILE *, rtx, int); + static void ia64_print_operand_address (FILE *, machine_mode, rtx); + static bool ia64_print_operand_punct_valid_p (unsigned char code); + + static int ia64_issue_rate (void); + static int ia64_adjust_cost (rtx_insn *, int, rtx_insn *, int, dw_t); + static void ia64_sched_init (FILE *, int, int); + static void ia64_sched_init_global (FILE *, int, int); + static void ia64_sched_finish_global (FILE *, int); + static void ia64_sched_finish (FILE *, int); + static int ia64_dfa_sched_reorder (FILE *, int, rtx_insn **, int *, int, int); + static int ia64_sched_reorder (FILE *, int, rtx_insn **, int *, int); + static int ia64_sched_reorder2 (FILE *, int, rtx_insn **, int *, int); + static int ia64_variable_issue (FILE *, int, rtx_insn *, int); + + static void ia64_asm_unwind_emit (FILE *, rtx_insn *); + static void ia64_asm_emit_except_personality (rtx); + static void ia64_asm_init_sections (void); + + static enum unwind_info_type ia64_debug_unwind_info (void); + + static struct bundle_state *get_free_bundle_state (void); + static void free_bundle_state (struct bundle_state *); + static void initiate_bundle_states (void); + static void finish_bundle_states (void); + static int insert_bundle_state (struct bundle_state *); + static void initiate_bundle_state_table (void); + static void finish_bundle_state_table (void); + static int try_issue_nops (struct bundle_state *, int); + static int try_issue_insn (struct bundle_state *, rtx); + static void issue_nops_and_insn (struct bundle_state *, int, rtx_insn *, + int, int); + static int get_max_pos (state_t); + static int get_template (state_t, int); + + static rtx_insn *get_next_important_insn (rtx_insn *, rtx_insn *); + static bool important_for_bundling_p (rtx_insn *); + static bool unknown_for_bundling_p (rtx_insn *); + static void bundling (FILE *, int, rtx_insn *, rtx_insn *); + + static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, + HOST_WIDE_INT, tree); + static void ia64_file_start (void); + static void ia64_globalize_decl_name (FILE *, tree); + + static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED; + static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED; + static section *ia64_select_rtx_section (machine_mode, rtx, + unsigned HOST_WIDE_INT); + static void ia64_output_dwarf_dtprel (FILE *, int, rtx) + ATTRIBUTE_UNUSED; + static unsigned int ia64_section_type_flags (tree, const char *, int); ++static void ia64_linux_file_end (void) ++ ATTRIBUTE_UNUSED; + static void ia64_init_libfuncs (void) + ATTRIBUTE_UNUSED; + static void ia64_hpux_init_libfuncs (void) + ATTRIBUTE_UNUSED; + static void ia64_sysv4_init_libfuncs (void) + ATTRIBUTE_UNUSED; + static void 
ia64_vms_init_libfuncs (void) + ATTRIBUTE_UNUSED; + static void ia64_soft_fp_init_libfuncs (void) + ATTRIBUTE_UNUSED; + static bool ia64_vms_valid_pointer_mode (scalar_int_mode mode) + ATTRIBUTE_UNUSED; + static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *) + ATTRIBUTE_UNUSED; + + static bool ia64_attribute_takes_identifier_p (const_tree); + static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *); + static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *); + static void ia64_encode_section_info (tree, rtx, int); + static rtx ia64_struct_value_rtx (tree, int); + static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *); + static bool ia64_scalar_mode_supported_p (scalar_mode mode); + static bool ia64_vector_mode_supported_p (machine_mode mode); + static bool ia64_legitimate_constant_p (machine_mode, rtx); + static bool ia64_legitimate_address_p (machine_mode, rtx, bool); + static bool ia64_cannot_force_const_mem (machine_mode, rtx); + static const char *ia64_mangle_type (const_tree); + static const char *ia64_invalid_conversion (const_tree, const_tree); + static const char *ia64_invalid_unary_op (int, const_tree); + static const char *ia64_invalid_binary_op (int, const_tree, const_tree); + static machine_mode ia64_c_mode_for_suffix (char); + static void ia64_trampoline_init (rtx, tree, rtx); + static void ia64_override_options_after_change (void); + static bool ia64_member_type_forces_blk (const_tree, machine_mode); + + static tree ia64_fold_builtin (tree, int, tree *, bool); + static tree ia64_builtin_decl (unsigned, bool); + + static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t); + static fixed_size_mode ia64_get_reg_raw_mode (int regno); + static section * ia64_hpux_function_section (tree, enum node_frequency, + bool, bool); + + static bool ia64_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx, + const vec_perm_indices &); + + static unsigned int ia64_hard_regno_nregs (unsigned int, machine_mode); + static bool ia64_hard_regno_mode_ok (unsigned int, machine_mode); + static bool ia64_modes_tieable_p (machine_mode, machine_mode); + static bool ia64_can_change_mode_class (machine_mode, machine_mode, + reg_class_t); + + #define MAX_VECT_LEN 8 + + struct expand_vec_perm_d + { + rtx target, op0, op1; + unsigned char perm[MAX_VECT_LEN]; + machine_mode vmode; + unsigned char nelt; + bool one_operand_p; + bool testing_p; + }; + + static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d); + + + /* Table of valid machine attributes. */ + static const struct attribute_spec ia64_attribute_table[] = + { + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, + affects_type_identity, handler, exclude } */ + { "syscall_linkage", 0, 0, false, true, true, false, NULL, NULL }, + { "model", 1, 1, true, false, false, false, + ia64_handle_model_attribute, NULL }, + #if TARGET_ABI_OPEN_VMS + { "common_object", 1, 1, true, false, false, false, + ia64_vms_common_object_attribute, NULL }, + #endif + { "version_id", 1, 1, true, false, false, false, + ia64_handle_version_id_attribute, NULL }, + { NULL, 0, 0, false, false, false, false, NULL, NULL } + }; + + /* Initialize the GCC target structure. 
*/ + #undef TARGET_ATTRIBUTE_TABLE + #define TARGET_ATTRIBUTE_TABLE ia64_attribute_table + + #undef TARGET_INIT_BUILTINS + #define TARGET_INIT_BUILTINS ia64_init_builtins + + #undef TARGET_FOLD_BUILTIN + #define TARGET_FOLD_BUILTIN ia64_fold_builtin + + #undef TARGET_EXPAND_BUILTIN + #define TARGET_EXPAND_BUILTIN ia64_expand_builtin + + #undef TARGET_BUILTIN_DECL + #define TARGET_BUILTIN_DECL ia64_builtin_decl + + #undef TARGET_ASM_BYTE_OP + #define TARGET_ASM_BYTE_OP "\tdata1\t" + #undef TARGET_ASM_ALIGNED_HI_OP + #define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t" + #undef TARGET_ASM_ALIGNED_SI_OP + #define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t" + #undef TARGET_ASM_ALIGNED_DI_OP + #define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t" + #undef TARGET_ASM_UNALIGNED_HI_OP + #define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t" + #undef TARGET_ASM_UNALIGNED_SI_OP + #define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t" + #undef TARGET_ASM_UNALIGNED_DI_OP + #define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t" + #undef TARGET_ASM_INTEGER + #define TARGET_ASM_INTEGER ia64_assemble_integer + + #undef TARGET_OPTION_OVERRIDE + #define TARGET_OPTION_OVERRIDE ia64_option_override + + #undef TARGET_ASM_FUNCTION_PROLOGUE + #define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue + #undef TARGET_ASM_FUNCTION_END_PROLOGUE + #define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue + #undef TARGET_ASM_FUNCTION_EPILOGUE + #define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue + + #undef TARGET_PRINT_OPERAND + #define TARGET_PRINT_OPERAND ia64_print_operand + #undef TARGET_PRINT_OPERAND_ADDRESS + #define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address + #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P + #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p + + #undef TARGET_IN_SMALL_DATA_P + #define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p + + #undef TARGET_SCHED_ADJUST_COST + #define TARGET_SCHED_ADJUST_COST ia64_adjust_cost + #undef TARGET_SCHED_ISSUE_RATE + #define TARGET_SCHED_ISSUE_RATE ia64_issue_rate + #undef TARGET_SCHED_VARIABLE_ISSUE + #define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue + #undef TARGET_SCHED_INIT + #define TARGET_SCHED_INIT ia64_sched_init + #undef TARGET_SCHED_FINISH + #define TARGET_SCHED_FINISH ia64_sched_finish + #undef TARGET_SCHED_INIT_GLOBAL + #define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global + #undef TARGET_SCHED_FINISH_GLOBAL + #define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global + #undef TARGET_SCHED_REORDER + #define TARGET_SCHED_REORDER ia64_sched_reorder + #undef TARGET_SCHED_REORDER2 + #define TARGET_SCHED_REORDER2 ia64_sched_reorder2 + + #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK + #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook + + #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD + #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead + + #undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN + #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn + #undef TARGET_SCHED_DFA_PRE_CYCLE_INSN + #define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn + + #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD + #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\ + ia64_first_cycle_multipass_dfa_lookahead_guard + + #undef TARGET_SCHED_DFA_NEW_CYCLE + #define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle + + #undef TARGET_SCHED_H_I_D_EXTENDED + #define TARGET_SCHED_H_I_D_EXTENDED 
ia64_h_i_d_extended + + #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT + #define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context + + #undef TARGET_SCHED_INIT_SCHED_CONTEXT + #define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context + + #undef TARGET_SCHED_SET_SCHED_CONTEXT + #define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context + + #undef TARGET_SCHED_CLEAR_SCHED_CONTEXT + #define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context + + #undef TARGET_SCHED_FREE_SCHED_CONTEXT + #define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context + + #undef TARGET_SCHED_SET_SCHED_FLAGS + #define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags + + #undef TARGET_SCHED_GET_INSN_SPEC_DS + #define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds + + #undef TARGET_SCHED_GET_INSN_CHECKED_DS + #define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds + + #undef TARGET_SCHED_SPECULATE_INSN + #define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn + + #undef TARGET_SCHED_NEEDS_BLOCK_P + #define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p + + #undef TARGET_SCHED_GEN_SPEC_CHECK + #define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check + + #undef TARGET_SCHED_SKIP_RTX_P + #define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p + + #undef TARGET_FUNCTION_OK_FOR_SIBCALL + #define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall + #undef TARGET_ARG_PARTIAL_BYTES + #define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes + #undef TARGET_FUNCTION_ARG + #define TARGET_FUNCTION_ARG ia64_function_arg + #undef TARGET_FUNCTION_INCOMING_ARG + #define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg + #undef TARGET_FUNCTION_ARG_ADVANCE + #define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance + #undef TARGET_FUNCTION_ARG_PADDING + #define TARGET_FUNCTION_ARG_PADDING ia64_function_arg_padding + #undef TARGET_FUNCTION_ARG_BOUNDARY + #define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary + + #undef TARGET_ASM_OUTPUT_MI_THUNK + #define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk + #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK + #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true + + #undef TARGET_ASM_FILE_START + #define TARGET_ASM_FILE_START ia64_file_start + + #undef TARGET_ASM_GLOBALIZE_DECL_NAME + #define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name + + #undef TARGET_REGISTER_MOVE_COST + #define TARGET_REGISTER_MOVE_COST ia64_register_move_cost + #undef TARGET_MEMORY_MOVE_COST + #define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost + #undef TARGET_RTX_COSTS + #define TARGET_RTX_COSTS ia64_rtx_costs + #undef TARGET_ADDRESS_COST + #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0 + + #undef TARGET_UNSPEC_MAY_TRAP_P + #define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p + + #undef TARGET_MACHINE_DEPENDENT_REORG + #define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg + + #undef TARGET_ENCODE_SECTION_INFO + #define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info + + #undef TARGET_SECTION_TYPE_FLAGS + #define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags + + #ifdef HAVE_AS_TLS + #undef TARGET_ASM_OUTPUT_DWARF_DTPREL + #define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel + #endif + + /* ??? Investigate. 
*/ + #if 0 + #undef TARGET_PROMOTE_PROTOTYPES + #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true + #endif + + #undef TARGET_FUNCTION_VALUE + #define TARGET_FUNCTION_VALUE ia64_function_value + #undef TARGET_LIBCALL_VALUE + #define TARGET_LIBCALL_VALUE ia64_libcall_value + #undef TARGET_FUNCTION_VALUE_REGNO_P + #define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p + + #undef TARGET_STRUCT_VALUE_RTX + #define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx + #undef TARGET_RETURN_IN_MEMORY + #define TARGET_RETURN_IN_MEMORY ia64_return_in_memory + #undef TARGET_SETUP_INCOMING_VARARGS + #define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs + #undef TARGET_STRICT_ARGUMENT_NAMING + #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true + #undef TARGET_MUST_PASS_IN_STACK + #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size + #undef TARGET_GET_RAW_RESULT_MODE + #define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode + #undef TARGET_GET_RAW_ARG_MODE + #define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode + + #undef TARGET_MEMBER_TYPE_FORCES_BLK + #define TARGET_MEMBER_TYPE_FORCES_BLK ia64_member_type_forces_blk + + #undef TARGET_GIMPLIFY_VA_ARG_EXPR + #define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg + + #undef TARGET_ASM_UNWIND_EMIT + #define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit + #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY + #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality + #undef TARGET_ASM_INIT_SECTIONS + #define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections + + #undef TARGET_DEBUG_UNWIND_INFO + #define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info + + #undef TARGET_SCALAR_MODE_SUPPORTED_P + #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p + #undef TARGET_VECTOR_MODE_SUPPORTED_P + #define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p + + #undef TARGET_LEGITIMATE_CONSTANT_P + #define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p + #undef TARGET_LEGITIMATE_ADDRESS_P + #define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p + + #undef TARGET_LRA_P + #define TARGET_LRA_P hook_bool_void_false + + #undef TARGET_CANNOT_FORCE_CONST_MEM + #define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem + + #undef TARGET_MANGLE_TYPE + #define TARGET_MANGLE_TYPE ia64_mangle_type + + #undef TARGET_INVALID_CONVERSION + #define TARGET_INVALID_CONVERSION ia64_invalid_conversion + #undef TARGET_INVALID_UNARY_OP + #define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op + #undef TARGET_INVALID_BINARY_OP + #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op + + #undef TARGET_C_MODE_FOR_SUFFIX + #define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix + + #undef TARGET_CAN_ELIMINATE + #define TARGET_CAN_ELIMINATE ia64_can_eliminate + + #undef TARGET_TRAMPOLINE_INIT + #define TARGET_TRAMPOLINE_INIT ia64_trampoline_init + + #undef TARGET_CAN_USE_DOLOOP_P + #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost + #undef TARGET_INVALID_WITHIN_DOLOOP + #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null + + #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE + #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change + + #undef TARGET_PREFERRED_RELOAD_CLASS + #define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class + + #undef TARGET_DELAY_SCHED2 + #define TARGET_DELAY_SCHED2 true + + /* Variable tracking should be run after all optimizations which + change order of insns. It also needs a valid CFG. 
*/ + #undef TARGET_DELAY_VARTRACK + #define TARGET_DELAY_VARTRACK true + + #undef TARGET_VECTORIZE_VEC_PERM_CONST + #define TARGET_VECTORIZE_VEC_PERM_CONST ia64_vectorize_vec_perm_const + + #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P + #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p + + #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS + #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 0 + + #undef TARGET_HARD_REGNO_NREGS + #define TARGET_HARD_REGNO_NREGS ia64_hard_regno_nregs + #undef TARGET_HARD_REGNO_MODE_OK + #define TARGET_HARD_REGNO_MODE_OK ia64_hard_regno_mode_ok + + #undef TARGET_MODES_TIEABLE_P + #define TARGET_MODES_TIEABLE_P ia64_modes_tieable_p + + #undef TARGET_CAN_CHANGE_MODE_CLASS + #define TARGET_CAN_CHANGE_MODE_CLASS ia64_can_change_mode_class + + #undef TARGET_CONSTANT_ALIGNMENT + #define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings + + struct gcc_target targetm = TARGET_INITIALIZER; + + /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain + identifier as an argument, so the front end shouldn't look it up. */ + + static bool + ia64_attribute_takes_identifier_p (const_tree attr_id) + { + if (is_attribute_p ("model", attr_id)) + return true; + #if TARGET_ABI_OPEN_VMS + if (is_attribute_p ("common_object", attr_id)) + return true; + #endif + return false; + } + + typedef enum + { + ADDR_AREA_NORMAL, /* normal address area */ + ADDR_AREA_SMALL /* addressable by "addl" (-2MB < addr < 2MB) */ + } + ia64_addr_area; + + static GTY(()) tree small_ident1; + static GTY(()) tree small_ident2; + + static void + init_idents (void) + { + if (small_ident1 == 0) + { + small_ident1 = get_identifier ("small"); + small_ident2 = get_identifier ("__small__"); + } + } + + /* Retrieve the address area that has been chosen for the given decl. 
*/ + + static ia64_addr_area + ia64_get_addr_area (tree decl) + { + tree model_attr; + + model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl)); + if (model_attr) + { + tree id; + + init_idents (); + id = TREE_VALUE (TREE_VALUE (model_attr)); + if (id == small_ident1 || id == small_ident2) + return ADDR_AREA_SMALL; + } + return ADDR_AREA_NORMAL; + } + + static tree + ia64_handle_model_attribute (tree *node, tree name, tree args, + int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) + { + ia64_addr_area addr_area = ADDR_AREA_NORMAL; + ia64_addr_area area; + tree arg, decl = *node; + + init_idents (); + arg = TREE_VALUE (args); + if (arg == small_ident1 || arg == small_ident2) + { + addr_area = ADDR_AREA_SMALL; + } + else + { + warning (OPT_Wattributes, "invalid argument of %qE attribute", + name); + *no_add_attrs = true; + } + + switch (TREE_CODE (decl)) + { + case VAR_DECL: + if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl)) + == FUNCTION_DECL) + && !TREE_STATIC (decl)) + { + error_at (DECL_SOURCE_LOCATION (decl), + "an address area attribute cannot be specified for " + "local variables"); + *no_add_attrs = true; + } + area = ia64_get_addr_area (decl); + if (area != ADDR_AREA_NORMAL && addr_area != area) + { + error ("address area of %q+D conflicts with previous " + "declaration", decl); + *no_add_attrs = true; + } + break; + + case FUNCTION_DECL: + error_at (DECL_SOURCE_LOCATION (decl), + "address area attribute cannot be specified for " + "functions"); + *no_add_attrs = true; + break; + + default: + warning (OPT_Wattributes, "%qE attribute ignored", + name); + *no_add_attrs = true; + break; + } + + return NULL_TREE; + } + + /* Part of the low level implementation of DEC Ada pragma Common_Object which + enables the shared use of variables stored in overlaid linker areas + corresponding to the use of Fortran COMMON. */ + + static tree + ia64_vms_common_object_attribute (tree *node, tree name, tree args, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) + { + tree decl = *node; + tree id; + + gcc_assert (DECL_P (decl)); + + DECL_COMMON (decl) = 1; + id = TREE_VALUE (args); + if (TREE_CODE (id) != IDENTIFIER_NODE && TREE_CODE (id) != STRING_CST) + { + error ("%qE attribute requires a string constant argument", name); + *no_add_attrs = true; + return NULL_TREE; + } + return NULL_TREE; + } + + /* Part of the low level implementation of DEC Ada pragma Common_Object. */ + + void + ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name, + unsigned HOST_WIDE_INT size, + unsigned int align) + { + tree attr = DECL_ATTRIBUTES (decl); + + if (attr) + attr = lookup_attribute ("common_object", attr); + if (attr) + { + tree id = TREE_VALUE (TREE_VALUE (attr)); + const char *name; + + if (TREE_CODE (id) == IDENTIFIER_NODE) + name = IDENTIFIER_POINTER (id); + else if (TREE_CODE (id) == STRING_CST) + name = TREE_STRING_POINTER (id); + else + abort (); + + fprintf (file, "\t.vms_common\t\"%s\",", name); + } + else + fprintf (file, "%s", COMMON_ASM_OP); + + /* Code from elfos.h. 
*/ + assemble_name (file, name); + fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED",%u", + size, align / BITS_PER_UNIT); + + fputc ('\n', file); + } + + static void + ia64_encode_addr_area (tree decl, rtx symbol) + { + int flags; + + flags = SYMBOL_REF_FLAGS (symbol); + switch (ia64_get_addr_area (decl)) + { + case ADDR_AREA_NORMAL: break; + case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break; + default: gcc_unreachable (); + } + SYMBOL_REF_FLAGS (symbol) = flags; + } + + static void + ia64_encode_section_info (tree decl, rtx rtl, int first) + { + default_encode_section_info (decl, rtl, first); + + /* Careful not to prod global register variables. */ + if (TREE_CODE (decl) == VAR_DECL + && GET_CODE (DECL_RTL (decl)) == MEM + && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF + && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))) + ia64_encode_addr_area (decl, XEXP (rtl, 0)); + } + + /* Return 1 if the operands of a move are ok. */ + + int + ia64_move_ok (rtx dst, rtx src) + { + /* If we're under init_recog_no_volatile, we'll not be able to use + memory_operand. So check the code directly and don't worry about + the validity of the underlying address, which should have been + checked elsewhere anyway. */ + if (GET_CODE (dst) != MEM) + return 1; + if (GET_CODE (src) == MEM) + return 0; + if (register_operand (src, VOIDmode)) + return 1; + + /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0. */ + if (INTEGRAL_MODE_P (GET_MODE (dst))) + return src == const0_rtx; + else + return satisfies_constraint_G (src); + } + + /* Return 1 if the operands are ok for a floating point load pair. */ + + int + ia64_load_pair_ok (rtx dst, rtx src) + { + /* ??? There is a thinko in the implementation of the "x" constraint and the + FP_REGS class. The constraint will also reject (reg f30:TI) so we must + also return false for it. */ + if (GET_CODE (dst) != REG + || !(FP_REGNO_P (REGNO (dst)) && FP_REGNO_P (REGNO (dst) + 1))) + return 0; + if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src)) + return 0; + switch (GET_CODE (XEXP (src, 0))) + { + case REG: + case POST_INC: + break; + case POST_DEC: + return 0; + case POST_MODIFY: + { + rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1); + + if (GET_CODE (adjust) != CONST_INT + || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src))) + return 0; + } + break; + default: + abort (); + } + return 1; + } + + int + addp4_optimize_ok (rtx op1, rtx op2) + { + return (basereg_operand (op1, GET_MODE(op1)) != + basereg_operand (op2, GET_MODE(op2))); + } + + /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction. + Return the length of the field, or <= 0 on failure. */ + + int + ia64_depz_field_mask (rtx rop, rtx rshift) + { + unsigned HOST_WIDE_INT op = INTVAL (rop); + unsigned HOST_WIDE_INT shift = INTVAL (rshift); + + /* Get rid of the zero bits we're shifting in. */ + op >>= shift; + + /* We must now have a solid block of 1's at bit 0. */ + return exact_log2 (op + 1); + } + + /* Return the TLS model to use for ADDR. */ + + static enum tls_model + tls_symbolic_operand_type (rtx addr) + { + enum tls_model tls_kind = TLS_MODEL_NONE; + + if (GET_CODE (addr) == CONST) + { + if (GET_CODE (XEXP (addr, 0)) == PLUS + && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF) + tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0)); + } + else if (GET_CODE (addr) == SYMBOL_REF) + tls_kind = SYMBOL_REF_TLS_MODEL (addr); + + return tls_kind; + } + + /* Returns true if REG (assumed to be a `reg' RTX) is valid for use + as a base register. 
*/ + + static inline bool + ia64_reg_ok_for_base_p (const_rtx reg, bool strict) + { + if (strict + && REGNO_OK_FOR_BASE_P (REGNO (reg))) + return true; + else if (!strict + && (GENERAL_REGNO_P (REGNO (reg)) + || !HARD_REGISTER_P (reg))) + return true; + else + return false; + } + + static bool + ia64_legitimate_address_reg (const_rtx reg, bool strict) + { + if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict)) + || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0)) + && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict))) + return true; + + return false; + } + + static bool + ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict) + { + if (GET_CODE (disp) == PLUS + && rtx_equal_p (reg, XEXP (disp, 0)) + && (ia64_legitimate_address_reg (XEXP (disp, 1), strict) + || (CONST_INT_P (XEXP (disp, 1)) + && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255)))) + return true; + + return false; + } + + /* Implement TARGET_LEGITIMATE_ADDRESS_P. */ + + static bool + ia64_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED, + rtx x, bool strict) + { + if (ia64_legitimate_address_reg (x, strict)) + return true; + else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC) + && ia64_legitimate_address_reg (XEXP (x, 0), strict) + && XEXP (x, 0) != arg_pointer_rtx) + return true; + else if (GET_CODE (x) == POST_MODIFY + && ia64_legitimate_address_reg (XEXP (x, 0), strict) + && XEXP (x, 0) != arg_pointer_rtx + && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict)) + return true; + else + return false; + } + + /* Return true if X is a constant that is valid for some immediate + field in an instruction. */ + + static bool + ia64_legitimate_constant_p (machine_mode mode, rtx x) + { + switch (GET_CODE (x)) + { + case CONST_INT: + case LABEL_REF: + return true; + + case CONST_DOUBLE: + if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode) + return true; + return satisfies_constraint_G (x); + + case CONST: + case SYMBOL_REF: + /* ??? Short term workaround for PR 28490. We must make the code here + match the code in ia64_expand_move and move_operand, even though they + are both technically wrong. */ + if (tls_symbolic_operand_type (x) == 0) + { + HOST_WIDE_INT addend = 0; + rtx op = x; + + if (GET_CODE (op) == CONST + && GET_CODE (XEXP (op, 0)) == PLUS + && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT) + { + addend = INTVAL (XEXP (XEXP (op, 0), 1)); + op = XEXP (XEXP (op, 0), 0); + } + + if (any_offset_symbol_operand (op, mode) + || function_operand (op, mode)) + return true; + if (aligned_offset_symbol_operand (op, mode)) + return (addend & 0x3fff) == 0; + return false; + } + return false; + + case CONST_VECTOR: + if (mode == V2SFmode) + return satisfies_constraint_Y (x); + + return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT + && GET_MODE_SIZE (mode) <= 8); + + default: + return false; + } + } + + /* Don't allow TLS addresses to get spilled to memory. */ + + static bool + ia64_cannot_force_const_mem (machine_mode mode, rtx x) + { + if (mode == RFmode) + return true; + return tls_symbolic_operand_type (x) != 0; + } + + /* Expand a symbolic constant load. */ + + bool + ia64_expand_load_address (rtx dest, rtx src) + { + gcc_assert (GET_CODE (dest) == REG); + + /* ILP32 mode still loads 64-bits of data from the GOT. This avoids + having to pointer-extend the value afterward. Other forms of address + computation below are also more natural to compute as 64-bit quantities. + If we've been given an SImode destination register, change it. 
*/ + if (GET_MODE (dest) != Pmode) + dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest), + byte_lowpart_offset (Pmode, GET_MODE (dest))); + + if (TARGET_NO_PIC) + return false; + if (small_addr_symbolic_operand (src, VOIDmode)) + return false; + + if (TARGET_AUTO_PIC) + emit_insn (gen_load_gprel64 (dest, src)); + else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src)) + emit_insn (gen_load_fptr (dest, src)); + else if (sdata_symbolic_operand (src, VOIDmode)) + emit_insn (gen_load_gprel (dest, src)); + else if (local_symbolic_operand64 (src, VOIDmode)) + { + /* We want to use @gprel rather than @ltoff relocations for local + symbols: + - @gprel does not require dynamic linker + - and does not use .sdata section + https://gcc.gnu.org/bugzilla/60465 */ + emit_insn (gen_load_gprel64 (dest, src)); + } + else + { + HOST_WIDE_INT addend = 0; + rtx tmp; + + /* We did split constant offsets in ia64_expand_move, and we did try + to keep them split in move_operand, but we also allowed reload to + rematerialize arbitrary constants rather than spill the value to + the stack and reload it. So we have to be prepared here to split + them apart again. */ + if (GET_CODE (src) == CONST) + { + HOST_WIDE_INT hi, lo; + + hi = INTVAL (XEXP (XEXP (src, 0), 1)); + lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000; + hi = hi - lo; + + if (lo != 0) + { + addend = lo; + src = plus_constant (Pmode, XEXP (XEXP (src, 0), 0), hi); + } + } + + tmp = gen_rtx_HIGH (Pmode, src); + tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx); + emit_insn (gen_rtx_SET (dest, tmp)); + + tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src); + emit_insn (gen_rtx_SET (dest, tmp)); + + if (addend) + { + tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend)); + emit_insn (gen_rtx_SET (dest, tmp)); + } + } + + return true; + } + + static GTY(()) rtx gen_tls_tga; + static rtx + gen_tls_get_addr (void) + { + if (!gen_tls_tga) + gen_tls_tga = init_one_libfunc ("__tls_get_addr"); + return gen_tls_tga; + } + + static GTY(()) rtx thread_pointer_rtx; + static rtx + gen_thread_pointer (void) + { + if (!thread_pointer_rtx) + thread_pointer_rtx = gen_rtx_REG (Pmode, 13); + return thread_pointer_rtx; + } + + static rtx + ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1, + rtx orig_op1, HOST_WIDE_INT addend) + { + rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp; + rtx_insn *insns; + rtx orig_op0 = op0; + HOST_WIDE_INT addend_lo, addend_hi; + + switch (tls_kind) + { + case TLS_MODEL_GLOBAL_DYNAMIC: + start_sequence (); + + tga_op1 = gen_reg_rtx (Pmode); + emit_insn (gen_load_dtpmod (tga_op1, op1)); + + tga_op2 = gen_reg_rtx (Pmode); + emit_insn (gen_load_dtprel (tga_op2, op1)); + + tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX, + LCT_CONST, Pmode, + tga_op1, Pmode, tga_op2, Pmode); + + insns = get_insns (); + end_sequence (); + + if (GET_MODE (op0) != Pmode) + op0 = tga_ret; + emit_libcall_block (insns, op0, tga_ret, op1); + break; + + case TLS_MODEL_LOCAL_DYNAMIC: + /* ??? This isn't the completely proper way to do local-dynamic + If the call to __tls_get_addr is used only by a single symbol, + then we should (somehow) move the dtprel to the second arg + to avoid the extra add. 
*/ + start_sequence (); + + tga_op1 = gen_reg_rtx (Pmode); + emit_insn (gen_load_dtpmod (tga_op1, op1)); + + tga_op2 = const0_rtx; + + tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX, + LCT_CONST, Pmode, + tga_op1, Pmode, tga_op2, Pmode); + + insns = get_insns (); + end_sequence (); + + tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), + UNSPEC_LD_BASE); + tmp = gen_reg_rtx (Pmode); + emit_libcall_block (insns, tmp, tga_ret, tga_eqv); + + if (!register_operand (op0, Pmode)) + op0 = gen_reg_rtx (Pmode); + if (TARGET_TLS64) + { + emit_insn (gen_load_dtprel (op0, op1)); + emit_insn (gen_adddi3 (op0, tmp, op0)); + } + else + emit_insn (gen_add_dtprel (op0, op1, tmp)); + break; + + case TLS_MODEL_INITIAL_EXEC: + addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000; + addend_hi = addend - addend_lo; + + op1 = plus_constant (Pmode, op1, addend_hi); + addend = addend_lo; + + tmp = gen_reg_rtx (Pmode); + emit_insn (gen_load_tprel (tmp, op1)); + + if (!register_operand (op0, Pmode)) + op0 = gen_reg_rtx (Pmode); + emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ())); + break; + + case TLS_MODEL_LOCAL_EXEC: + if (!register_operand (op0, Pmode)) + op0 = gen_reg_rtx (Pmode); + + op1 = orig_op1; + addend = 0; + if (TARGET_TLS64) + { + emit_insn (gen_load_tprel (op0, op1)); + emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ())); + } + else + emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ())); + break; + + default: + gcc_unreachable (); + } + + if (addend) + op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend), + orig_op0, 1, OPTAB_DIRECT); + if (orig_op0 == op0) + return NULL_RTX; + if (GET_MODE (orig_op0) == Pmode) + return op0; + return gen_lowpart (GET_MODE (orig_op0), op0); + } + + rtx + ia64_expand_move (rtx op0, rtx op1) + { + machine_mode mode = GET_MODE (op0); + + if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1)) + op1 = force_reg (mode, op1); + + if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode)) + { + HOST_WIDE_INT addend = 0; + enum tls_model tls_kind; + rtx sym = op1; + + if (GET_CODE (op1) == CONST + && GET_CODE (XEXP (op1, 0)) == PLUS + && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT) + { + addend = INTVAL (XEXP (XEXP (op1, 0), 1)); + sym = XEXP (XEXP (op1, 0), 0); + } + + tls_kind = tls_symbolic_operand_type (sym); + if (tls_kind) + return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend); + + if (any_offset_symbol_operand (sym, mode)) + addend = 0; + else if (aligned_offset_symbol_operand (sym, mode)) + { + HOST_WIDE_INT addend_lo, addend_hi; + + addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000; + addend_hi = addend - addend_lo; + + if (addend_lo != 0) + { + op1 = plus_constant (mode, sym, addend_hi); + addend = addend_lo; + } + else + addend = 0; + } + else + op1 = sym; + + if (reload_completed) + { + /* We really should have taken care of this offset earlier. */ + gcc_assert (addend == 0); + if (ia64_expand_load_address (op0, op1)) + return NULL_RTX; + } + + if (addend) + { + rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode); + + emit_insn (gen_rtx_SET (subtarget, op1)); + + op1 = expand_simple_binop (mode, PLUS, subtarget, + GEN_INT (addend), op0, 1, OPTAB_DIRECT); + if (op0 == op1) + return NULL_RTX; + } + } + + return op1; + } + + /* Split a move from OP1 to OP0 conditional on COND. 
*/ + + void + ia64_emit_cond_move (rtx op0, rtx op1, rtx cond) + { + rtx_insn *insn, *first = get_last_insn (); + + emit_move_insn (op0, op1); + + for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn)) + if (INSN_P (insn)) + PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), + PATTERN (insn)); + } + + /* Split a post-reload TImode or TFmode reference into two DImode + components. This is made extra difficult by the fact that we do + not get any scratch registers to work with, because reload cannot + be prevented from giving us a scratch that overlaps the register + pair involved. So instead, when addressing memory, we tweak the + pointer register up and back down with POST_INCs. Or up and not + back down when we can get away with it. + + REVERSED is true when the loads must be done in reversed order + (high word first) for correctness. DEAD is true when the pointer + dies with the second insn we generate and therefore the second + address must not carry a postmodify. + + May return an insn which is to be emitted after the moves. */ + + static rtx + ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead) + { + rtx fixup = 0; + + switch (GET_CODE (in)) + { + case REG: + out[reversed] = gen_rtx_REG (DImode, REGNO (in)); + out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1); + break; + + case CONST_INT: + case CONST_DOUBLE: + /* Cannot occur reversed. */ + gcc_assert (!reversed); + + if (GET_MODE (in) != TFmode) + split_double (in, &out[0], &out[1]); + else + /* split_double does not understand how to split a TFmode + quantity into a pair of DImode constants. */ + { + unsigned HOST_WIDE_INT p[2]; + long l[4]; /* TFmode is 128 bits */ + + real_to_target (l, CONST_DOUBLE_REAL_VALUE (in), TFmode); + + if (FLOAT_WORDS_BIG_ENDIAN) + { + p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1]; + p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3]; + } + else + { + p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0]; + p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2]; + } + out[0] = GEN_INT (p[0]); + out[1] = GEN_INT (p[1]); + } + break; + + case MEM: + { + rtx base = XEXP (in, 0); + rtx offset; + + switch (GET_CODE (base)) + { + case REG: + if (!reversed) + { + out[0] = adjust_automodify_address + (in, DImode, gen_rtx_POST_INC (Pmode, base), 0); + out[1] = adjust_automodify_address + (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8); + } + else + { + /* Reversal requires a pre-increment, which can only + be done as a separate insn. */ + emit_insn (gen_adddi3 (base, base, GEN_INT (8))); + out[0] = adjust_automodify_address + (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8); + out[1] = adjust_address (in, DImode, 0); + } + break; + + case POST_INC: + gcc_assert (!reversed && !dead); + + /* Just do the increment in two steps. */ + out[0] = adjust_automodify_address (in, DImode, 0, 0); + out[1] = adjust_automodify_address (in, DImode, 0, 8); + break; + + case POST_DEC: + gcc_assert (!reversed && !dead); + + /* Add 8, subtract 24. */ + base = XEXP (base, 0); + out[0] = adjust_automodify_address + (in, DImode, gen_rtx_POST_INC (Pmode, base), 0); + out[1] = adjust_automodify_address + (in, DImode, + gen_rtx_POST_MODIFY (Pmode, base, + plus_constant (Pmode, base, -24)), + 8); + break; + + case POST_MODIFY: + gcc_assert (!reversed && !dead); + + /* Extract and adjust the modification. This case is + trickier than the others, because we might have an + index register, or we might have a combined offset that + doesn't fit a signed 9-bit displacement field. 
We can + assume the incoming expression is already legitimate. */ + offset = XEXP (base, 1); + base = XEXP (base, 0); + + out[0] = adjust_automodify_address + (in, DImode, gen_rtx_POST_INC (Pmode, base), 0); + + if (GET_CODE (XEXP (offset, 1)) == REG) + { + /* Can't adjust the postmodify to match. Emit the + original, then a separate addition insn. */ + out[1] = adjust_automodify_address (in, DImode, 0, 8); + fixup = gen_adddi3 (base, base, GEN_INT (-8)); + } + else + { + gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT); + if (INTVAL (XEXP (offset, 1)) < -256 + 8) + { + /* Again the postmodify cannot be made to match, + but in this case it's more efficient to get rid + of the postmodify entirely and fix up with an + add insn. */ + out[1] = adjust_automodify_address (in, DImode, base, 8); + fixup = gen_adddi3 + (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8)); + } + else + { + /* Combined offset still fits in the displacement field. + (We cannot overflow it at the high end.) */ + out[1] = adjust_automodify_address + (in, DImode, gen_rtx_POST_MODIFY + (Pmode, base, gen_rtx_PLUS + (Pmode, base, + GEN_INT (INTVAL (XEXP (offset, 1)) - 8))), + 8); + } + } + break; + + default: + gcc_unreachable (); + } + break; + } + + default: + gcc_unreachable (); + } + + return fixup; + } + + /* Split a TImode or TFmode move instruction after reload. + This is used by *movtf_internal and *movti_internal. */ + void + ia64_split_tmode_move (rtx operands[]) + { + rtx in[2], out[2], insn; + rtx fixup[2]; + bool dead = false; + bool reversed = false; + + /* It is possible for reload to decide to overwrite a pointer with + the value it points to. In that case we have to do the loads in + the appropriate order so that the pointer is not destroyed too + early. Also we must not generate a postmodify for that second + load, or rws_access_regno will die. And we must not generate a + postmodify for the second load if the destination register + overlaps with the base register. */ + if (GET_CODE (operands[1]) == MEM + && reg_overlap_mentioned_p (operands[0], operands[1])) + { + rtx base = XEXP (operands[1], 0); + while (GET_CODE (base) != REG) + base = XEXP (base, 0); + + if (REGNO (base) == REGNO (operands[0])) + reversed = true; + + if (refers_to_regno_p (REGNO (operands[0]), + REGNO (operands[0])+2, + base, 0)) + dead = true; + } + /* Another reason to do the moves in reversed order is if the first + element of the target register pair is also the second element of + the source register pair. */ + if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG + && REGNO (operands[0]) == REGNO (operands[1]) + 1) + reversed = true; + + fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead); + fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead); + + #define MAYBE_ADD_REG_INC_NOTE(INSN, EXP) \ + if (GET_CODE (EXP) == MEM \ + && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY \ + || GET_CODE (XEXP (EXP, 0)) == POST_INC \ + || GET_CODE (XEXP (EXP, 0)) == POST_DEC)) \ + add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0)) + + insn = emit_insn (gen_rtx_SET (out[0], in[0])); + MAYBE_ADD_REG_INC_NOTE (insn, in[0]); + MAYBE_ADD_REG_INC_NOTE (insn, out[0]); + + insn = emit_insn (gen_rtx_SET (out[1], in[1])); + MAYBE_ADD_REG_INC_NOTE (insn, in[1]); + MAYBE_ADD_REG_INC_NOTE (insn, out[1]); + + if (fixup[0]) + emit_insn (fixup[0]); + if (fixup[1]) + emit_insn (fixup[1]); + + #undef MAYBE_ADD_REG_INC_NOTE + } + + /* ??? Fixing GR->FR XFmode moves during reload is hard. 
You need to go + through memory plus an extra GR scratch register. Except that you can + either get the first from TARGET_SECONDARY_MEMORY_NEEDED or the second + from SECONDARY_RELOAD_CLASS, but not both. + + We got into problems in the first place by allowing a construct like + (subreg:XF (reg:TI)), which we got from a union containing a long double. + This solution attempts to prevent this situation from occurring. When + we see something like the above, we spill the inner register to memory. */ + + static rtx + spill_xfmode_rfmode_operand (rtx in, int force, machine_mode mode) + { + if (GET_CODE (in) == SUBREG + && GET_MODE (SUBREG_REG (in)) == TImode + && GET_CODE (SUBREG_REG (in)) == REG) + { + rtx memt = assign_stack_temp (TImode, 16); + emit_move_insn (memt, SUBREG_REG (in)); + return adjust_address (memt, mode, 0); + } + else if (force && GET_CODE (in) == REG) + { + rtx memx = assign_stack_temp (mode, 16); + emit_move_insn (memx, in); + return memx; + } + else + return in; + } + + /* Expand the movxf or movrf pattern (MODE says which) with the given + OPERANDS, returning true if the pattern should then invoke + DONE. */ + + bool + ia64_expand_movxf_movrf (machine_mode mode, rtx operands[]) + { + rtx op0 = operands[0]; + + if (GET_CODE (op0) == SUBREG) + op0 = SUBREG_REG (op0); + + /* We must support XFmode loads into general registers for stdarg/vararg, + unprototyped calls, and a rare case where a long double is passed as + an argument after a float HFA fills the FP registers. We split them into + DImode loads for convenience. We also need to support XFmode stores + for the last case. This case does not happen for stdarg/vararg routines, + because we do a block store to memory of unnamed arguments. */ + + if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0))) + { + rtx out[2]; + + /* We're hoping to transform everything that deals with XFmode + quantities and GR registers early in the compiler. */ + gcc_assert (can_create_pseudo_p ()); + + /* Struct to register can just use TImode instead. */ + if ((GET_CODE (operands[1]) == SUBREG + && GET_MODE (SUBREG_REG (operands[1])) == TImode) + || (GET_CODE (operands[1]) == REG + && GR_REGNO_P (REGNO (operands[1])))) + { + rtx op1 = operands[1]; + + if (GET_CODE (op1) == SUBREG) + op1 = SUBREG_REG (op1); + else + op1 = gen_rtx_REG (TImode, REGNO (op1)); + + emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1); + return true; + } + + if (GET_CODE (operands[1]) == CONST_DOUBLE) + { + /* Don't word-swap when reading in the constant. */ + emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)), + operand_subword (operands[1], WORDS_BIG_ENDIAN, + 0, mode)); + emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1), + operand_subword (operands[1], !WORDS_BIG_ENDIAN, + 0, mode)); + return true; + } + + /* If the quantity is in a register not known to be GR, spill it. */ + if (register_operand (operands[1], mode)) + operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode); + + gcc_assert (GET_CODE (operands[1]) == MEM); + + /* Don't word-swap when reading in the value. */ + out[0] = gen_rtx_REG (DImode, REGNO (op0)); + out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1); + + emit_move_insn (out[0], adjust_address (operands[1], DImode, 0)); + emit_move_insn (out[1], adjust_address (operands[1], DImode, 8)); + return true; + } + + if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1]))) + { + /* We're hoping to transform everything that deals with XFmode + quantities and GR registers early in the compiler. 
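   In other words, by the time pseudos can no longer be created (reload
   and later) no XFmode value should still be living in a general
   register; the assertion below documents that assumption.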
*/ + gcc_assert (can_create_pseudo_p ()); + + /* Op0 can't be a GR_REG here, as that case is handled above. + If op0 is a register, then we spill op1, so that we now have a + MEM operand. This requires creating an XFmode subreg of a TImode reg + to force the spill. */ + if (register_operand (operands[0], mode)) + { + rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1])); + op1 = gen_rtx_SUBREG (mode, op1, 0); + operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode); + } + + else + { + rtx in[2]; + + gcc_assert (GET_CODE (operands[0]) == MEM); + + /* Don't word-swap when writing out the value. */ + in[0] = gen_rtx_REG (DImode, REGNO (operands[1])); + in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1); + + emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]); + emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]); + return true; + } + } + + if (!reload_in_progress && !reload_completed) + { + operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode); + + if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG) + { + rtx memt, memx, in = operands[1]; + if (CONSTANT_P (in)) + in = validize_mem (force_const_mem (mode, in)); + if (GET_CODE (in) == MEM) + memt = adjust_address (in, TImode, 0); + else + { + memt = assign_stack_temp (TImode, 16); + memx = adjust_address (memt, mode, 0); + emit_move_insn (memx, in); + } + emit_move_insn (op0, memt); + return true; + } + + if (!ia64_move_ok (operands[0], operands[1])) + operands[1] = force_reg (mode, operands[1]); + } + + return false; + } + + /* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1 + with the expression that holds the compare result (in VOIDmode). */ + + static GTY(()) rtx cmptf_libfunc; + + void + ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1) + { + enum rtx_code code = GET_CODE (*expr); + rtx cmp; + + /* If we have a BImode input, then we already have a compare result, and + do not need to emit another comparison. */ + if (GET_MODE (*op0) == BImode) + { + gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx); + cmp = *op0; + } + /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a + magic number as its third argument, that indicates what to do. + The return value is an integer to be compared against zero. */ + else if (TARGET_HPUX && GET_MODE (*op0) == TFmode) + { + enum qfcmp_magic { + QCMP_INV = 1, /* Raise FP_INVALID on NaNs as a side effect. */ + QCMP_UNORD = 2, + QCMP_EQ = 4, + QCMP_LT = 8, + QCMP_GT = 16 + }; + int magic; + enum rtx_code ncode; + rtx ret; + + gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode); + switch (code) + { + /* 1 = equal, 0 = not equal. Equality operators do + not raise FP_INVALID when given a NaN operand. */ + case EQ: magic = QCMP_EQ; ncode = NE; break; + case NE: magic = QCMP_EQ; ncode = EQ; break; + /* isunordered() from C99. */ + case UNORDERED: magic = QCMP_UNORD; ncode = NE; break; + case ORDERED: magic = QCMP_UNORD; ncode = EQ; break; + /* Relational operators raise FP_INVALID when given + a NaN operand. */ + case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break; + case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break; + case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break; + case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break; + /* Unordered relational operators do not raise FP_INVALID + when given a NaN operand. 
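   As a worked example, UNLE maps to QCMP_LT | QCMP_EQ | QCMP_UNORD
   = 8 + 4 + 2 = 14, so the sequence below boils down to

	ret = _U_Qfcmp (*op0, *op1, 14);
	cmp = (ret != 0);

   and the caller then branches on cmp compared against zero.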
*/ + case UNLT: magic = QCMP_LT |QCMP_UNORD; ncode = NE; break; + case UNLE: magic = QCMP_LT|QCMP_EQ|QCMP_UNORD; ncode = NE; break; + case UNGT: magic = QCMP_GT |QCMP_UNORD; ncode = NE; break; + case UNGE: magic = QCMP_GT|QCMP_EQ|QCMP_UNORD; ncode = NE; break; + /* Not supported. */ + case UNEQ: + case LTGT: + default: gcc_unreachable (); + } + + start_sequence (); + + ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, + *op0, TFmode, *op1, TFmode, + GEN_INT (magic), DImode); + cmp = gen_reg_rtx (BImode); + emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (ncode, BImode, + ret, const0_rtx))); + + rtx_insn *insns = get_insns (); + end_sequence (); + + emit_libcall_block (insns, cmp, cmp, + gen_rtx_fmt_ee (code, BImode, *op0, *op1)); + code = NE; + } + else + { + cmp = gen_reg_rtx (BImode); + emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code, BImode, *op0, *op1))); + code = NE; + } + + *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx); + *op0 = cmp; + *op1 = const0_rtx; + } + + /* Generate an integral vector comparison. Return true if the condition has + been reversed, and so the sense of the comparison should be inverted. */ + + static bool + ia64_expand_vecint_compare (enum rtx_code code, machine_mode mode, + rtx dest, rtx op0, rtx op1) + { + bool negate = false; + rtx x; + + /* Canonicalize the comparison to EQ, GT, GTU. */ + switch (code) + { + case EQ: + case GT: + case GTU: + break; + + case NE: + case LE: + case LEU: + code = reverse_condition (code); + negate = true; + break; + + case GE: + case GEU: + code = reverse_condition (code); + negate = true; + /* FALLTHRU */ + + case LT: + case LTU: + code = swap_condition (code); + x = op0, op0 = op1, op1 = x; + break; + + default: + gcc_unreachable (); + } + + /* Unsigned parallel compare is not supported by the hardware. Play some + tricks to turn this into a signed comparison against 0. */ + if (code == GTU) + { + switch (mode) + { + case E_V2SImode: + { + rtx t1, t2, mask; + + /* Subtract (-(INT MAX) - 1) from both operands to make + them signed. */ + mask = gen_int_mode (0x80000000, SImode); + mask = gen_const_vec_duplicate (V2SImode, mask); + mask = force_reg (mode, mask); + t1 = gen_reg_rtx (mode); + emit_insn (gen_subv2si3 (t1, op0, mask)); + t2 = gen_reg_rtx (mode); + emit_insn (gen_subv2si3 (t2, op1, mask)); + op0 = t1; + op1 = t2; + code = GT; + } + break; + + case E_V8QImode: + case E_V4HImode: + /* Perform a parallel unsigned saturating subtraction. */ + x = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, op0, op1))); + + code = EQ; + op0 = x; + op1 = CONST0_RTX (mode); + negate = !negate; + break; + + default: + gcc_unreachable (); + } + } + + x = gen_rtx_fmt_ee (code, mode, op0, op1); + emit_insn (gen_rtx_SET (dest, x)); + + return negate; + } + + /* Emit an integral vector conditional move. 
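   The vector compare leaves an all-ones or all-zeros mask in each
   element, so the general case below is simply

	op0 = (cmp & on_true) | (~cmp & on_false)

   with shortcuts when either arm is the zero vector.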
*/ + + void + ia64_expand_vecint_cmov (rtx operands[]) + { + machine_mode mode = GET_MODE (operands[0]); + enum rtx_code code = GET_CODE (operands[3]); + bool negate; + rtx cmp, x, ot, of; + + cmp = gen_reg_rtx (mode); + negate = ia64_expand_vecint_compare (code, mode, cmp, + operands[4], operands[5]); + + ot = operands[1+negate]; + of = operands[2-negate]; + + if (ot == CONST0_RTX (mode)) + { + if (of == CONST0_RTX (mode)) + { + emit_move_insn (operands[0], ot); + return; + } + + x = gen_rtx_NOT (mode, cmp); + x = gen_rtx_AND (mode, x, of); + emit_insn (gen_rtx_SET (operands[0], x)); + } + else if (of == CONST0_RTX (mode)) + { + x = gen_rtx_AND (mode, cmp, ot); + emit_insn (gen_rtx_SET (operands[0], x)); + } + else + { + rtx t, f; + + t = gen_reg_rtx (mode); + x = gen_rtx_AND (mode, cmp, operands[1+negate]); + emit_insn (gen_rtx_SET (t, x)); + + f = gen_reg_rtx (mode); + x = gen_rtx_NOT (mode, cmp); + x = gen_rtx_AND (mode, x, operands[2-negate]); + emit_insn (gen_rtx_SET (f, x)); + + x = gen_rtx_IOR (mode, t, f); + emit_insn (gen_rtx_SET (operands[0], x)); + } + } + + /* Emit an integral vector min or max operation. Return true if all done. */ + + bool + ia64_expand_vecint_minmax (enum rtx_code code, machine_mode mode, + rtx operands[]) + { + rtx xops[6]; + + /* These four combinations are supported directly. */ + if (mode == V8QImode && (code == UMIN || code == UMAX)) + return false; + if (mode == V4HImode && (code == SMIN || code == SMAX)) + return false; + + /* This combination can be implemented with only saturating subtraction. */ + if (mode == V4HImode && code == UMAX) + { + rtx x, tmp = gen_reg_rtx (mode); + + x = gen_rtx_US_MINUS (mode, operands[1], operands[2]); + emit_insn (gen_rtx_SET (tmp, x)); + + emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2])); + return true; + } + + /* Everything else implemented via vector comparisons. */ + xops[0] = operands[0]; + xops[4] = xops[1] = operands[1]; + xops[5] = xops[2] = operands[2]; + + switch (code) + { + case UMIN: + code = LTU; + break; + case UMAX: + code = GTU; + break; + case SMIN: + code = LT; + break; + case SMAX: + code = GT; + break; + default: + gcc_unreachable (); + } + xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]); + + ia64_expand_vecint_cmov (xops); + return true; + } + + /* The vectors LO and HI each contain N halves of a double-wide vector. + Reassemble either the first N/2 or the second N/2 elements. */ + + void + ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp) + { + machine_mode vmode = GET_MODE (lo); + unsigned int i, high, nelt = GET_MODE_NUNITS (vmode); + struct expand_vec_perm_d d; + bool ok; + + d.target = gen_lowpart (vmode, out); + d.op0 = (TARGET_BIG_ENDIAN ? hi : lo); + d.op1 = (TARGET_BIG_ENDIAN ? lo : hi); + d.vmode = vmode; + d.nelt = nelt; + d.one_operand_p = false; + d.testing_p = false; + + high = (highp ? nelt / 2 : 0); + for (i = 0; i < nelt / 2; ++i) + { + d.perm[i * 2] = i + high; + d.perm[i * 2 + 1] = i + high + nelt; + } + + ok = ia64_expand_vec_perm_const_1 (&d); + gcc_assert (ok); + } + + /* Return a vector of the sign-extension of VEC. */ + + static rtx + ia64_unpack_sign (rtx vec, bool unsignedp) + { + machine_mode mode = GET_MODE (vec); + rtx zero = CONST0_RTX (mode); + + if (unsignedp) + return zero; + else + { + rtx sign = gen_reg_rtx (mode); + bool neg; + + neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero); + gcc_assert (!neg); + + return sign; + } + } + + /* Emit an integral vector unpack operation. 
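   For the unsigned case the interleave partner is the zero vector; for
   the signed case it is the elementwise mask (x < 0 ? ~0 : 0) computed
   by ia64_unpack_sign, so pairing each narrow element with its partner
   is exactly zero- or sign-extension into the wider element.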
*/ + + void + ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp) + { + rtx sign = ia64_unpack_sign (operands[1], unsignedp); + ia64_unpack_assemble (operands[0], operands[1], sign, highp); + } + + /* Emit an integral vector widening sum operations. */ + + void + ia64_expand_widen_sum (rtx operands[3], bool unsignedp) + { + machine_mode wmode; + rtx l, h, t, sign; + + sign = ia64_unpack_sign (operands[1], unsignedp); + + wmode = GET_MODE (operands[0]); + l = gen_reg_rtx (wmode); + h = gen_reg_rtx (wmode); + + ia64_unpack_assemble (l, operands[1], sign, false); + ia64_unpack_assemble (h, operands[1], sign, true); + + t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT); + t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT); + if (t != operands[0]) + emit_move_insn (operands[0], t); + } + + /* Emit the appropriate sequence for a call. */ + + void + ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED, + int sibcall_p) + { + rtx insn, b0; + + addr = XEXP (addr, 0); + addr = convert_memory_address (DImode, addr); + b0 = gen_rtx_REG (DImode, R_BR (0)); + + /* ??? Should do this for functions known to bind local too. */ + if (TARGET_NO_PIC || TARGET_AUTO_PIC) + { + if (sibcall_p) + insn = gen_sibcall_nogp (addr); + else if (! retval) + insn = gen_call_nogp (addr, b0); + else + insn = gen_call_value_nogp (retval, addr, b0); + insn = emit_call_insn (insn); + } + else + { + if (sibcall_p) + insn = gen_sibcall_gp (addr); + else if (! retval) + insn = gen_call_gp (addr, b0); + else + insn = gen_call_value_gp (retval, addr, b0); + insn = emit_call_insn (insn); + + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx); + } + + if (sibcall_p) + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0); + + if (TARGET_ABI_OPEN_VMS) + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), + gen_rtx_REG (DImode, GR_REG (25))); + } + + static void + reg_emitted (enum ia64_frame_regs r) + { + if (emitted_frame_related_regs[r] == 0) + emitted_frame_related_regs[r] = current_frame_info.r[r]; + else + gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]); + } + + static int + get_reg (enum ia64_frame_regs r) + { + reg_emitted (r); + return current_frame_info.r[r]; + } + + static bool + is_emitted (int regno) + { + unsigned int r; + + for (r = reg_fp; r < number_of_ia64_frame_regs; r++) + if (emitted_frame_related_regs[r] == regno) + return true; + return false; + } + + void + ia64_reload_gp (void) + { + rtx tmp; + + if (current_frame_info.r[reg_save_gp]) + { + tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp)); + } + else + { + HOST_WIDE_INT offset; + rtx offset_r; + + offset = (current_frame_info.spill_cfa_off + + current_frame_info.spill_size); + if (frame_pointer_needed) + { + tmp = hard_frame_pointer_rtx; + offset = -offset; + } + else + { + tmp = stack_pointer_rtx; + offset = current_frame_info.total_size - offset; + } + + offset_r = GEN_INT (offset); + if (satisfies_constraint_I (offset_r)) + emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r)); + else + { + emit_move_insn (pic_offset_table_rtx, offset_r); + emit_insn (gen_adddi3 (pic_offset_table_rtx, + pic_offset_table_rtx, tmp)); + } + + tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx); + } + + emit_move_insn (pic_offset_table_rtx, tmp); + } + + void + ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r, + rtx scratch_b, int noreturn_p, int sibcall_p) + { + rtx insn; + bool is_desc = false; + + /* If we find we're calling through a register, then we're 
actually + calling through a descriptor, so load up the values. */ + if (REG_P (addr) && GR_REGNO_P (REGNO (addr))) + { + rtx tmp; + bool addr_dead_p; + + /* ??? We are currently constrained to *not* use peep2, because + we can legitimately change the global lifetime of the GP + (in the form of killing where previously live). This is + because a call through a descriptor doesn't use the previous + value of the GP, while a direct call does, and we do not + commit to either form until the split here. + + That said, this means that we lack precise life info for + whether ADDR is dead after this call. This is not terribly + important, since we can fix things up essentially for free + with the POST_DEC below, but it's nice to not use it when we + can immediately tell it's not necessary. */ + addr_dead_p = ((noreturn_p || sibcall_p + || TEST_HARD_REG_BIT (regs_invalidated_by_call, + REGNO (addr))) + && !FUNCTION_ARG_REGNO_P (REGNO (addr))); + + /* Load the code address into scratch_b. */ + tmp = gen_rtx_POST_INC (Pmode, addr); + tmp = gen_rtx_MEM (Pmode, tmp); + emit_move_insn (scratch_r, tmp); + emit_move_insn (scratch_b, scratch_r); + + /* Load the GP address. If ADDR is not dead here, then we must + revert the change made above via the POST_INCREMENT. */ + if (!addr_dead_p) + tmp = gen_rtx_POST_DEC (Pmode, addr); + else + tmp = addr; + tmp = gen_rtx_MEM (Pmode, tmp); + emit_move_insn (pic_offset_table_rtx, tmp); + + is_desc = true; + addr = scratch_b; + } + + if (sibcall_p) + insn = gen_sibcall_nogp (addr); + else if (retval) + insn = gen_call_value_nogp (retval, addr, retaddr); + else + insn = gen_call_nogp (addr, retaddr); + emit_call_insn (insn); + + if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p) + ia64_reload_gp (); + } + + /* Expand an atomic operation. We want to perform MEM = VAL atomically. + + This differs from the generic code in that we know about the zero-extending + properties of cmpxchg, and the zero-extending requirements of ar.ccv. We + also know that ld.acq+cmpxchg.rel equals a full barrier. + + The loop we want to generate looks like + + cmp_reg = mem; + label: + old_reg = cmp_reg; + new_reg = cmp_reg op val; + cmp_reg = compare-and-swap(mem, old_reg, new_reg) + if (cmp_reg != old_reg) + goto label; + + Note that we only do the plain load from memory once. Subsequent + iterations use the value loaded by the compare-and-swap pattern. */ + + void + ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val, + rtx old_dst, rtx new_dst, enum memmodel model) + { + machine_mode mode = GET_MODE (mem); + rtx old_reg, new_reg, cmp_reg, ar_ccv, label; + enum insn_code icode; + + /* Special case for using fetchadd. 
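   This path is taken only when VAL is one of the handful of immediates
   fetchadd accepts (+-1, +-4, +-8, +-16, per fetchadd_operand), and the
   generated code is roughly

	mf				;; only for acq-rel / seq-cst models
	fetchadd4.acq old = [mem], imm	;; or fetchadd8 / .rel variants
	add new = old, imm		;; only if NEW_DST is wanted

   with the exact barrier and completer chosen from MODEL in the switch
   below.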
*/ + if ((mode == SImode || mode == DImode) + && (code == PLUS || code == MINUS) + && fetchadd_operand (val, mode)) + { + if (code == MINUS) + val = GEN_INT (-INTVAL (val)); + + if (!old_dst) + old_dst = gen_reg_rtx (mode); + + switch (model) + { + case MEMMODEL_ACQ_REL: + case MEMMODEL_SEQ_CST: + case MEMMODEL_SYNC_SEQ_CST: + emit_insn (gen_memory_barrier ()); + /* FALLTHRU */ + case MEMMODEL_RELAXED: + case MEMMODEL_ACQUIRE: + case MEMMODEL_SYNC_ACQUIRE: + case MEMMODEL_CONSUME: + if (mode == SImode) + icode = CODE_FOR_fetchadd_acq_si; + else + icode = CODE_FOR_fetchadd_acq_di; + break; + case MEMMODEL_RELEASE: + case MEMMODEL_SYNC_RELEASE: + if (mode == SImode) + icode = CODE_FOR_fetchadd_rel_si; + else + icode = CODE_FOR_fetchadd_rel_di; + break; + + default: + gcc_unreachable (); + } + + emit_insn (GEN_FCN (icode) (old_dst, mem, val)); + + if (new_dst) + { + new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst, + true, OPTAB_WIDEN); + if (new_reg != new_dst) + emit_move_insn (new_dst, new_reg); + } + return; + } + + /* Because of the volatile mem read, we get an ld.acq, which is the + front half of the full barrier. The end half is the cmpxchg.rel. + For relaxed and release memory models, we don't need this. But we + also don't bother trying to prevent it either. */ + gcc_assert (is_mm_relaxed (model) || is_mm_release (model) + || MEM_VOLATILE_P (mem)); + + old_reg = gen_reg_rtx (DImode); + cmp_reg = gen_reg_rtx (DImode); + label = gen_label_rtx (); + + if (mode != DImode) + { + val = simplify_gen_subreg (DImode, val, mode, 0); + emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1)); + } + else + emit_move_insn (cmp_reg, mem); + + emit_label (label); + + ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM); + emit_move_insn (old_reg, cmp_reg); + emit_move_insn (ar_ccv, cmp_reg); + + if (old_dst) + emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg)); + + new_reg = cmp_reg; + if (code == NOT) + { + new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX, + true, OPTAB_DIRECT); + new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true); + } + else + new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX, + true, OPTAB_DIRECT); + + if (mode != DImode) + new_reg = gen_lowpart (mode, new_reg); + if (new_dst) + emit_move_insn (new_dst, new_reg); + + switch (model) + { + case MEMMODEL_RELAXED: + case MEMMODEL_ACQUIRE: + case MEMMODEL_SYNC_ACQUIRE: + case MEMMODEL_CONSUME: + switch (mode) + { + case E_QImode: icode = CODE_FOR_cmpxchg_acq_qi; break; + case E_HImode: icode = CODE_FOR_cmpxchg_acq_hi; break; + case E_SImode: icode = CODE_FOR_cmpxchg_acq_si; break; + case E_DImode: icode = CODE_FOR_cmpxchg_acq_di; break; + default: + gcc_unreachable (); + } + break; + + case MEMMODEL_RELEASE: + case MEMMODEL_SYNC_RELEASE: + case MEMMODEL_ACQ_REL: + case MEMMODEL_SEQ_CST: + case MEMMODEL_SYNC_SEQ_CST: + switch (mode) + { + case E_QImode: icode = CODE_FOR_cmpxchg_rel_qi; break; + case E_HImode: icode = CODE_FOR_cmpxchg_rel_hi; break; + case E_SImode: icode = CODE_FOR_cmpxchg_rel_si; break; + case E_DImode: icode = CODE_FOR_cmpxchg_rel_di; break; + default: + gcc_unreachable (); + } + break; + + default: + gcc_unreachable (); + } + + emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg)); + + emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label); + } + + /* Begin the assembly file. 
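   Besides default_file_start, the only target-specific output here is
   emit_safe_across_calls, which tells the assembler which predicate
   registers are preserved across calls; with the usual IA-64 convention
   (p1-p5 and p16-p63 preserved) the directive comes out as

	.pred.safe_across_calls p1-p5,p16-p63

   though the exact ranges depend on the call-used register set.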
*/ + + static void + ia64_file_start (void) + { + default_file_start (); + emit_safe_across_calls (); + } + + void + emit_safe_across_calls (void) + { + unsigned int rs, re; + int out_state; + + rs = 1; + out_state = 0; + while (1) + { + while (rs < 64 && call_used_or_fixed_reg_p (PR_REG (rs))) + rs++; + if (rs >= 64) + break; + for (re = rs + 1; + re < 64 && ! call_used_or_fixed_reg_p (PR_REG (re)); re++) + continue; + if (out_state == 0) + { + fputs ("\t.pred.safe_across_calls ", asm_out_file); + out_state = 1; + } + else + fputc (',', asm_out_file); + if (re == rs + 1) + fprintf (asm_out_file, "p%u", rs); + else + fprintf (asm_out_file, "p%u-p%u", rs, re - 1); + rs = re + 1; + } + if (out_state) + fputc ('\n', asm_out_file); + } + + /* Globalize a declaration. */ + + static void + ia64_globalize_decl_name (FILE * stream, tree decl) + { + const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0); + tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl)); + if (version_attr) + { + tree v = TREE_VALUE (TREE_VALUE (version_attr)); + const char *p = TREE_STRING_POINTER (v); + fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p); + } + targetm.asm_out.globalize_label (stream, name); + if (TREE_CODE (decl) == FUNCTION_DECL) + ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function"); + } + + /* Helper function for ia64_compute_frame_size: find an appropriate general + register to spill some special register to. SPECIAL_SPILL_MASK contains + bits in GR0 to GR31 that have already been allocated by this routine. + TRY_LOCALS is true if we should attempt to locate a local regnum. */ + + static int + find_gr_spill (enum ia64_frame_regs r, int try_locals) + { + int regno; + + if (emitted_frame_related_regs[r] != 0) + { + regno = emitted_frame_related_regs[r]; + if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed) + && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1) + current_frame_info.n_local_regs = regno - LOC_REG (0) + 1; + else if (crtl->is_leaf + && regno >= GR_REG (1) && regno <= GR_REG (31)) + current_frame_info.gr_used_mask |= 1 << regno; + + return regno; + } + + /* If this is a leaf function, first try an otherwise unused + call-clobbered register. */ + if (crtl->is_leaf) + { + for (regno = GR_REG (1); regno <= GR_REG (31); regno++) + if (! df_regs_ever_live_p (regno) + && call_used_or_fixed_reg_p (regno) + && ! fixed_regs[regno] + && ! global_regs[regno] + && ((current_frame_info.gr_used_mask >> regno) & 1) == 0 + && ! is_emitted (regno)) + { + current_frame_info.gr_used_mask |= 1 << regno; + return regno; + } + } + + if (try_locals) + { + regno = current_frame_info.n_local_regs; + /* If there is a frame pointer, then we can't use loc79, because + that is HARD_FRAME_POINTER_REGNUM. In particular, see the + reg_name switching code in ia64_expand_prologue. */ + while (regno < (80 - frame_pointer_needed)) + if (! is_emitted (LOC_REG (regno++))) + { + current_frame_info.n_local_regs = regno; + return LOC_REG (regno - 1); + } + } + + /* Failed to find a general register to spill to. Must use stack. */ + return 0; + } + + /* In order to make for nice schedules, we try to allocate every temporary + to a different register. We must of course stay away from call-saved, + fixed, and global registers. We must also stay away from registers + allocated in current_frame_info.gr_used_mask, since those include regs + used all through the prologue. + + Any register allocated here must be used immediately. 
The idea is to + aid scheduling, not to solve data flow problems. */ + + static int last_scratch_gr_reg; + + static int + next_scratch_gr_reg (void) + { + int i, regno; + + for (i = 0; i < 32; ++i) + { + regno = (last_scratch_gr_reg + i + 1) & 31; + if (call_used_or_fixed_reg_p (regno) + && ! fixed_regs[regno] + && ! global_regs[regno] + && ((current_frame_info.gr_used_mask >> regno) & 1) == 0) + { + last_scratch_gr_reg = regno; + return regno; + } + } + + /* There must be _something_ available. */ + gcc_unreachable (); + } + + /* Helper function for ia64_compute_frame_size, called through + diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */ + + static void + mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED) + { + unsigned int regno = REGNO (reg); + if (regno < 32) + { + unsigned int i, n = REG_NREGS (reg); + for (i = 0; i < n; ++i) + current_frame_info.gr_used_mask |= 1 << (regno + i); + } + } + + + /* Returns the number of bytes offset between the frame pointer and the stack + pointer for the current function. SIZE is the number of bytes of space + needed for local variables. */ + + static void + ia64_compute_frame_size (HOST_WIDE_INT size) + { + HOST_WIDE_INT total_size; + HOST_WIDE_INT spill_size = 0; + HOST_WIDE_INT extra_spill_size = 0; + HOST_WIDE_INT pretend_args_size; + HARD_REG_SET mask; + int n_spilled = 0; + int spilled_gr_p = 0; + int spilled_fr_p = 0; + unsigned int regno; + int min_regno; + int max_regno; + int i; + + if (current_frame_info.initialized) + return; + + memset (¤t_frame_info, 0, sizeof current_frame_info); + CLEAR_HARD_REG_SET (mask); + + /* Don't allocate scratches to the return register. */ + diddle_return_value (mark_reg_gr_used_mask, NULL); + + /* Don't allocate scratches to the EH scratch registers. */ + if (cfun->machine->ia64_eh_epilogue_sp) + mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL); + if (cfun->machine->ia64_eh_epilogue_bsp) + mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL); + + /* Static stack checking uses r2 and r3. */ + if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK + || flag_stack_clash_protection) + current_frame_info.gr_used_mask |= 0xc; + + /* Find the size of the register stack frame. We have only 80 local + registers, because we reserve 8 for the inputs and 8 for the + outputs. */ + + /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed, + since we'll be adjusting that down later. */ + regno = LOC_REG (78) + ! frame_pointer_needed; + for (; regno >= LOC_REG (0); regno--) + if (df_regs_ever_live_p (regno) && !is_emitted (regno)) + break; + current_frame_info.n_local_regs = regno - LOC_REG (0) + 1; + + /* For functions marked with the syscall_linkage attribute, we must mark + all eight input registers as in use, so that locals aren't visible to + the caller. */ + + if (cfun->machine->n_varargs > 0 + || lookup_attribute ("syscall_linkage", + TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) + current_frame_info.n_input_regs = 8; + else + { + for (regno = IN_REG (7); regno >= IN_REG (0); regno--) + if (df_regs_ever_live_p (regno)) + break; + current_frame_info.n_input_regs = regno - IN_REG (0) + 1; + } + + for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--) + if (df_regs_ever_live_p (regno)) + break; + i = regno - OUT_REG (0) + 1; + + #ifndef PROFILE_HOOK + /* When -p profiling, we need one output register for the mcount argument. + Likewise for -a profiling for the bb_init_func argument. 
For -ax + profiling, we need two output registers for the two bb_init_trace_func + arguments. */ + if (crtl->profile) + i = MAX (i, 1); + #endif + current_frame_info.n_output_regs = i; + + /* ??? No rotating register support yet. */ + current_frame_info.n_rotate_regs = 0; + + /* Discover which registers need spilling, and how much room that + will take. Begin with floating point and general registers, + which will always wind up on the stack. */ + + for (regno = FR_REG (2); regno <= FR_REG (127); regno++) + if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno)) + { + SET_HARD_REG_BIT (mask, regno); + spill_size += 16; + n_spilled += 1; + spilled_fr_p = 1; + } + + for (regno = GR_REG (1); regno <= GR_REG (31); regno++) + if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno)) + { + SET_HARD_REG_BIT (mask, regno); + spill_size += 8; + n_spilled += 1; + spilled_gr_p = 1; + } + + for (regno = BR_REG (1); regno <= BR_REG (7); regno++) + if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno)) + { + SET_HARD_REG_BIT (mask, regno); + spill_size += 8; + n_spilled += 1; + } + + /* Now come all special registers that might get saved in other + general registers. */ + + if (frame_pointer_needed) + { + current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1); + /* If we did not get a register, then we take LOC79. This is guaranteed + to be free, even if regs_ever_live is already set, because this is + HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs, + as we don't count loc79 above. */ + if (current_frame_info.r[reg_fp] == 0) + { + current_frame_info.r[reg_fp] = LOC_REG (79); + current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1; + } + } + + if (! crtl->is_leaf) + { + /* Emit a save of BR0 if we call other functions. Do this even + if this function doesn't return, as EH depends on this to be + able to unwind the stack. */ + SET_HARD_REG_BIT (mask, BR_REG (0)); + + current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1); + if (current_frame_info.r[reg_save_b0] == 0) + { + extra_spill_size += 8; + n_spilled += 1; + } + + /* Similarly for ar.pfs. */ + SET_HARD_REG_BIT (mask, AR_PFS_REGNUM); + current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1); + if (current_frame_info.r[reg_save_ar_pfs] == 0) + { + extra_spill_size += 8; + n_spilled += 1; + } + + /* Similarly for gp. Note that if we're calling setjmp, the stacked + registers are clobbered, so we fall back to the stack. */ + current_frame_info.r[reg_save_gp] + = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1)); + if (current_frame_info.r[reg_save_gp] == 0) + { + SET_HARD_REG_BIT (mask, GR_REG (1)); + spill_size += 8; + n_spilled += 1; + } + } + else + { + if (df_regs_ever_live_p (BR_REG (0)) + && ! call_used_or_fixed_reg_p (BR_REG (0))) + { + SET_HARD_REG_BIT (mask, BR_REG (0)); + extra_spill_size += 8; + n_spilled += 1; + } + + if (df_regs_ever_live_p (AR_PFS_REGNUM)) + { + SET_HARD_REG_BIT (mask, AR_PFS_REGNUM); + current_frame_info.r[reg_save_ar_pfs] + = find_gr_spill (reg_save_ar_pfs, 1); + if (current_frame_info.r[reg_save_ar_pfs] == 0) + { + extra_spill_size += 8; + n_spilled += 1; + } + } + } + + /* Unwind descriptor hackery: things are most efficient if we allocate + consecutive GR save registers for RP, PFS, FP in that order. However, + it is absolutely critical that FP get the only hard register that's + guaranteed to be free, so we allocated it first. 
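   Concretely, the preferred layout is e.g. b0 saved in loc3, ar.pfs in
   loc4 and the frame pointer in loc5; an allocation that came out as
   fp=loc3, b0=loc4, pfs=loc5 is consecutive but mis-ordered and gets
   rotated into place below.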
If all three did + happen to be allocated hard regs, and are consecutive, rearrange them + into the preferred order now. + + If we have already emitted code for any of those registers, + then it's already too late to change. */ + min_regno = MIN (current_frame_info.r[reg_fp], + MIN (current_frame_info.r[reg_save_b0], + current_frame_info.r[reg_save_ar_pfs])); + max_regno = MAX (current_frame_info.r[reg_fp], + MAX (current_frame_info.r[reg_save_b0], + current_frame_info.r[reg_save_ar_pfs])); + if (min_regno > 0 + && min_regno + 2 == max_regno + && (current_frame_info.r[reg_fp] == min_regno + 1 + || current_frame_info.r[reg_save_b0] == min_regno + 1 + || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1) + && (emitted_frame_related_regs[reg_save_b0] == 0 + || emitted_frame_related_regs[reg_save_b0] == min_regno) + && (emitted_frame_related_regs[reg_save_ar_pfs] == 0 + || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1) + && (emitted_frame_related_regs[reg_fp] == 0 + || emitted_frame_related_regs[reg_fp] == min_regno + 2)) + { + current_frame_info.r[reg_save_b0] = min_regno; + current_frame_info.r[reg_save_ar_pfs] = min_regno + 1; + current_frame_info.r[reg_fp] = min_regno + 2; + } + + /* See if we need to store the predicate register block. */ + for (regno = PR_REG (0); regno <= PR_REG (63); regno++) + if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno)) + break; + if (regno <= PR_REG (63)) + { + SET_HARD_REG_BIT (mask, PR_REG (0)); + current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1); + if (current_frame_info.r[reg_save_pr] == 0) + { + extra_spill_size += 8; + n_spilled += 1; + } + + /* ??? Mark them all as used so that register renaming and such + are free to use them. */ + for (regno = PR_REG (0); regno <= PR_REG (63); regno++) + df_set_regs_ever_live (regno, true); + } + + /* If we're forced to use st8.spill, we're forced to save and restore + ar.unat as well. The check for existing liveness allows inline asm + to touch ar.unat. */ + if (spilled_gr_p || cfun->machine->n_varargs + || df_regs_ever_live_p (AR_UNAT_REGNUM)) + { + df_set_regs_ever_live (AR_UNAT_REGNUM, true); + SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM); + current_frame_info.r[reg_save_ar_unat] + = find_gr_spill (reg_save_ar_unat, spill_size == 0); + if (current_frame_info.r[reg_save_ar_unat] == 0) + { + extra_spill_size += 8; + n_spilled += 1; + } + } + + if (df_regs_ever_live_p (AR_LC_REGNUM)) + { + SET_HARD_REG_BIT (mask, AR_LC_REGNUM); + current_frame_info.r[reg_save_ar_lc] + = find_gr_spill (reg_save_ar_lc, spill_size == 0); + if (current_frame_info.r[reg_save_ar_lc] == 0) + { + extra_spill_size += 8; + n_spilled += 1; + } + } + + /* If we have an odd number of words of pretend arguments written to + the stack, then the FR save area will be unaligned. We round the + size of this area up to keep things 16 byte aligned. */ + if (spilled_fr_p) + pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size); + else + pretend_args_size = crtl->args.pretend_args_size; + ++ if (FRAME_GROWS_DOWNWARD) ++ size = IA64_STACK_ALIGN (size); ++ + total_size = (spill_size + extra_spill_size + size + pretend_args_size + + crtl->outgoing_args_size); + total_size = IA64_STACK_ALIGN (total_size); + + /* We always use the 16-byte scratch area provided by the caller, but + if we are a leaf function, there's no one to which we need to provide + a scratch area. 
However, if the function allocates dynamic stack space, + the dynamic offset is computed early and contains STACK_POINTER_OFFSET, + so we need to cope. */ + if (crtl->is_leaf && !cfun->calls_alloca) + total_size = MAX (0, total_size - 16); + + current_frame_info.total_size = total_size; + current_frame_info.spill_cfa_off = pretend_args_size - 16; + current_frame_info.spill_size = spill_size; + current_frame_info.extra_spill_size = extra_spill_size; + current_frame_info.mask = mask; + current_frame_info.n_spilled = n_spilled; + current_frame_info.initialized = reload_completed; + } + + /* Worker function for TARGET_CAN_ELIMINATE. */ + + bool + ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) + { + return (to == BR_REG (0) ? crtl->is_leaf : true); + } + + /* Compute the initial difference between the specified pair of registers. */ + + HOST_WIDE_INT + ia64_initial_elimination_offset (int from, int to) + { - HOST_WIDE_INT offset; ++ HOST_WIDE_INT offset, size = get_frame_size (); + - ia64_compute_frame_size (get_frame_size ()); ++ ia64_compute_frame_size (size); + switch (from) + { + case FRAME_POINTER_REGNUM: - switch (to) - { - case HARD_FRAME_POINTER_REGNUM: - offset = -current_frame_info.total_size; - if (!crtl->is_leaf || cfun->calls_alloca) - offset += 16 + crtl->outgoing_args_size; - break; - - case STACK_POINTER_REGNUM: - offset = 0; - if (!crtl->is_leaf || cfun->calls_alloca) - offset += 16 + crtl->outgoing_args_size; - break; - - default: - gcc_unreachable (); - } ++ offset = FRAME_GROWS_DOWNWARD ? IA64_STACK_ALIGN (size) : 0; ++ if (!crtl->is_leaf || cfun->calls_alloca) ++ offset += 16 + crtl->outgoing_args_size; ++ if (to == HARD_FRAME_POINTER_REGNUM) ++ offset -= current_frame_info.total_size; ++ else ++ gcc_assert (to == STACK_POINTER_REGNUM); + break; + + case ARG_POINTER_REGNUM: + /* Arguments start above the 16 byte save area, unless stdarg + in which case we store through the 16 byte save area. */ + switch (to) + { + case HARD_FRAME_POINTER_REGNUM: + offset = 16 - crtl->args.pretend_args_size; + break; + + case STACK_POINTER_REGNUM: + offset = (current_frame_info.total_size + + 16 - crtl->args.pretend_args_size); + break; + + default: + gcc_unreachable (); + } + break; + + default: + gcc_unreachable (); + } + + return offset; + } + + /* If there are more than a trivial number of register spills, we use + two interleaved iterators so that we can get two memory references + per insn group. + + In order to simplify things in the prologue and epilogue expanders, + we use helper functions to fix up the memory references after the + fact with the appropriate offsets to a POST_MODIFY memory mode. + The following data structure tracks the state of the two iterators + while insns are being emitted. 
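   Two iterators rather than one because alternating base registers
   breaks the serial dependence through a single post-incremented
   pointer, letting consecutive spills issue in the same group (the
   machine can do two memory references per group, as noted above).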
*/ + + struct spill_fill_data + { + rtx_insn *init_after; /* point at which to emit initializations */ + rtx init_reg[2]; /* initial base register */ + rtx iter_reg[2]; /* the iterator registers */ + rtx *prev_addr[2]; /* address of last memory use */ + rtx_insn *prev_insn[2]; /* the insn corresponding to prev_addr */ + HOST_WIDE_INT prev_off[2]; /* last offset */ + int n_iter; /* number of iterators in use */ + int next_iter; /* next iterator to use */ + unsigned int save_gr_used_mask; + }; + + static struct spill_fill_data spill_fill_data; + + static void + setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off) + { + int i; + + spill_fill_data.init_after = get_last_insn (); + spill_fill_data.init_reg[0] = init_reg; + spill_fill_data.init_reg[1] = init_reg; + spill_fill_data.prev_addr[0] = NULL; + spill_fill_data.prev_addr[1] = NULL; + spill_fill_data.prev_insn[0] = NULL; + spill_fill_data.prev_insn[1] = NULL; + spill_fill_data.prev_off[0] = cfa_off; + spill_fill_data.prev_off[1] = cfa_off; + spill_fill_data.next_iter = 0; + spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask; + + spill_fill_data.n_iter = 1 + (n_spills > 2); + for (i = 0; i < spill_fill_data.n_iter; ++i) + { + int regno = next_scratch_gr_reg (); + spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno); + current_frame_info.gr_used_mask |= 1 << regno; + } + } + + static void + finish_spill_pointers (void) + { + current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask; + } + + static rtx + spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off) + { + int iter = spill_fill_data.next_iter; + HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off; + rtx disp_rtx = GEN_INT (disp); + rtx mem; + + if (spill_fill_data.prev_addr[iter]) + { + if (satisfies_constraint_N (disp_rtx)) + { + *spill_fill_data.prev_addr[iter] + = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter], + gen_rtx_PLUS (DImode, + spill_fill_data.iter_reg[iter], + disp_rtx)); + add_reg_note (spill_fill_data.prev_insn[iter], + REG_INC, spill_fill_data.iter_reg[iter]); + } + else + { + /* ??? Could use register post_modify for loads. */ + if (!satisfies_constraint_I (disp_rtx)) + { + rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ()); + emit_move_insn (tmp, disp_rtx); + disp_rtx = tmp; + } + emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter], + spill_fill_data.iter_reg[iter], disp_rtx)); + } + } + /* Micro-optimization: if we've created a frame pointer, it's at + CFA 0, which may allow the real iterator to be initialized lower, + slightly increasing parallelism. Also, if there are few saves + it may eliminate the iterator entirely. */ + else if (disp == 0 + && spill_fill_data.init_reg[iter] == stack_pointer_rtx + && frame_pointer_needed) + { + mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx); + set_mem_alias_set (mem, get_varargs_alias_set ()); + return mem; + } + else + { + rtx seq; + rtx_insn *insn; + + if (disp == 0) + seq = gen_movdi (spill_fill_data.iter_reg[iter], + spill_fill_data.init_reg[iter]); + else + { + start_sequence (); + + if (!satisfies_constraint_I (disp_rtx)) + { + rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ()); + emit_move_insn (tmp, disp_rtx); + disp_rtx = tmp; + } + + emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter], + spill_fill_data.init_reg[iter], + disp_rtx)); + + seq = get_insns (); + end_sequence (); + } + + /* Careful for being the first insn in a sequence. 
*/ + if (spill_fill_data.init_after) + insn = emit_insn_after (seq, spill_fill_data.init_after); + else + { + rtx_insn *first = get_insns (); + if (first) + insn = emit_insn_before (seq, first); + else + insn = emit_insn (seq); + } + spill_fill_data.init_after = insn; + } + + mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]); + + /* ??? Not all of the spills are for varargs, but some of them are. + The rest of the spills belong in an alias set of their own. But + it doesn't actually hurt to include them here. */ + set_mem_alias_set (mem, get_varargs_alias_set ()); + + spill_fill_data.prev_addr[iter] = &XEXP (mem, 0); + spill_fill_data.prev_off[iter] = cfa_off; + + if (++iter >= spill_fill_data.n_iter) + iter = 0; + spill_fill_data.next_iter = iter; + + return mem; + } + + static void + do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off, + rtx frame_reg) + { + int iter = spill_fill_data.next_iter; + rtx mem; + rtx_insn *insn; + + mem = spill_restore_mem (reg, cfa_off); + insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off))); + spill_fill_data.prev_insn[iter] = insn; + + if (frame_reg) + { + rtx base; + HOST_WIDE_INT off; + + RTX_FRAME_RELATED_P (insn) = 1; + + /* Don't even pretend that the unwind code can intuit its way + through a pair of interleaved post_modify iterators. Just + provide the correct answer. */ + + if (frame_pointer_needed) + { + base = hard_frame_pointer_rtx; + off = - cfa_off; + } + else + { + base = stack_pointer_rtx; + off = current_frame_info.total_size - cfa_off; + } + + add_reg_note (insn, REG_CFA_OFFSET, + gen_rtx_SET (gen_rtx_MEM (GET_MODE (reg), + plus_constant (Pmode, + base, off)), + frame_reg)); + } + } + + static void + do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off) + { + int iter = spill_fill_data.next_iter; + rtx_insn *insn; + + insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off), + GEN_INT (cfa_off))); + spill_fill_data.prev_insn[iter] = insn; + } + + /* Wrapper functions that discards the CONST_INT spill offset. These + exist so that we can give gr_spill/gr_fill the offset they need and + use a consistent function interface. */ + + static rtx + gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED) + { + return gen_movdi (dest, src); + } + + static rtx + gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED) + { + return gen_fr_spill (dest, src); + } + + static rtx + gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED) + { + return gen_fr_restore (dest, src); + } + + #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP) + + /* See Table 6.2 of the IA-64 Software Developer Manual, Volume 2. */ + #define BACKING_STORE_SIZE(N) ((N) > 0 ? ((N) + (N)/63 + 1) * 8 : 0) + + /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE, + inclusive. These are offsets from the current stack pointer. BS_SIZE + is the size of the backing store. ??? This clobbers r2 and r3. */ + + static void + ia64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size, + int bs_size) + { + rtx r2 = gen_rtx_REG (Pmode, GR_REG (2)); + rtx r3 = gen_rtx_REG (Pmode, GR_REG (3)); + rtx p6 = gen_rtx_REG (BImode, PR_REG (6)); + + /* On the IA-64 there is a second stack in memory, namely the Backing Store + of the Register Stack Engine. We also need to probe it after checking + that the 2 stacks don't overlap. */ + emit_insn (gen_bsp_value (r3)); + emit_move_insn (r2, GEN_INT (-(first + size))); + + /* Compare current value of BSP and SP registers. 
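   p6 ends up set only when the backing store (r3, which grows towards
   higher addresses) still lies below the memory stack, the only layout
   in which the overlap test further down is meaningful; otherwise the
   predicated compare and trap below are skipped.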
*/ + emit_insn (gen_rtx_SET (p6, gen_rtx_fmt_ee (LTU, BImode, + r3, stack_pointer_rtx))); + + /* Compute the address of the probe for the Backing Store (which grows + towards higher addresses). We probe only at the first offset of + the next page because some OS (eg Linux/ia64) only extend the + backing store when this specific address is hit (but generate a SEGV + on other address). Page size is the worst case (4KB). The reserve + size is at least 4096 - (96 + 2) * 8 = 3312 bytes, which is enough. + Also compute the address of the last probe for the memory stack + (which grows towards lower addresses). */ + emit_insn (gen_rtx_SET (r3, plus_constant (Pmode, r3, 4095))); + emit_insn (gen_rtx_SET (r2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2))); + + /* Compare them and raise SEGV if the former has topped the latter. */ + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx), + gen_rtx_SET (p6, gen_rtx_fmt_ee (GEU, BImode, + r3, r2)))); + emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, r3, GEN_INT (12), + const0_rtx), + const0_rtx)); + emit_insn (gen_rtx_COND_EXEC (VOIDmode, + gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx), + gen_rtx_TRAP_IF (VOIDmode, const1_rtx, + GEN_INT (11)))); + + /* Probe the Backing Store if necessary. */ + if (bs_size > 0) + emit_stack_probe (r3); + + /* Probe the memory stack if necessary. */ + if (size == 0) + ; + + /* See if we have a constant small number of probes to generate. If so, + that's the easy case. */ + else if (size <= PROBE_INTERVAL) + emit_stack_probe (r2); + + /* The run-time loop is made up of 9 insns in the generic case while this + compile-time loop is made up of 5+2*(n-2) insns for n # of intervals. */ + else if (size <= 4 * PROBE_INTERVAL) + { + HOST_WIDE_INT i; + + emit_move_insn (r2, GEN_INT (-(first + PROBE_INTERVAL))); + emit_insn (gen_rtx_SET (r2, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2))); + emit_stack_probe (r2); + + /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until + it exceeds SIZE. If only two probes are needed, this will not + generate any code. Then probe at FIRST + SIZE. */ + for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL) + { + emit_insn (gen_rtx_SET (r2, + plus_constant (Pmode, r2, -PROBE_INTERVAL))); + emit_stack_probe (r2); + } + + emit_insn (gen_rtx_SET (r2, + plus_constant (Pmode, r2, + (i - PROBE_INTERVAL) - size))); + emit_stack_probe (r2); + } + + /* Otherwise, do the same as above, but in a loop. Note that we must be + extra careful with variables wrapping around because we might be at + the very top (or the very bottom) of the address space and we have + to be able to handle this case properly; in particular, we use an + equality test for the loop condition. */ + else + { + HOST_WIDE_INT rounded_size; + + emit_move_insn (r2, GEN_INT (-first)); + + + /* Step 1: round SIZE to the previous multiple of the interval. */ + + rounded_size = size & -PROBE_INTERVAL; + + + /* Step 2: compute initial and final value of the loop counter. */ + + /* TEST_ADDR = SP + FIRST. */ + emit_insn (gen_rtx_SET (r2, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2))); + + /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. 
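   As a worked example with the usual 4 KB probe interval: SIZE = 20 KB
   rounds down to ROUNDED_SIZE = 16 KB, the loop below probes at
   FIRST + 4K, 8K, 12K and 16K, and the SIZE != ROUNDED_SIZE tail probe
   covers FIRST + 20K.  The 1 << 21 test merely keeps -ROUNDED_SIZE
   within the 22-bit add-immediate range; larger constants are loaded
   into r3 first.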
*/ + if (rounded_size > (1 << 21)) + { + emit_move_insn (r3, GEN_INT (-rounded_size)); + emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2, r3))); + } + else + emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2, + GEN_INT (-rounded_size)))); + + + /* Step 3: the loop + + do + { + TEST_ADDR = TEST_ADDR + PROBE_INTERVAL + probe at TEST_ADDR + } + while (TEST_ADDR != LAST_ADDR) + + probes at FIRST + N * PROBE_INTERVAL for values of N from 1 + until it is equal to ROUNDED_SIZE. */ + + emit_insn (gen_probe_stack_range (r2, r2, r3)); + + + /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time + that SIZE is equal to ROUNDED_SIZE. */ + + /* TEMP = SIZE - ROUNDED_SIZE. */ + if (size != rounded_size) + { + emit_insn (gen_rtx_SET (r2, plus_constant (Pmode, r2, + rounded_size - size))); + emit_stack_probe (r2); + } + } + + /* Make sure nothing is scheduled before we are done. */ + emit_insn (gen_blockage ()); + } + + /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are + absolute addresses. */ + + const char * + output_probe_stack_range (rtx reg1, rtx reg2) + { + static int labelno = 0; + char loop_lab[32]; + rtx xops[3]; + + ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); + + /* Loop. */ + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); + + /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */ + xops[0] = reg1; + xops[1] = GEN_INT (-PROBE_INTERVAL); + output_asm_insn ("addl %0 = %1, %0", xops); + fputs ("\t;;\n", asm_out_file); + + /* Probe at TEST_ADDR. */ + output_asm_insn ("probe.w.fault %0, 0", xops); + + /* Test if TEST_ADDR == LAST_ADDR. */ + xops[1] = reg2; + xops[2] = gen_rtx_REG (BImode, PR_REG (6)); + output_asm_insn ("cmp.eq %2, %I2 = %0, %1", xops); + + /* Branch. */ + fprintf (asm_out_file, "\t(%s) br.cond.dpnt ", reg_names [PR_REG (7)]); + assemble_name_raw (asm_out_file, loop_lab); + fputc ('\n', asm_out_file); + + return ""; + } + + /* Called after register allocation to add any instructions needed for the + prologue. Using a prologue insn is favored compared to putting all of the + instructions in output_function_prologue(), since it allows the scheduler + to intermix instructions with the saves of the caller saved registers. In + some cases, it might be necessary to emit a barrier instruction as the last + insn to prevent such scheduling. + + Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1 + so that the debug info generation code can handle them properly. + + The register save area is laid out like so: + cfa+16 + [ varargs spill area ] + [ fr register spill area ] + [ br register spill area ] + [ ar register spill area ] + [ pr register spill area ] + [ gr register spill area ] */ + + /* ??? Get inefficient code when the frame size is larger than can fit in an + adds instruction. 
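   (adds takes a 14-bit signed immediate, so once the frame grows past
   +-8 KB the satisfies_constraint_I test below fails and we pay for a
   move of the constant into a scratch register plus a separate add.)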
*/ + + void + ia64_expand_prologue (void) + { + rtx_insn *insn; + rtx ar_pfs_save_reg, ar_unat_save_reg; + int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs; + rtx reg, alt_reg; + + ia64_compute_frame_size (get_frame_size ()); + last_scratch_gr_reg = 15; + + if (flag_stack_usage_info) + current_function_static_stack_size = current_frame_info.total_size; + + if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK + || flag_stack_clash_protection) + { + HOST_WIDE_INT size = current_frame_info.total_size; + int bs_size = BACKING_STORE_SIZE (current_frame_info.n_input_regs + + current_frame_info.n_local_regs); + + if (crtl->is_leaf && !cfun->calls_alloca) + { + if (size > PROBE_INTERVAL && size > get_stack_check_protect ()) + ia64_emit_probe_stack_range (get_stack_check_protect (), + size - get_stack_check_protect (), + bs_size); + else if (size + bs_size > get_stack_check_protect ()) + ia64_emit_probe_stack_range (get_stack_check_protect (), + 0, bs_size); + } + else if (size + bs_size > 0) + ia64_emit_probe_stack_range (get_stack_check_protect (), size, bs_size); + } + + if (dump_file) + { + fprintf (dump_file, "ia64 frame related registers " + "recorded in current_frame_info.r[]:\n"); + #define PRINTREG(a) if (current_frame_info.r[a]) \ + fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a]) + PRINTREG(reg_fp); + PRINTREG(reg_save_b0); + PRINTREG(reg_save_pr); + PRINTREG(reg_save_ar_pfs); + PRINTREG(reg_save_ar_unat); + PRINTREG(reg_save_ar_lc); + PRINTREG(reg_save_gp); + #undef PRINTREG + } + + /* If there is no epilogue, then we don't need some prologue insns. + We need to avoid emitting the dead prologue insns, because flow + will complain about them. */ + if (optimize) + { + edge e; + edge_iterator ei; + + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) + if ((e->flags & EDGE_FAKE) == 0 + && (e->flags & EDGE_FALLTHRU) != 0) + break; + epilogue_p = (e != NULL); + } + else + epilogue_p = 1; + + /* Set the local, input, and output register names. We need to do this + for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in + half. If we use in/loc/out register names, then we get assembler errors + in crtn.S because there is no alloc insn or regstk directive in there. */ + if (! TARGET_REG_NAMES) + { + int inputs = current_frame_info.n_input_regs; + int locals = current_frame_info.n_local_regs; + int outputs = current_frame_info.n_output_regs; + + for (i = 0; i < inputs; i++) + reg_names[IN_REG (i)] = ia64_reg_numbers[i]; + for (i = 0; i < locals; i++) + reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i]; + for (i = 0; i < outputs; i++) + reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i]; + } + + /* Set the frame pointer register name. The regnum is logically loc79, + but of course we'll not have allocated that many locals. Rather than + worrying about renumbering the existing rtxs, we adjust the name. */ + /* ??? This code means that we can never use one local register when + there is a frame pointer. loc79 gets wasted in this case, as it is + renamed to a register that will never be used. See also the try_locals + code in find_gr_spill. */ + if (current_frame_info.r[reg_fp]) + { + const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM]; + reg_names[HARD_FRAME_POINTER_REGNUM] + = reg_names[current_frame_info.r[reg_fp]]; + reg_names[current_frame_info.r[reg_fp]] = tmp; + } + + /* We don't need an alloc instruction if we've used no outputs or locals. 
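   Otherwise the else branch emits the alloc, which in assembly has the
   shape

	alloc savereg = ar.pfs, ins, locals, outs, rot

   e.g. alloc r34 = ar.pfs, 2, 3, 4, 0 for two inputs, three locals and
   four outputs (the register number is illustrative; rot is always 0
   since rotating registers are not used, see n_rotate_regs above).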
*/ + if (current_frame_info.n_local_regs == 0 + && current_frame_info.n_output_regs == 0 + && current_frame_info.n_input_regs <= crtl->args.info.int_regs + && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)) + { + /* If there is no alloc, but there are input registers used, then we + need a .regstk directive. */ + current_frame_info.need_regstk = (TARGET_REG_NAMES != 0); + ar_pfs_save_reg = NULL_RTX; + } + else + { + current_frame_info.need_regstk = 0; + + if (current_frame_info.r[reg_save_ar_pfs]) + { + regno = current_frame_info.r[reg_save_ar_pfs]; + reg_emitted (reg_save_ar_pfs); + } + else + regno = next_scratch_gr_reg (); + ar_pfs_save_reg = gen_rtx_REG (DImode, regno); + + insn = emit_insn (gen_alloc (ar_pfs_save_reg, + GEN_INT (current_frame_info.n_input_regs), + GEN_INT (current_frame_info.n_local_regs), + GEN_INT (current_frame_info.n_output_regs), + GEN_INT (current_frame_info.n_rotate_regs))); + if (current_frame_info.r[reg_save_ar_pfs]) + { + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_REGISTER, + gen_rtx_SET (ar_pfs_save_reg, + gen_rtx_REG (DImode, AR_PFS_REGNUM))); + } + } + + /* Set up frame pointer, stack pointer, and spill iterators. */ + + n_varargs = cfun->machine->n_varargs; + setup_spill_pointers (current_frame_info.n_spilled + n_varargs, + stack_pointer_rtx, 0); + + if (frame_pointer_needed) + { + insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + + /* Force the unwind info to recognize this as defining a new CFA, + rather than some temp register setup. */ + add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX); + } + + if (current_frame_info.total_size != 0) + { + rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size); + rtx offset; + + if (satisfies_constraint_I (frame_size_rtx)) + offset = frame_size_rtx; + else + { + regno = next_scratch_gr_reg (); + offset = gen_rtx_REG (DImode, regno); + emit_move_insn (offset, frame_size_rtx); + } + + insn = emit_insn (gen_adddi3 (stack_pointer_rtx, + stack_pointer_rtx, offset)); + + if (! frame_pointer_needed) + { + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_ADJUST_CFA, + gen_rtx_SET (stack_pointer_rtx, + gen_rtx_PLUS (DImode, + stack_pointer_rtx, + frame_size_rtx))); + } + + /* ??? At this point we must generate a magic insn that appears to + modify the stack pointer, the frame pointer, and all spill + iterators. This would allow the most scheduling freedom. For + now, just hard stop. */ + emit_insn (gen_blockage ()); + } + + /* Must copy out ar.unat before doing any integer spills. */ + if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)) + { + if (current_frame_info.r[reg_save_ar_unat]) + { + ar_unat_save_reg + = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]); + reg_emitted (reg_save_ar_unat); + } + else + { + alt_regno = next_scratch_gr_reg (); + ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno); + current_frame_info.gr_used_mask |= 1 << alt_regno; + } + + reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM); + insn = emit_move_insn (ar_unat_save_reg, reg); + if (current_frame_info.r[reg_save_ar_unat]) + { + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX); + } + + /* Even if we're not going to generate an epilogue, we still + need to save the register so that EH works. */ + if (! epilogue_p && current_frame_info.r[reg_save_ar_unat]) + emit_insn (gen_prologue_use (ar_unat_save_reg)); + } + else + ar_unat_save_reg = NULL_RTX; + + /* Spill all varargs registers. 
Do this before spilling any GR registers, + since we want the UNAT bits for the GR registers to override the UNAT + bits from varargs, which we don't care about. */ + + cfa_off = -16; + for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno) + { + reg = gen_rtx_REG (DImode, regno); + do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX); + } + + /* Locate the bottom of the register save area. */ + cfa_off = (current_frame_info.spill_cfa_off + + current_frame_info.spill_size + + current_frame_info.extra_spill_size); + + /* Save the predicate register block either in a register or in memory. */ + if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0))) + { + reg = gen_rtx_REG (DImode, PR_REG (0)); + if (current_frame_info.r[reg_save_pr] != 0) + { + alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]); + reg_emitted (reg_save_pr); + insn = emit_move_insn (alt_reg, reg); + + /* ??? Denote pr spill/fill by a DImode move that modifies all + 64 hard registers. */ + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX); + + /* Even if we're not going to generate an epilogue, we still + need to save the register so that EH works. */ + if (! epilogue_p) + emit_insn (gen_prologue_use (alt_reg)); + } + else + { + alt_regno = next_scratch_gr_reg (); + alt_reg = gen_rtx_REG (DImode, alt_regno); + insn = emit_move_insn (alt_reg, reg); + do_spill (gen_movdi_x, alt_reg, cfa_off, reg); + cfa_off -= 8; + } + } + + /* Handle AR regs in numerical order. All of them get special handling. */ + if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM) + && current_frame_info.r[reg_save_ar_unat] == 0) + { + reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM); + do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg); + cfa_off -= 8; + } + + /* The alloc insn already copied ar.pfs into a general register. The + only thing we have to do now is copy that register to a stack slot + if we'd not allocated a local register for the job. */ + if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM) + && current_frame_info.r[reg_save_ar_pfs] == 0) + { + reg = gen_rtx_REG (DImode, AR_PFS_REGNUM); + do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg); + cfa_off -= 8; + } + + if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM)) + { + reg = gen_rtx_REG (DImode, AR_LC_REGNUM); + if (current_frame_info.r[reg_save_ar_lc] != 0) + { + alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]); + reg_emitted (reg_save_ar_lc); + insn = emit_move_insn (alt_reg, reg); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX); + + /* Even if we're not going to generate an epilogue, we still + need to save the register so that EH works. */ + if (! epilogue_p) + emit_insn (gen_prologue_use (alt_reg)); + } + else + { + alt_regno = next_scratch_gr_reg (); + alt_reg = gen_rtx_REG (DImode, alt_regno); + emit_move_insn (alt_reg, reg); + do_spill (gen_movdi_x, alt_reg, cfa_off, reg); + cfa_off -= 8; + } + } + + /* Save the return pointer. 
*/ + if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0))) + { + reg = gen_rtx_REG (DImode, BR_REG (0)); + if (current_frame_info.r[reg_save_b0] != 0) + { + alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]); + reg_emitted (reg_save_b0); + insn = emit_move_insn (alt_reg, reg); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (alt_reg, pc_rtx)); + + /* Even if we're not going to generate an epilogue, we still + need to save the register so that EH works. */ + if (! epilogue_p) + emit_insn (gen_prologue_use (alt_reg)); + } + else + { + alt_regno = next_scratch_gr_reg (); + alt_reg = gen_rtx_REG (DImode, alt_regno); + emit_move_insn (alt_reg, reg); + do_spill (gen_movdi_x, alt_reg, cfa_off, reg); + cfa_off -= 8; + } + } + + if (current_frame_info.r[reg_save_gp]) + { + reg_emitted (reg_save_gp); + insn = emit_move_insn (gen_rtx_REG (DImode, + current_frame_info.r[reg_save_gp]), + pic_offset_table_rtx); + } + + /* We should now be at the base of the gr/br/fr spill area. */ + gcc_assert (cfa_off == (current_frame_info.spill_cfa_off + + current_frame_info.spill_size)); + + /* Spill all general registers. */ + for (regno = GR_REG (1); regno <= GR_REG (31); ++regno) + if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) + { + reg = gen_rtx_REG (DImode, regno); + do_spill (gen_gr_spill, reg, cfa_off, reg); + cfa_off -= 8; + } + + /* Spill the rest of the BR registers. */ + for (regno = BR_REG (1); regno <= BR_REG (7); ++regno) + if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) + { + alt_regno = next_scratch_gr_reg (); + alt_reg = gen_rtx_REG (DImode, alt_regno); + reg = gen_rtx_REG (DImode, regno); + emit_move_insn (alt_reg, reg); + do_spill (gen_movdi_x, alt_reg, cfa_off, reg); + cfa_off -= 8; + } + + /* Align the frame and spill all FR registers. */ + for (regno = FR_REG (2); regno <= FR_REG (127); ++regno) + if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) + { + gcc_assert (!(cfa_off & 15)); + reg = gen_rtx_REG (XFmode, regno); + do_spill (gen_fr_spill_x, reg, cfa_off, reg); + cfa_off -= 16; + } + + gcc_assert (cfa_off == current_frame_info.spill_cfa_off); + + finish_spill_pointers (); + } + + /* Output the textual info surrounding the prologue. */ + + void + ia64_start_function (FILE *file, const char *fnname, + tree decl ATTRIBUTE_UNUSED) + { + #if TARGET_ABI_OPEN_VMS + vms_start_function (fnname); + #endif + + fputs ("\t.proc ", file); + assemble_name (file, fnname); + fputc ('\n', file); + ASM_OUTPUT_LABEL (file, fnname); + } + + /* Called after register allocation to add any instructions needed for the + epilogue. Using an epilogue insn is favored compared to putting all of the + instructions in output_function_prologue(), since it allows the scheduler + to intermix instructions with the saves of the caller saved registers. In + some cases, it might be necessary to emit a barrier instruction as the last + insn to prevent such scheduling. */ + + void + ia64_expand_epilogue (int sibcall_p) + { + rtx_insn *insn; + rtx reg, alt_reg, ar_unat_save_reg; + int regno, alt_regno, cfa_off; + + ia64_compute_frame_size (get_frame_size ()); + + /* If there is a frame pointer, then we use it instead of the stack + pointer, so that the stack pointer does not need to be valid when + the epilogue starts. See EXIT_IGNORE_STACK. 
*/ + if (frame_pointer_needed) + setup_spill_pointers (current_frame_info.n_spilled, + hard_frame_pointer_rtx, 0); + else + setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx, + current_frame_info.total_size); + + if (current_frame_info.total_size != 0) + { + /* ??? At this point we must generate a magic insn that appears to + modify the spill iterators and the frame pointer. This would + allow the most scheduling freedom. For now, just hard stop. */ + emit_insn (gen_blockage ()); + } + + /* Locate the bottom of the register save area. */ + cfa_off = (current_frame_info.spill_cfa_off + + current_frame_info.spill_size + + current_frame_info.extra_spill_size); + + /* Restore the predicate registers. */ + if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0))) + { + if (current_frame_info.r[reg_save_pr] != 0) + { + alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]); + reg_emitted (reg_save_pr); + } + else + { + alt_regno = next_scratch_gr_reg (); + alt_reg = gen_rtx_REG (DImode, alt_regno); + do_restore (gen_movdi_x, alt_reg, cfa_off); + cfa_off -= 8; + } + reg = gen_rtx_REG (DImode, PR_REG (0)); + emit_move_insn (reg, alt_reg); + } + + /* Restore the application registers. */ + + /* Load the saved unat from the stack, but do not restore it until + after the GRs have been restored. */ + if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)) + { + if (current_frame_info.r[reg_save_ar_unat] != 0) + { + ar_unat_save_reg + = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]); + reg_emitted (reg_save_ar_unat); + } + else + { + alt_regno = next_scratch_gr_reg (); + ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno); + current_frame_info.gr_used_mask |= 1 << alt_regno; + do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off); + cfa_off -= 8; + } + } + else + ar_unat_save_reg = NULL_RTX; + + if (current_frame_info.r[reg_save_ar_pfs] != 0) + { + reg_emitted (reg_save_ar_pfs); + alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]); + reg = gen_rtx_REG (DImode, AR_PFS_REGNUM); + emit_move_insn (reg, alt_reg); + } + else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)) + { + alt_regno = next_scratch_gr_reg (); + alt_reg = gen_rtx_REG (DImode, alt_regno); + do_restore (gen_movdi_x, alt_reg, cfa_off); + cfa_off -= 8; + reg = gen_rtx_REG (DImode, AR_PFS_REGNUM); + emit_move_insn (reg, alt_reg); + } + + if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM)) + { + if (current_frame_info.r[reg_save_ar_lc] != 0) + { + alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]); + reg_emitted (reg_save_ar_lc); + } + else + { + alt_regno = next_scratch_gr_reg (); + alt_reg = gen_rtx_REG (DImode, alt_regno); + do_restore (gen_movdi_x, alt_reg, cfa_off); + cfa_off -= 8; + } + reg = gen_rtx_REG (DImode, AR_LC_REGNUM); + emit_move_insn (reg, alt_reg); + } + + /* Restore the return pointer. */ + if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0))) + { + if (current_frame_info.r[reg_save_b0] != 0) + { + alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]); + reg_emitted (reg_save_b0); + } + else + { + alt_regno = next_scratch_gr_reg (); + alt_reg = gen_rtx_REG (DImode, alt_regno); + do_restore (gen_movdi_x, alt_reg, cfa_off); + cfa_off -= 8; + } + reg = gen_rtx_REG (DImode, BR_REG (0)); + emit_move_insn (reg, alt_reg); + } + + /* We should now be at the base of the gr/br/fr spill area. 
*/ + gcc_assert (cfa_off == (current_frame_info.spill_cfa_off + + current_frame_info.spill_size)); + + /* The GP may be stored on the stack in the prologue, but it's + never restored in the epilogue. Skip the stack slot. */ + if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1))) + cfa_off -= 8; + + /* Restore all general registers. */ + for (regno = GR_REG (2); regno <= GR_REG (31); ++regno) + if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) + { + reg = gen_rtx_REG (DImode, regno); + do_restore (gen_gr_restore, reg, cfa_off); + cfa_off -= 8; + } + + /* Restore the branch registers. */ + for (regno = BR_REG (1); regno <= BR_REG (7); ++regno) + if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) + { + alt_regno = next_scratch_gr_reg (); + alt_reg = gen_rtx_REG (DImode, alt_regno); + do_restore (gen_movdi_x, alt_reg, cfa_off); + cfa_off -= 8; + reg = gen_rtx_REG (DImode, regno); + emit_move_insn (reg, alt_reg); + } + + /* Restore floating point registers. */ + for (regno = FR_REG (2); regno <= FR_REG (127); ++regno) + if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) + { + gcc_assert (!(cfa_off & 15)); + reg = gen_rtx_REG (XFmode, regno); + do_restore (gen_fr_restore_x, reg, cfa_off); + cfa_off -= 16; + } + + /* Restore ar.unat for real. */ + if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)) + { + reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM); + emit_move_insn (reg, ar_unat_save_reg); + } + + gcc_assert (cfa_off == current_frame_info.spill_cfa_off); + + finish_spill_pointers (); + + if (current_frame_info.total_size + || cfun->machine->ia64_eh_epilogue_sp + || frame_pointer_needed) + { + /* ??? At this point we must generate a magic insn that appears to + modify the spill iterators, the stack pointer, and the frame + pointer. This would allow the most scheduling freedom. For now, + just hard stop. */ + emit_insn (gen_blockage ()); + } + + if (cfun->machine->ia64_eh_epilogue_sp) + emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp); + else if (frame_pointer_needed) + { + insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL); + } + else if (current_frame_info.total_size) + { + rtx offset, frame_size_rtx; + + frame_size_rtx = GEN_INT (current_frame_info.total_size); + if (satisfies_constraint_I (frame_size_rtx)) + offset = frame_size_rtx; + else + { + regno = next_scratch_gr_reg (); + offset = gen_rtx_REG (DImode, regno); + emit_move_insn (offset, frame_size_rtx); + } + + insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, + offset)); + + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_ADJUST_CFA, + gen_rtx_SET (stack_pointer_rtx, + gen_rtx_PLUS (DImode, + stack_pointer_rtx, + frame_size_rtx))); + } + + if (cfun->machine->ia64_eh_epilogue_bsp) + emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp)); + + if (! sibcall_p) + emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0)))); + else + { + int fp = GR_REG (2); + /* We need a throw away register here, r0 and r1 are reserved, + so r2 is the first available call clobbered register. If + there was a frame_pointer register, we may have swapped the + names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make + sure we're using the string "r2" when emitting the register + name for the assembler. 
*/ + if (current_frame_info.r[reg_fp] + && current_frame_info.r[reg_fp] == GR_REG (2)) + fp = HARD_FRAME_POINTER_REGNUM; + + /* We must emit an alloc to force the input registers to become output + registers. Otherwise, if the callee tries to pass its parameters + through to another call without an intervening alloc, then these + values get lost. */ + /* ??? We don't need to preserve all input registers. We only need to + preserve those input registers used as arguments to the sibling call. + It is unclear how to compute that number here. */ + if (current_frame_info.n_input_regs != 0) + { + rtx n_inputs = GEN_INT (current_frame_info.n_input_regs); + + insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp), + const0_rtx, const0_rtx, + n_inputs, const0_rtx)); + RTX_FRAME_RELATED_P (insn) = 1; + + /* ??? We need to mark the alloc as frame-related so that it gets + passed into ia64_asm_unwind_emit for ia64-specific unwinding. + But there's nothing dwarf2 related to be done wrt the register + windows. If we do nothing, dwarf2out will abort on the UNSPEC; + the empty parallel means dwarf2out will not see anything. */ + add_reg_note (insn, REG_FRAME_RELATED_EXPR, + gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (0))); + } + } + } + + /* Return 1 if br.ret can do all the work required to return from a + function. */ + + int + ia64_direct_return (void) + { + if (reload_completed && ! frame_pointer_needed) + { + ia64_compute_frame_size (get_frame_size ()); + + return (current_frame_info.total_size == 0 + && current_frame_info.n_spilled == 0 + && current_frame_info.r[reg_save_b0] == 0 + && current_frame_info.r[reg_save_pr] == 0 + && current_frame_info.r[reg_save_ar_pfs] == 0 + && current_frame_info.r[reg_save_ar_unat] == 0 + && current_frame_info.r[reg_save_ar_lc] == 0); + } + return 0; + } + + /* Return the magic cookie that we use to hold the return address + during early compilation. */ + + rtx + ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED) + { + if (count != 0) + return NULL; + return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR); + } + + /* Split this value after reload, now that we know where the return + address is saved. */ + + void + ia64_split_return_addr_rtx (rtx dest) + { + rtx src; + + if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0))) + { + if (current_frame_info.r[reg_save_b0] != 0) + { + src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]); + reg_emitted (reg_save_b0); + } + else + { + HOST_WIDE_INT off; + unsigned int regno; + rtx off_r; + + /* Compute offset from CFA for BR0. */ + /* ??? Must be kept in sync with ia64_expand_prologue. */ + off = (current_frame_info.spill_cfa_off + + current_frame_info.spill_size); + for (regno = GR_REG (1); regno <= GR_REG (31); ++regno) + if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) + off -= 8; + + /* Convert CFA offset to a register based offset. */ + if (frame_pointer_needed) + src = hard_frame_pointer_rtx; + else + { + src = stack_pointer_rtx; + off += current_frame_info.total_size; + } + + /* Load address into scratch register. 
*/ + off_r = GEN_INT (off); + if (satisfies_constraint_I (off_r)) + emit_insn (gen_adddi3 (dest, src, off_r)); + else + { + emit_move_insn (dest, off_r); + emit_insn (gen_adddi3 (dest, src, dest)); + } + + src = gen_rtx_MEM (Pmode, dest); + } + } + else + src = gen_rtx_REG (DImode, BR_REG (0)); + + emit_move_insn (dest, src); + } + + int + ia64_hard_regno_rename_ok (int from, int to) + { + /* Don't clobber any of the registers we reserved for the prologue. */ + unsigned int r; + + for (r = reg_fp; r <= reg_save_ar_lc; r++) + if (to == current_frame_info.r[r] + || from == current_frame_info.r[r] + || to == emitted_frame_related_regs[r] + || from == emitted_frame_related_regs[r]) + return 0; + + /* Don't use output registers outside the register frame. */ + if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs)) + return 0; + + /* Retain even/oddness on predicate register pairs. */ + if (PR_REGNO_P (from) && PR_REGNO_P (to)) + return (from & 1) == (to & 1); + + return 1; + } + + /* Implement TARGET_HARD_REGNO_NREGS. + + ??? We say that BImode PR values require two registers. This allows us to + easily store the normal and inverted values. We use CCImode to indicate + a single predicate register. */ + + static unsigned int + ia64_hard_regno_nregs (unsigned int regno, machine_mode mode) + { + if (regno == PR_REG (0) && mode == DImode) + return 64; + if (PR_REGNO_P (regno) && (mode) == BImode) + return 2; + if ((PR_REGNO_P (regno) || GR_REGNO_P (regno)) && mode == CCImode) + return 1; + if (FR_REGNO_P (regno) && mode == XFmode) + return 1; + if (FR_REGNO_P (regno) && mode == RFmode) + return 1; + if (FR_REGNO_P (regno) && mode == XCmode) + return 2; + return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); + } + + /* Implement TARGET_HARD_REGNO_MODE_OK. */ + + static bool + ia64_hard_regno_mode_ok (unsigned int regno, machine_mode mode) + { + if (FR_REGNO_P (regno)) + return (GET_MODE_CLASS (mode) != MODE_CC + && mode != BImode + && mode != TFmode); + + if (PR_REGNO_P (regno)) + return mode == BImode || GET_MODE_CLASS (mode) == MODE_CC; + + if (GR_REGNO_P (regno)) + return mode != XFmode && mode != XCmode && mode != RFmode; + + if (AR_REGNO_P (regno)) + return mode == DImode; + + if (BR_REGNO_P (regno)) + return mode == DImode; + + return false; + } + + /* Implement TARGET_MODES_TIEABLE_P. + + Don't tie integer and FP modes, as that causes us to get integer registers + allocated for FP instructions. XFmode only supported in FP registers so + we can't tie it with any other modes. */ + + static bool + ia64_modes_tieable_p (machine_mode mode1, machine_mode mode2) + { + return (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2) + && ((mode1 == XFmode || mode1 == XCmode || mode1 == RFmode) + == (mode2 == XFmode || mode2 == XCmode || mode2 == RFmode)) + && (mode1 == BImode) == (mode2 == BImode)); + } + + /* Target hook for assembling integer objects. Handle word-sized + aligned objects and detect the cases when @fptr is needed. 
*/ + + static bool + ia64_assemble_integer (rtx x, unsigned int size, int aligned_p) + { + if (size == POINTER_SIZE / BITS_PER_UNIT + && !(TARGET_NO_PIC || TARGET_AUTO_PIC) + && GET_CODE (x) == SYMBOL_REF + && SYMBOL_REF_FUNCTION_P (x)) + { + static const char * const directive[2][2] = { + /* 64-bit pointer */ /* 32-bit pointer */ + { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */ + { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */ + }; + fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file); + output_addr_const (asm_out_file, x); + fputs (")\n", asm_out_file); + return true; + } + return default_assemble_integer (x, size, aligned_p); + } + + /* Emit the function prologue. */ + + static void + ia64_output_function_prologue (FILE *file) + { + int mask, grsave, grsave_prev; + + if (current_frame_info.need_regstk) + fprintf (file, "\t.regstk %d, %d, %d, %d\n", + current_frame_info.n_input_regs, + current_frame_info.n_local_regs, + current_frame_info.n_output_regs, + current_frame_info.n_rotate_regs); + + if (ia64_except_unwind_info (&global_options) != UI_TARGET) + return; + + /* Emit the .prologue directive. */ + + mask = 0; + grsave = grsave_prev = 0; + if (current_frame_info.r[reg_save_b0] != 0) + { + mask |= 8; + grsave = grsave_prev = current_frame_info.r[reg_save_b0]; + } + if (current_frame_info.r[reg_save_ar_pfs] != 0 + && (grsave_prev == 0 + || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1)) + { + mask |= 4; + if (grsave_prev == 0) + grsave = current_frame_info.r[reg_save_ar_pfs]; + grsave_prev = current_frame_info.r[reg_save_ar_pfs]; + } + if (current_frame_info.r[reg_fp] != 0 + && (grsave_prev == 0 + || current_frame_info.r[reg_fp] == grsave_prev + 1)) + { + mask |= 2; + if (grsave_prev == 0) + grsave = HARD_FRAME_POINTER_REGNUM; + grsave_prev = current_frame_info.r[reg_fp]; + } + if (current_frame_info.r[reg_save_pr] != 0 + && (grsave_prev == 0 + || current_frame_info.r[reg_save_pr] == grsave_prev + 1)) + { + mask |= 1; + if (grsave_prev == 0) + grsave = current_frame_info.r[reg_save_pr]; + } + + if (mask && TARGET_GNU_AS) + fprintf (file, "\t.prologue %d, %d\n", mask, + ia64_dbx_register_number (grsave)); + else + fputs ("\t.prologue\n", file); + + /* Emit a .spill directive, if necessary, to relocate the base of + the register spill area. */ + if (current_frame_info.spill_cfa_off != -16) + fprintf (file, "\t.spill %ld\n", + (long) (current_frame_info.spill_cfa_off + + current_frame_info.spill_size)); + } + + /* Emit the .body directive at the scheduled end of the prologue. */ + + static void + ia64_output_function_end_prologue (FILE *file) + { + if (ia64_except_unwind_info (&global_options) != UI_TARGET) + return; + + fputs ("\t.body\n", file); + } + + /* Emit the function epilogue. */ + + static void + ia64_output_function_epilogue (FILE *) + { + int i; + + if (current_frame_info.r[reg_fp]) + { + const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM]; + reg_names[HARD_FRAME_POINTER_REGNUM] + = reg_names[current_frame_info.r[reg_fp]]; + reg_names[current_frame_info.r[reg_fp]] = tmp; + reg_emitted (reg_fp); + } + if (! 
TARGET_REG_NAMES) + { + for (i = 0; i < current_frame_info.n_input_regs; i++) + reg_names[IN_REG (i)] = ia64_input_reg_names[i]; + for (i = 0; i < current_frame_info.n_local_regs; i++) + reg_names[LOC_REG (i)] = ia64_local_reg_names[i]; + for (i = 0; i < current_frame_info.n_output_regs; i++) + reg_names[OUT_REG (i)] = ia64_output_reg_names[i]; + } + + current_frame_info.initialized = 0; + } + + int + ia64_dbx_register_number (int regno) + { + /* In ia64_expand_prologue we quite literally renamed the frame pointer + from its home at loc79 to something inside the register frame. We + must perform the same renumbering here for the debug info. */ + if (current_frame_info.r[reg_fp]) + { + if (regno == HARD_FRAME_POINTER_REGNUM) + regno = current_frame_info.r[reg_fp]; + else if (regno == current_frame_info.r[reg_fp]) + regno = HARD_FRAME_POINTER_REGNUM; + } + + if (IN_REGNO_P (regno)) + return 32 + regno - IN_REG (0); + else if (LOC_REGNO_P (regno)) + return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0); + else if (OUT_REGNO_P (regno)) + return (32 + current_frame_info.n_input_regs + + current_frame_info.n_local_regs + regno - OUT_REG (0)); + else + return regno; + } + + /* Implement TARGET_TRAMPOLINE_INIT. + + The trampoline should set the static chain pointer to value placed + into the trampoline and should branch to the specified routine. + To make the normal indirect-subroutine calling convention work, + the trampoline must look like a function descriptor; the first + word being the target address and the second being the target's + global pointer. + + We abuse the concept of a global pointer by arranging for it + to point to the data we need to load. The complete trampoline + has the following form: + + +-------------------+ \ + TRAMP: | __ia64_trampoline | | + +-------------------+ > fake function descriptor + | TRAMP+16 | | + +-------------------+ / + | target descriptor | + +-------------------+ + | static link | + +-------------------+ + */ + + static void + ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain) + { + rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); + rtx addr, addr_reg, tramp, eight = GEN_INT (8); + + /* The Intel assembler requires that the global __ia64_trampoline symbol + be declared explicitly */ + if (!TARGET_GNU_AS) + { + static bool declared_ia64_trampoline = false; + + if (!declared_ia64_trampoline) + { + declared_ia64_trampoline = true; + (*targetm.asm_out.globalize_label) (asm_out_file, + "__ia64_trampoline"); + } + } + + /* Make sure addresses are Pmode even if we are in ILP32 mode. */ + addr = convert_memory_address (Pmode, XEXP (m_tramp, 0)); + fnaddr = convert_memory_address (Pmode, fnaddr); + static_chain = convert_memory_address (Pmode, static_chain); + + /* Load up our iterator. */ + addr_reg = copy_to_reg (addr); + m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0); + + /* The first two words are the fake descriptor: + __ia64_trampoline, ADDR+16. */ + tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"); + if (TARGET_ABI_OPEN_VMS) + { + /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity + in the Macro-32 compiler) and changed the semantics of the LTOFF22 + relocation against function symbols to make it identical to the + LTOFF_FPTR22 relocation. Emit the latter directly to stay within + strict ELF and dereference to get the bare code address. 
*/ + rtx reg = gen_reg_rtx (Pmode); + SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION; + emit_move_insn (reg, tramp); + emit_move_insn (reg, gen_rtx_MEM (Pmode, reg)); + tramp = reg; + } + emit_move_insn (m_tramp, tramp); + emit_insn (gen_adddi3 (addr_reg, addr_reg, eight)); + m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8); + + emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (Pmode, addr, 16))); + emit_insn (gen_adddi3 (addr_reg, addr_reg, eight)); + m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8); + + /* The third word is the target descriptor. */ + emit_move_insn (m_tramp, force_reg (Pmode, fnaddr)); + emit_insn (gen_adddi3 (addr_reg, addr_reg, eight)); + m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8); + + /* The fourth word is the static chain. */ + emit_move_insn (m_tramp, static_chain); + } + + /* Do any needed setup for a variadic function. CUM has not been updated + for the last named argument, which is given by ARG. + + We generate the actual spill instructions during prologue generation. */ + + static void + ia64_setup_incoming_varargs (cumulative_args_t cum, + const function_arg_info &arg, + int *pretend_size, + int second_time ATTRIBUTE_UNUSED) + { + CUMULATIVE_ARGS next_cum = *get_cumulative_args (cum); + + /* Skip the current argument. */ + ia64_function_arg_advance (pack_cumulative_args (&next_cum), arg); + + if (next_cum.words < MAX_ARGUMENT_SLOTS) + { + int n = MAX_ARGUMENT_SLOTS - next_cum.words; + *pretend_size = n * UNITS_PER_WORD; + cfun->machine->n_varargs = n; + } + } + + /* Check whether TYPE is a homogeneous floating point aggregate. If + it is, return the mode of the floating point type that appears + in all leafs. If it is not, return VOIDmode. + + An aggregate is a homogeneous floating point aggregate is if all + fields/elements in it have the same floating point type (e.g, + SFmode). 128-bit quad-precision floats are excluded. + + Variable sized aggregates should never arrive here, since we should + have already decided to pass them by reference. Top-level zero-sized + aggregates are excluded because our parallels crash the middle-end. */ + + static machine_mode + hfa_element_mode (const_tree type, bool nested) + { + machine_mode element_mode = VOIDmode; + machine_mode mode; + enum tree_code code = TREE_CODE (type); + int know_element_mode = 0; + tree t; + + if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type)))) + return VOIDmode; + + switch (code) + { + case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE: + case BOOLEAN_TYPE: case POINTER_TYPE: + case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE: + case LANG_TYPE: case FUNCTION_TYPE: + return VOIDmode; + + /* Fortran complex types are supposed to be HFAs, so we need to handle + gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex + types though. */ + case COMPLEX_TYPE: + if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT + && TYPE_MODE (type) != TCmode) + return GET_MODE_INNER (TYPE_MODE (type)); + else + return VOIDmode; + + case REAL_TYPE: + /* We want to return VOIDmode for raw REAL_TYPEs, but the actual + mode if this is contained within an aggregate. 
*/ + if (nested && TYPE_MODE (type) != TFmode) + return TYPE_MODE (type); + else + return VOIDmode; + + case ARRAY_TYPE: + return hfa_element_mode (TREE_TYPE (type), 1); + + case RECORD_TYPE: + case UNION_TYPE: + case QUAL_UNION_TYPE: + for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t)) + { + if (TREE_CODE (t) != FIELD_DECL || DECL_FIELD_ABI_IGNORED (t)) + continue; + + mode = hfa_element_mode (TREE_TYPE (t), 1); + if (know_element_mode) + { + if (mode != element_mode) + return VOIDmode; + } + else if (GET_MODE_CLASS (mode) != MODE_FLOAT) + return VOIDmode; + else + { + know_element_mode = 1; + element_mode = mode; + } + } + return element_mode; + + default: + /* If we reach here, we probably have some front-end specific type + that the backend doesn't know about. This can happen via the + aggregate_value_p call in init_function_start. All we can do is + ignore unknown tree types. */ + return VOIDmode; + } + + return VOIDmode; + } + + /* Return the number of words required to hold a quantity of TYPE and MODE + when passed as an argument. */ + static int + ia64_function_arg_words (const_tree type, machine_mode mode) + { + int words; + + if (mode == BLKmode) + words = int_size_in_bytes (type); + else + words = GET_MODE_SIZE (mode); + + return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */ + } + + /* Return the number of registers that should be skipped so the current + argument (described by TYPE and WORDS) will be properly aligned. + + Integer and float arguments larger than 8 bytes start at the next + even boundary. Aggregates larger than 8 bytes start at the next + even boundary if the aggregate has 16 byte alignment. Note that + in the 32-bit ABI, TImode and TFmode have only 8-byte alignment + but are still to be aligned in registers. + + ??? The ABI does not specify how to handle aggregates with + alignment from 9 to 15 bytes, or greater than 16. We handle them + all as if they had 16 byte alignment. Such aggregates can occur + only if gcc extensions are used. */ + static int + ia64_function_arg_offset (const CUMULATIVE_ARGS *cum, + const_tree type, int words) + { + /* No registers are skipped on VMS. */ + if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0) + return 0; + + if (type + && TREE_CODE (type) != INTEGER_TYPE + && TREE_CODE (type) != REAL_TYPE) + return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT; + else + return words > 1; + } + + /* Return rtx for register where argument is passed, or zero if it is passed + on the stack. */ + /* ??? 128-bit quad-precision floats are always passed in general + registers. */ + + static rtx + ia64_function_arg_1 (cumulative_args_t cum_v, const function_arg_info &arg, + bool incoming) + { + const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + + int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST); + int words = ia64_function_arg_words (arg.type, arg.mode); + int offset = ia64_function_arg_offset (cum, arg.type, words); + machine_mode hfa_mode = VOIDmode; + + /* For OPEN VMS, emit the instruction setting up the argument register here, + when we know this will be together with the other arguments setup related + insns. This is not the conceptually best place to do this, but this is + the easiest as we have convenient access to cumulative args info. 
*/ + + if (TARGET_ABI_OPEN_VMS && arg.end_marker_p ()) + { + unsigned HOST_WIDE_INT regval = cum->words; + int i; + + for (i = 0; i < 8; i++) + regval |= ((int) cum->atypes[i]) << (i * 3 + 8); + + emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)), + GEN_INT (regval)); + } + + /* If all argument slots are used, then it must go on the stack. */ + if (cum->words + offset >= MAX_ARGUMENT_SLOTS) + return 0; + + /* On OpenVMS argument is either in Rn or Fn. */ + if (TARGET_ABI_OPEN_VMS) + { + if (FLOAT_MODE_P (arg.mode)) + return gen_rtx_REG (arg.mode, FR_ARG_FIRST + cum->words); + else + return gen_rtx_REG (arg.mode, basereg + cum->words); + } + + /* Check for and handle homogeneous FP aggregates. */ + if (arg.type) + hfa_mode = hfa_element_mode (arg.type, 0); + + /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas + and unprototyped hfas are passed specially. */ + if (hfa_mode != VOIDmode && (! cum->prototype || arg.named)) + { + rtx loc[16]; + int i = 0; + int fp_regs = cum->fp_regs; + int int_regs = cum->words + offset; + int hfa_size = GET_MODE_SIZE (hfa_mode); + int byte_size; + int args_byte_size; + + /* If prototyped, pass it in FR regs then GR regs. + If not prototyped, pass it in both FR and GR regs. + + If this is an SFmode aggregate, then it is possible to run out of + FR regs while GR regs are still left. In that case, we pass the + remaining part in the GR regs. */ + + /* Fill the FP regs. We do this always. We stop if we reach the end + of the argument, the last FP register, or the last argument slot. */ + + byte_size = arg.promoted_size_in_bytes (); + args_byte_size = int_regs * UNITS_PER_WORD; + offset = 0; + for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS + && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++) + { + loc[i] = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (hfa_mode, (FR_ARG_FIRST + + fp_regs)), + GEN_INT (offset)); + offset += hfa_size; + args_byte_size += hfa_size; + fp_regs++; + } + + /* If no prototype, then the whole thing must go in GR regs. */ + if (! cum->prototype) + offset = 0; + /* If this is an SFmode aggregate, then we might have some left over + that needs to go in GR regs. */ + else if (byte_size != offset) + int_regs += offset / UNITS_PER_WORD; + + /* Fill in the GR regs. We must use DImode here, not the hfa mode. */ + + for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++) + { + machine_mode gr_mode = DImode; + unsigned int gr_size; + + /* If we have an odd 4 byte hunk because we ran out of FR regs, + then this goes in a GR reg left adjusted/little endian, right + adjusted/big endian. */ + /* ??? Currently this is handled wrong, because 4-byte hunks are + always right adjusted/little endian. */ + if (offset & 0x4) + gr_mode = SImode; + /* If we have an even 4 byte hunk because the aggregate is a + multiple of 4 bytes in size, then this goes in a GR reg right + adjusted/little endian. */ + else if (byte_size - offset == 4) + gr_mode = SImode; + + loc[i] = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (gr_mode, (basereg + + int_regs)), + GEN_INT (offset)); + + gr_size = GET_MODE_SIZE (gr_mode); + offset += gr_size; + if (gr_size == UNITS_PER_WORD + || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0)) + int_regs++; + else if (gr_size > UNITS_PER_WORD) + int_regs += gr_size / UNITS_PER_WORD; + } + return gen_rtx_PARALLEL (arg.mode, gen_rtvec_v (i, loc)); + } + + /* Integral and aggregates go in general registers. 
If we have run out of + FR registers, then FP values must also go in general registers. This can + happen when we have a SFmode HFA. */ + else if (arg.mode == TFmode || arg.mode == TCmode + || !FLOAT_MODE_P (arg.mode) + || cum->fp_regs == MAX_ARGUMENT_SLOTS) + { + int byte_size = arg.promoted_size_in_bytes (); + if (BYTES_BIG_ENDIAN + && (arg.mode == BLKmode || arg.aggregate_type_p ()) + && byte_size < UNITS_PER_WORD + && byte_size > 0) + { + rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (DImode, + (basereg + cum->words + + offset)), + const0_rtx); + return gen_rtx_PARALLEL (arg.mode, gen_rtvec (1, gr_reg)); + } + else + return gen_rtx_REG (arg.mode, basereg + cum->words + offset); + + } + + /* If there is a prototype, then FP values go in a FR register when + named, and in a GR register when unnamed. */ + else if (cum->prototype) + { + if (arg.named) + return gen_rtx_REG (arg.mode, FR_ARG_FIRST + cum->fp_regs); + /* In big-endian mode, an anonymous SFmode value must be represented + as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force + the value into the high half of the general register. */ + else if (BYTES_BIG_ENDIAN && arg.mode == SFmode) + return gen_rtx_PARALLEL (arg.mode, + gen_rtvec (1, + gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (DImode, basereg + cum->words + offset), + const0_rtx))); + else + return gen_rtx_REG (arg.mode, basereg + cum->words + offset); + } + /* If there is no prototype, then FP values go in both FR and GR + registers. */ + else + { + /* See comment above. */ + machine_mode inner_mode = + (BYTES_BIG_ENDIAN && arg.mode == SFmode) ? DImode : arg.mode; + + rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (arg.mode, (FR_ARG_FIRST + + cum->fp_regs)), + const0_rtx); + rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (inner_mode, + (basereg + cum->words + + offset)), + const0_rtx); + + return gen_rtx_PARALLEL (arg.mode, gen_rtvec (2, fp_reg, gr_reg)); + } + } + + /* Implement TARGET_FUNCION_ARG target hook. */ + + static rtx + ia64_function_arg (cumulative_args_t cum, const function_arg_info &arg) + { + return ia64_function_arg_1 (cum, arg, false); + } + + /* Implement TARGET_FUNCION_INCOMING_ARG target hook. */ + + static rtx + ia64_function_incoming_arg (cumulative_args_t cum, + const function_arg_info &arg) + { + return ia64_function_arg_1 (cum, arg, true); + } + + /* Return number of bytes, at the beginning of the argument, that must be + put in registers. 0 is the argument is entirely in registers or entirely + in memory. */ + + static int + ia64_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg) + { + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + + int words = ia64_function_arg_words (arg.type, arg.mode); + int offset = ia64_function_arg_offset (cum, arg.type, words); + + /* If all argument slots are used, then it must go on the stack. */ + if (cum->words + offset >= MAX_ARGUMENT_SLOTS) + return 0; + + /* It doesn't matter whether the argument goes in FR or GR regs. If + it fits within the 8 argument slots, then it goes entirely in + registers. If it extends past the last argument slot, then the rest + goes on the stack. */ + + if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS) + return 0; + + return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD; + } + + /* Return ivms_arg_type based on machine_mode. 
*/ + + static enum ivms_arg_type + ia64_arg_type (machine_mode mode) + { + switch (mode) + { + case E_SFmode: + return FS; + case E_DFmode: + return FT; + default: + return I64; + } + } + + /* Update CUM to point after this argument. This is patterned after + ia64_function_arg. */ + + static void + ia64_function_arg_advance (cumulative_args_t cum_v, + const function_arg_info &arg) + { + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + int words = ia64_function_arg_words (arg.type, arg.mode); + int offset = ia64_function_arg_offset (cum, arg.type, words); + machine_mode hfa_mode = VOIDmode; + + /* If all arg slots are already full, then there is nothing to do. */ + if (cum->words >= MAX_ARGUMENT_SLOTS) + { + cum->words += words + offset; + return; + } + + cum->atypes[cum->words] = ia64_arg_type (arg.mode); + cum->words += words + offset; + + /* On OpenVMS argument is either in Rn or Fn. */ + if (TARGET_ABI_OPEN_VMS) + { + cum->int_regs = cum->words; + cum->fp_regs = cum->words; + return; + } + + /* Check for and handle homogeneous FP aggregates. */ + if (arg.type) + hfa_mode = hfa_element_mode (arg.type, 0); + + /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas + and unprototyped hfas are passed specially. */ + if (hfa_mode != VOIDmode && (! cum->prototype || arg.named)) + { + int fp_regs = cum->fp_regs; + /* This is the original value of cum->words + offset. */ + int int_regs = cum->words - words; + int hfa_size = GET_MODE_SIZE (hfa_mode); + int byte_size; + int args_byte_size; + + /* If prototyped, pass it in FR regs then GR regs. + If not prototyped, pass it in both FR and GR regs. + + If this is an SFmode aggregate, then it is possible to run out of + FR regs while GR regs are still left. In that case, we pass the + remaining part in the GR regs. */ + + /* Fill the FP regs. We do this always. We stop if we reach the end + of the argument, the last FP register, or the last argument slot. */ + + byte_size = arg.promoted_size_in_bytes (); + args_byte_size = int_regs * UNITS_PER_WORD; + offset = 0; + for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS + && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));) + { + offset += hfa_size; + args_byte_size += hfa_size; + fp_regs++; + } + + cum->fp_regs = fp_regs; + } + + /* Integral and aggregates go in general registers. So do TFmode FP values. + If we have run out of FR registers, then other FP values must also go in + general registers. This can happen when we have a SFmode HFA. */ + else if (arg.mode == TFmode || arg.mode == TCmode + || !FLOAT_MODE_P (arg.mode) + || cum->fp_regs == MAX_ARGUMENT_SLOTS) + cum->int_regs = cum->words; + + /* If there is a prototype, then FP values go in a FR register when + named, and in a GR register when unnamed. */ + else if (cum->prototype) + { + if (! arg.named) + cum->int_regs = cum->words; + else + /* ??? Complex types should not reach here. */ + cum->fp_regs + += (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT ? 2 : 1); + } + /* If there is no prototype, then FP values go in both FR and GR + registers. */ + else + { + /* ??? Complex types should not reach here. */ + cum->fp_regs + += (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT ? 2 : 1); + cum->int_regs = cum->words; + } + } + + /* Arguments with alignment larger than 8 bytes start at the next even + boundary. On ILP32 HPUX, TFmode arguments start on next even boundary + even though their normal alignment is 8 bytes. See ia64_function_arg. 
*/ + + static unsigned int + ia64_function_arg_boundary (machine_mode mode, const_tree type) + { + if (mode == TFmode && TARGET_HPUX && TARGET_ILP32) + return PARM_BOUNDARY * 2; + + if (type) + { + if (TYPE_ALIGN (type) > PARM_BOUNDARY) + return PARM_BOUNDARY * 2; + else + return PARM_BOUNDARY; + } + + if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY) + return PARM_BOUNDARY * 2; + else + return PARM_BOUNDARY; + } + + /* True if it is OK to do sibling call optimization for the specified + call expression EXP. DECL will be the called function, or NULL if + this is an indirect call. */ + static bool + ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) + { + /* We can't perform a sibcall if the current function has the syscall_linkage + attribute. */ + if (lookup_attribute ("syscall_linkage", + TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) + return false; + + /* We must always return with our current GP. This means we can + only sibcall to functions defined in the current module unless + TARGET_CONST_GP is set to true. */ + return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP; + } + + + /* Implement va_arg. */ + + static tree + ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, + gimple_seq *post_p) + { + /* Variable sized types are passed by reference. */ + if (pass_va_arg_by_reference (type)) + { + tree ptrtype = build_pointer_type (type); + tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p); + return build_va_arg_indirect_ref (addr); + } + + /* Aggregate arguments with alignment larger than 8 bytes start at + the next even boundary. Integer and floating point arguments + do so if they are larger than 8 bytes, whether or not they are + also aligned larger than 8 bytes. */ + if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE) + ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT) + { + tree t = fold_build_pointer_plus_hwi (valist, 2 * UNITS_PER_WORD - 1); + t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, + build_int_cst (TREE_TYPE (t), -2 * UNITS_PER_WORD)); + gimplify_assign (unshare_expr (valist), t, pre_p); + } + + return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); + } + + /* Return 1 if function return value returned in memory. Return 0 if it is + in a register. */ + + static bool + ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED) + { + machine_mode mode; + machine_mode hfa_mode; + HOST_WIDE_INT byte_size; + + mode = TYPE_MODE (valtype); + byte_size = GET_MODE_SIZE (mode); + if (mode == BLKmode) + { + byte_size = int_size_in_bytes (valtype); + if (byte_size < 0) + return true; + } + + /* Hfa's with up to 8 elements are returned in the FP argument registers. */ + + hfa_mode = hfa_element_mode (valtype, 0); + if (hfa_mode != VOIDmode) + { + int hfa_size = GET_MODE_SIZE (hfa_mode); + + if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS) + return true; + else + return false; + } + else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS) + return true; + else + return false; + } + + /* Return rtx for register that holds the function return value. 
*/ + + static rtx + ia64_function_value (const_tree valtype, + const_tree fn_decl_or_type, + bool outgoing ATTRIBUTE_UNUSED) + { + machine_mode mode; + machine_mode hfa_mode; + int unsignedp; + const_tree func = fn_decl_or_type; + + if (fn_decl_or_type + && !DECL_P (fn_decl_or_type)) + func = NULL; + + mode = TYPE_MODE (valtype); + hfa_mode = hfa_element_mode (valtype, 0); + + if (hfa_mode != VOIDmode) + { + rtx loc[8]; + int i; + int hfa_size; + int byte_size; + int offset; + + hfa_size = GET_MODE_SIZE (hfa_mode); + byte_size = ((mode == BLKmode) + ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode)); + offset = 0; + for (i = 0; offset < byte_size; i++) + { + loc[i] = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i), + GEN_INT (offset)); + offset += hfa_size; + } + return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); + } + else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode) + return gen_rtx_REG (mode, FR_ARG_FIRST); + else + { + bool need_parallel = false; + + /* In big-endian mode, we need to manage the layout of aggregates + in the registers so that we get the bits properly aligned in + the highpart of the registers. */ + if (BYTES_BIG_ENDIAN + && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype)))) + need_parallel = true; + + /* Something like struct S { long double x; char a[0] } is not an + HFA structure, and therefore doesn't go in fp registers. But + the middle-end will give it XFmode anyway, and XFmode values + don't normally fit in integer registers. So we need to smuggle + the value inside a parallel. */ + else if (mode == XFmode || mode == XCmode || mode == RFmode) + need_parallel = true; + + if (need_parallel) + { + rtx loc[8]; + int offset; + int bytesize; + int i; + + offset = 0; + bytesize = int_size_in_bytes (valtype); + /* An empty PARALLEL is invalid here, but the return value + doesn't matter for empty structs. */ + if (bytesize == 0) + return gen_rtx_REG (mode, GR_RET_FIRST); + for (i = 0; offset < bytesize; i++) + { + loc[i] = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (DImode, + GR_RET_FIRST + i), + GEN_INT (offset)); + offset += UNITS_PER_WORD; + } + return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); + } + + mode = promote_function_mode (valtype, mode, &unsignedp, + func ? TREE_TYPE (func) : NULL_TREE, + true); + + return gen_rtx_REG (mode, GR_RET_FIRST); + } + } + + /* Worker function for TARGET_LIBCALL_VALUE. */ + + static rtx + ia64_libcall_value (machine_mode mode, + const_rtx fun ATTRIBUTE_UNUSED) + { + return gen_rtx_REG (mode, + (((GET_MODE_CLASS (mode) == MODE_FLOAT + || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) + && (mode) != TFmode) + ? FR_RET_FIRST : GR_RET_FIRST)); + } + + /* Worker function for FUNCTION_VALUE_REGNO_P. */ + + static bool + ia64_function_value_regno_p (const unsigned int regno) + { + return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST) + || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST)); + } + + /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL. + We need to emit DTP-relative relocations. */ + + static void + ia64_output_dwarf_dtprel (FILE *file, int size, rtx x) + { + gcc_assert (size == 4 || size == 8); + if (size == 4) + fputs ("\tdata4.ua\t@dtprel(", file); + else + fputs ("\tdata8.ua\t@dtprel(", file); + output_addr_const (file, x); + fputs (")", file); + } + + /* Print a memory address as an operand to reference that memory location. */ + + /* ??? Do we need this? It gets used only for 'a' operands. 
We could perhaps + also call this from ia64_print_operand for memory addresses. */ + + static void + ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED, + machine_mode /*mode*/, + rtx address ATTRIBUTE_UNUSED) + { + } + + /* Print an operand to an assembler instruction. + C Swap and print a comparison operator. + D Print an FP comparison operator. + E Print 32 - constant, for SImode shifts as extract. + e Print 64 - constant, for DImode rotates. + F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or + a floating point register emitted normally. + G A floating point constant. + I Invert a predicate register by adding 1. + J Select the proper predicate register for a condition. + j Select the inverse predicate register for a condition. + O Append .acq for volatile load. + P Postincrement of a MEM. + Q Append .rel for volatile store. + R Print .s .d or nothing for a single, double or no truncation. + S Shift amount for shladd instruction. + T Print an 8-bit sign extended number (K) as a 32-bit unsigned number + for Intel assembler. + U Print an 8-bit sign extended number (K) as a 64-bit unsigned number + for Intel assembler. + X A pair of floating point registers. + r Print register name, or constant 0 as r0. HP compatibility for + Linux kernel. + v Print vector constant value as an 8-byte integer value. */ + + static void + ia64_print_operand (FILE * file, rtx x, int code) + { + const char *str; + + switch (code) + { + case 0: + /* Handled below. */ + break; + + case 'C': + { + enum rtx_code c = swap_condition (GET_CODE (x)); + fputs (GET_RTX_NAME (c), file); + return; + } + + case 'D': + switch (GET_CODE (x)) + { + case NE: + str = "neq"; + break; + case UNORDERED: + str = "unord"; + break; + case ORDERED: + str = "ord"; + break; + case UNLT: + str = "nge"; + break; + case UNLE: + str = "ngt"; + break; + case UNGT: + str = "nle"; + break; + case UNGE: + str = "nlt"; + break; + case UNEQ: + case LTGT: + gcc_unreachable (); + default: + str = GET_RTX_NAME (GET_CODE (x)); + break; + } + fputs (str, file); + return; + + case 'E': + fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x)); + return; + + case 'e': + fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x)); + return; + + case 'F': + if (x == CONST0_RTX (GET_MODE (x))) + str = reg_names [FR_REG (0)]; + else if (x == CONST1_RTX (GET_MODE (x))) + str = reg_names [FR_REG (1)]; + else + { + gcc_assert (GET_CODE (x) == REG); + str = reg_names [REGNO (x)]; + } + fputs (str, file); + return; + + case 'G': + { + long val[4]; + real_to_target (val, CONST_DOUBLE_REAL_VALUE (x), GET_MODE (x)); + if (GET_MODE (x) == SFmode) + fprintf (file, "0x%08lx", val[0] & 0xffffffff); + else if (GET_MODE (x) == DFmode) + fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1]) + & 0xffffffff, + (WORDS_BIG_ENDIAN ? 
val[1] : val[0]) + & 0xffffffff); + else + output_operand_lossage ("invalid %%G mode"); + } + return; + + case 'I': + fputs (reg_names [REGNO (x) + 1], file); + return; + + case 'J': + case 'j': + { + unsigned int regno = REGNO (XEXP (x, 0)); + if (GET_CODE (x) == EQ) + regno += 1; + if (code == 'j') + regno ^= 1; + fputs (reg_names [regno], file); + } + return; + + case 'O': + if (MEM_VOLATILE_P (x)) + fputs(".acq", file); + return; + + case 'P': + { + HOST_WIDE_INT value; + + switch (GET_CODE (XEXP (x, 0))) + { + default: + return; + + case POST_MODIFY: + x = XEXP (XEXP (XEXP (x, 0), 1), 1); + if (GET_CODE (x) == CONST_INT) + value = INTVAL (x); + else + { + gcc_assert (GET_CODE (x) == REG); + fprintf (file, ", %s", reg_names[REGNO (x)]); + return; + } + break; + + case POST_INC: + value = GET_MODE_SIZE (GET_MODE (x)); + break; + + case POST_DEC: + value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x)); + break; + } + + fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value); + return; + } + + case 'Q': + if (MEM_VOLATILE_P (x)) + fputs(".rel", file); + return; + + case 'R': + if (x == CONST0_RTX (GET_MODE (x))) + fputs(".s", file); + else if (x == CONST1_RTX (GET_MODE (x))) + fputs(".d", file); + else if (x == CONST2_RTX (GET_MODE (x))) + ; + else + output_operand_lossage ("invalid %%R value"); + return; + + case 'S': + fprintf (file, "%d", exact_log2 (INTVAL (x))); + return; + + case 'T': + if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT) + { + fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff); + return; + } + break; + + case 'U': + if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT) + { + const char *prefix = "0x"; + if (INTVAL (x) & 0x80000000) + { + fprintf (file, "0xffffffff"); + prefix = ""; + } + fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff); + return; + } + break; + + case 'X': + { + unsigned int regno = REGNO (x); + fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]); + } + return; + + case 'r': + /* If this operand is the constant zero, write it as register zero. + Any register, zero, or CONST_INT value is OK here. */ + if (GET_CODE (x) == REG) + fputs (reg_names[REGNO (x)], file); + else if (x == CONST0_RTX (GET_MODE (x))) + fputs ("r0", file); + else if (GET_CODE (x) == CONST_INT) + output_addr_const (file, x); + else + output_operand_lossage ("invalid %%r value"); + return; + + case 'v': + gcc_assert (GET_CODE (x) == CONST_VECTOR); + x = simplify_subreg (DImode, x, GET_MODE (x), 0); + break; + + case '+': + { + const char *which; + + /* For conditional branches, returns or calls, substitute + sptk, dptk, dpnt, or spnt for %s. */ + x = find_reg_note (current_output_insn, REG_BR_PROB, 0); + if (x) + { + int pred_val = profile_probability::from_reg_br_prob_note + (XINT (x, 0)).to_reg_br_prob_base (); + + /* Guess top and bottom 10% statically predicted. 
*/ + if (pred_val < REG_BR_PROB_BASE / 50 + && br_prob_note_reliable_p (x)) + which = ".spnt"; + else if (pred_val < REG_BR_PROB_BASE / 2) + which = ".dpnt"; + else if (pred_val < REG_BR_PROB_BASE / 100 * 98 + || !br_prob_note_reliable_p (x)) + which = ".dptk"; + else + which = ".sptk"; + } + else if (CALL_P (current_output_insn)) + which = ".sptk"; + else + which = ".dptk"; + + fputs (which, file); + return; + } + + case ',': + x = current_insn_predicate; + if (x) + { + unsigned int regno = REGNO (XEXP (x, 0)); + if (GET_CODE (x) == EQ) + regno += 1; + fprintf (file, "(%s) ", reg_names [regno]); + } + return; + + default: + output_operand_lossage ("ia64_print_operand: unknown code"); + return; + } + + switch (GET_CODE (x)) + { + /* This happens for the spill/restore instructions. */ + case POST_INC: + case POST_DEC: + case POST_MODIFY: + x = XEXP (x, 0); + /* fall through */ + + case REG: + fputs (reg_names [REGNO (x)], file); + break; + + case MEM: + { + rtx addr = XEXP (x, 0); + if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC) + addr = XEXP (addr, 0); + fprintf (file, "[%s]", reg_names [REGNO (addr)]); + break; + } + + default: + output_addr_const (file, x); + break; + } + + return; + } + + /* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P. */ + + static bool + ia64_print_operand_punct_valid_p (unsigned char code) + { + return (code == '+' || code == ','); + } + + /* Compute a (partial) cost for rtx X. Return true if the complete + cost has been computed, and false if subexpressions should be + scanned. In either case, *TOTAL contains the cost result. */ + /* ??? This is incomplete. */ + + static bool + ia64_rtx_costs (rtx x, machine_mode mode, int outer_code, + int opno ATTRIBUTE_UNUSED, + int *total, bool speed ATTRIBUTE_UNUSED) + { + int code = GET_CODE (x); + + switch (code) + { + case CONST_INT: + switch (outer_code) + { + case SET: + *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1); + return true; + case PLUS: + if (satisfies_constraint_I (x)) + *total = 0; + else if (satisfies_constraint_J (x)) + *total = 1; + else + *total = COSTS_N_INSNS (1); + return true; + default: + if (satisfies_constraint_K (x) || satisfies_constraint_L (x)) + *total = 0; + else + *total = COSTS_N_INSNS (1); + return true; + } + + case CONST_DOUBLE: + *total = COSTS_N_INSNS (1); + return true; + + case CONST: + case SYMBOL_REF: + case LABEL_REF: + *total = COSTS_N_INSNS (3); + return true; + + case FMA: + *total = COSTS_N_INSNS (4); + return true; + + case MULT: + /* For multiplies wider than HImode, we have to go to the FPU, + which normally involves copies. Plus there's the latency + of the multiply itself, and the latency of the instructions to + transfer integer regs to FP regs. */ + if (FLOAT_MODE_P (mode)) + *total = COSTS_N_INSNS (4); + else if (GET_MODE_SIZE (mode) > 2) + *total = COSTS_N_INSNS (10); + else + *total = COSTS_N_INSNS (2); + return true; + + case PLUS: + case MINUS: + if (FLOAT_MODE_P (mode)) + { + *total = COSTS_N_INSNS (4); + return true; + } + /* FALLTHRU */ + + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + *total = COSTS_N_INSNS (1); + return true; + + case DIV: + case UDIV: + case MOD: + case UMOD: + /* We make divide expensive, so that divide-by-constant will be + optimized to a multiply. */ + *total = COSTS_N_INSNS (60); + return true; + + default: + return false; + } + } + + /* Calculate the cost of moving data from a register in class FROM to + one in class TO, using MODE. 
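/* Illustrative sketch, not part of the patch: the probability thresholds the
   '+' case above uses to choose a branch-hint completer.  REG_BR_PROB_BASE is
   assumed to be 10000 here (its usual value), and "reliable" stands in for
   br_prob_note_reliable_p.  Standalone C, no GCC internals involved.  */
#include <stdio.h>

#define PROB_BASE 10000

static const char *
hint_for (int pred_val, int reliable)
{
  if (pred_val < PROB_BASE / 50 && reliable)              /* < 2%: static not-taken */
    return ".spnt";
  else if (pred_val < PROB_BASE / 2)                      /* < 50%: dynamic not-taken */
    return ".dpnt";
  else if (pred_val < PROB_BASE / 100 * 98 || !reliable)  /* < 98%, or note unreliable */
    return ".dptk";
  else                                                    /* >= 98%: static taken */
    return ".sptk";
}

int main (void)
{
  printf ("%s\n", hint_for (100, 1));   /* .spnt */
  printf ("%s\n", hint_for (4000, 1));  /* .dpnt */
  printf ("%s\n", hint_for (9900, 0));  /* .dptk (note not reliable) */
  printf ("%s\n", hint_for (9900, 1));  /* .sptk */
  return 0;
}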
*/ + + static int + ia64_register_move_cost (machine_mode mode, reg_class_t from, + reg_class_t to) + { + /* ADDL_REGS is the same as GR_REGS for movement purposes. */ + if (to == ADDL_REGS) + to = GR_REGS; + if (from == ADDL_REGS) + from = GR_REGS; + + /* All costs are symmetric, so reduce cases by putting the + lower number class as the destination. */ + if (from < to) + { + reg_class_t tmp = to; + to = from, from = tmp; + } + + /* Moving from FR<->GR in XFmode must be more expensive than 2, + so that we get secondary memory reloads. Between FR_REGS, + we have to make this at least as expensive as memory_move_cost + to avoid spectacularly poor register class preferencing. */ + if (mode == XFmode || mode == RFmode) + { + if (to != GR_REGS || from != GR_REGS) + return memory_move_cost (mode, to, false); + else + return 3; + } + + switch (to) + { + case PR_REGS: + /* Moving between PR registers takes two insns. */ + if (from == PR_REGS) + return 3; + /* Moving between PR and anything but GR is impossible. */ + if (from != GR_REGS) + return memory_move_cost (mode, to, false); + break; + + case BR_REGS: + /* Moving between BR and anything but GR is impossible. */ + if (from != GR_REGS && from != GR_AND_BR_REGS) + return memory_move_cost (mode, to, false); + break; + + case AR_I_REGS: + case AR_M_REGS: + /* Moving between AR and anything but GR is impossible. */ + if (from != GR_REGS) + return memory_move_cost (mode, to, false); + break; + + case GR_REGS: + case FR_REGS: + case FP_REGS: + case GR_AND_FR_REGS: + case GR_AND_BR_REGS: + case ALL_REGS: + break; + + default: + gcc_unreachable (); + } + + return 2; + } + + /* Calculate the cost of moving data of MODE from a register to or from + memory. */ + + static int + ia64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED, + reg_class_t rclass, + bool in ATTRIBUTE_UNUSED) + { + if (rclass == GENERAL_REGS + || rclass == FR_REGS + || rclass == FP_REGS + || rclass == GR_AND_FR_REGS) + return 4; + else + return 10; + } + + /* Implement TARGET_PREFERRED_RELOAD_CLASS. Place additional restrictions + on RCLASS to use when copying X into that class. */ + + static reg_class_t + ia64_preferred_reload_class (rtx x, reg_class_t rclass) + { + switch (rclass) + { + case FR_REGS: + case FP_REGS: + /* Don't allow volatile mem reloads into floating point registers. + This is defined to force reload to choose the r/m case instead + of the f/f case when reloading (set (reg fX) (mem/v)). */ + if (MEM_P (x) && MEM_VOLATILE_P (x)) + return NO_REGS; + + /* Force all unrecognized constants into the constant pool. */ + if (CONSTANT_P (x)) + return NO_REGS; + break; + + case AR_M_REGS: + case AR_I_REGS: + if (!OBJECT_P (x)) + return NO_REGS; + break; + + default: + break; + } + + return rclass; + } + + /* This function returns the register class required for a secondary + register when copying between one of the registers in RCLASS, and X, + using MODE. A return value of NO_REGS means that no secondary register + is required. */ + + enum reg_class + ia64_secondary_reload_class (enum reg_class rclass, + machine_mode mode ATTRIBUTE_UNUSED, rtx x) + { + int regno = -1; + + if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG) + regno = true_regnum (x); + + switch (rclass) + { + case BR_REGS: + case AR_M_REGS: + case AR_I_REGS: + /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global + interaction. We end up with two pseudos with overlapping lifetimes + both of which are equiv to the same constant, and both which need + to be in BR_REGS. 
This seems to be a cse bug. cse_basic_block_end + changes depending on the path length, which means the qty_first_reg + check in make_regs_eqv can give different answers at different times. + At some point I'll probably need a reload_indi pattern to handle + this. + + We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we + wound up with a FP register from GR_AND_FR_REGS. Extend that to all + non-general registers for good measure. */ + if (regno >= 0 && ! GENERAL_REGNO_P (regno)) + return GR_REGS; + + /* This is needed if a pseudo used as a call_operand gets spilled to a + stack slot. */ + if (GET_CODE (x) == MEM) + return GR_REGS; + break; + + case FR_REGS: + case FP_REGS: + /* Need to go through general registers to get to other class regs. */ + if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno))) + return GR_REGS; + + /* This can happen when a paradoxical subreg is an operand to the + muldi3 pattern. */ + /* ??? This shouldn't be necessary after instruction scheduling is + enabled, because paradoxical subregs are not accepted by + register_operand when INSN_SCHEDULING is defined. Or alternatively, + stop the paradoxical subreg stupidity in the *_operand functions + in recog.cc. */ + if (GET_CODE (x) == MEM + && (GET_MODE (x) == SImode || GET_MODE (x) == HImode + || GET_MODE (x) == QImode)) + return GR_REGS; + + /* This can happen because of the ior/and/etc patterns that accept FP + registers as operands. If the third operand is a constant, then it + needs to be reloaded into a FP register. */ + if (GET_CODE (x) == CONST_INT) + return GR_REGS; + + /* This can happen because of register elimination in a muldi3 insn. + E.g. `26107 * (unsigned long)&u'. */ + if (GET_CODE (x) == PLUS) + return GR_REGS; + break; + + case PR_REGS: + /* ??? This happens if we cse/gcse a BImode value across a call, + and the function has a nonlocal goto. This is because global + does not allocate call crossing pseudos to hard registers when + crtl->has_nonlocal_goto is true. This is relatively + common for C++ programs that use exceptions. To reproduce, + return NO_REGS and compile libstdc++. */ + if (GET_CODE (x) == MEM) + return GR_REGS; + + /* This can happen when we take a BImode subreg of a DImode value, + and that DImode value winds up in some non-GR register. */ + if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno)) + return GR_REGS; + break; + + default: + break; + } + + return NO_REGS; + } + + + /* Implement targetm.unspec_may_trap_p hook. */ + static int + ia64_unspec_may_trap_p (const_rtx x, unsigned flags) + { + switch (XINT (x, 1)) + { + case UNSPEC_LDA: + case UNSPEC_LDS: + case UNSPEC_LDSA: + case UNSPEC_LDCCLR: + case UNSPEC_CHKACLR: + case UNSPEC_CHKS: + /* These unspecs are just wrappers. */ + return may_trap_p_1 (XVECEXP (x, 0, 0), flags); + } + + return default_unspec_may_trap_p (x, flags); + } + + + /* Parse the -mfixed-range= option string. */ + + static void + fix_range (const char *const_str) + { + int i, first, last; + char *str, *dash, *comma; + + /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and + REG2 are either register names or register numbers. The effect + of this option is to mark the registers in the range from REG1 to + REG2 as ``fixed'' so they won't be used by the compiler. This is + used, e.g., to ensure that kernel mode code doesn't use f32-f127. 
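/* Illustrative sketch, not part of the patch: the accepted shape of the
   -mfixed-range= argument that fix_range () below parses -- comma-separated,
   inclusive REG1-REG2 ranges.  This simplified version only splits the string
   and does not resolve names; the real code maps them with decode_reg_name
   and sets fixed_regs[] for every register in each range.  Standalone C.  */
#include <stdio.h>
#include <string.h>

int main (void)
{
  char arg[] = "f32-f127,f2-f5";      /* e.g. -mfixed-range=f32-f127,f2-f5 */
  char *range = strtok (arg, ",");

  while (range)
    {
      char *dash = strchr (range, '-');
      if (!dash)
        {
          fprintf (stderr, "value must have form REG1-REG2\n");
          return 1;
        }
      *dash = '\0';
      printf ("fix registers %s through %s (inclusive)\n", range, dash + 1);
      range = strtok (NULL, ",");
    }
  return 0;
}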
*/ + + i = strlen (const_str); + str = (char *) alloca (i + 1); + memcpy (str, const_str, i + 1); + + while (1) + { + dash = strchr (str, '-'); + if (!dash) + { + warning (0, "value of %<-mfixed-range%> must have form REG1-REG2"); + return; + } + *dash = '\0'; + + comma = strchr (dash + 1, ','); + if (comma) + *comma = '\0'; + + first = decode_reg_name (str); + if (first < 0) + { + warning (0, "unknown register name: %s", str); + return; + } + + last = decode_reg_name (dash + 1); + if (last < 0) + { + warning (0, "unknown register name: %s", dash + 1); + return; + } + + *dash = '-'; + + if (first > last) + { + warning (0, "%s-%s is an empty range", str, dash + 1); + return; + } + + for (i = first; i <= last; ++i) + fixed_regs[i] = 1; + + if (!comma) + break; + + *comma = ','; + str = comma + 1; + } + } + + /* Implement TARGET_OPTION_OVERRIDE. */ + + static void + ia64_option_override (void) + { + unsigned int i; + cl_deferred_option *opt; + vec *v + = (vec *) ia64_deferred_options; + + if (v) + FOR_EACH_VEC_ELT (*v, i, opt) + { + switch (opt->opt_index) + { + case OPT_mfixed_range_: + fix_range (opt->arg); + break; + + default: + gcc_unreachable (); + } + } + + if (TARGET_AUTO_PIC) + target_flags |= MASK_CONST_GP; + + /* Numerous experiment shows that IRA based loop pressure + calculation works better for RTL loop invariant motion on targets + with enough (>= 32) registers. It is an expensive optimization. + So it is on only for peak performance. */ + if (optimize >= 3) + flag_ira_loop_pressure = 1; + + + ia64_section_threshold = (OPTION_SET_P (g_switch_value) + ? g_switch_value + : IA64_DEFAULT_GVALUE); + + init_machine_status = ia64_init_machine_status; + + if (flag_align_functions && !str_align_functions) + str_align_functions = "64"; + if (flag_align_loops && !str_align_loops) + str_align_loops = "32"; + if (TARGET_ABI_OPEN_VMS) + flag_no_common = 1; + + ia64_override_options_after_change(); + } + + /* Implement targetm.override_options_after_change. */ + + static void + ia64_override_options_after_change (void) + { + if (optimize >= 3 + && !OPTION_SET_P (flag_selective_scheduling) + && !OPTION_SET_P (flag_selective_scheduling2)) + { + flag_selective_scheduling2 = 1; + flag_sel_sched_pipelining = 1; + } + if (mflag_sched_control_spec == 2) + { + /* Control speculation is on by default for the selective scheduler, + but not for the Haifa scheduler. */ + mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0; + } + if (flag_sel_sched_pipelining && flag_auto_inc_dec) + { + /* FIXME: remove this when we'd implement breaking autoinsns as + a transformation. */ + flag_auto_inc_dec = 0; + } + } + + /* Initialize the record of emitted frame related registers. 
*/ + + void ia64_init_expanders (void) + { + memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs)); + } + + static struct machine_function * + ia64_init_machine_status (void) + { + return ggc_cleared_alloc (); + } + + static enum attr_itanium_class ia64_safe_itanium_class (rtx_insn *); + static enum attr_type ia64_safe_type (rtx_insn *); + + static enum attr_itanium_class + ia64_safe_itanium_class (rtx_insn *insn) + { + if (recog_memoized (insn) >= 0) + return get_attr_itanium_class (insn); + else if (DEBUG_INSN_P (insn)) + return ITANIUM_CLASS_IGNORE; + else + return ITANIUM_CLASS_UNKNOWN; + } + + static enum attr_type + ia64_safe_type (rtx_insn *insn) + { + if (recog_memoized (insn) >= 0) + return get_attr_type (insn); + else + return TYPE_UNKNOWN; + } + + /* The following collection of routines emit instruction group stop bits as + necessary to avoid dependencies. */ + + /* Need to track some additional registers as far as serialization is + concerned so we can properly handle br.call and br.ret. We could + make these registers visible to gcc, but since these registers are + never explicitly used in gcc generated code, it seems wasteful to + do so (plus it would make the call and return patterns needlessly + complex). */ + #define REG_RP (BR_REG (0)) + #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1) + /* This is used for volatile asms which may require a stop bit immediately + before and after them. */ + #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2) + #define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3) + #define NUM_REGS (AR_UNAT_BIT_0 + 64) + + /* For each register, we keep track of how it has been written in the + current instruction group. + + If a register is written unconditionally (no qualifying predicate), + WRITE_COUNT is set to 2 and FIRST_PRED is ignored. + + If a register is written if its qualifying predicate P is true, we + set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register + may be written again by the complement of P (P^1) and when this happens, + WRITE_COUNT gets set to 2. + + The result of this is that whenever an insn attempts to write a register + whose WRITE_COUNT is two, we need to issue an insn group barrier first. + + If a predicate register is written by a floating-point insn, we set + WRITTEN_BY_FP to true. + + If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND + to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */ + + #if GCC_VERSION >= 4000 + #define RWS_FIELD_TYPE __extension__ unsigned short + #else + #define RWS_FIELD_TYPE unsigned int + #endif + struct reg_write_state + { + RWS_FIELD_TYPE write_count : 2; + RWS_FIELD_TYPE first_pred : 10; + RWS_FIELD_TYPE written_by_fp : 1; + RWS_FIELD_TYPE written_by_and : 1; + RWS_FIELD_TYPE written_by_or : 1; + }; + + /* Cumulative info for the current instruction group. */ + struct reg_write_state rws_sum[NUM_REGS]; + #if CHECKING_P + /* Bitmap whether a register has been written in the current insn. 
*/ + unsigned HOST_WIDEST_FAST_INT rws_insn + [(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1) + / HOST_BITS_PER_WIDEST_FAST_INT]; + + static inline void + rws_insn_set (unsigned int regno) + { + unsigned int elt = regno / HOST_BITS_PER_WIDEST_FAST_INT; + unsigned int bit = regno % HOST_BITS_PER_WIDEST_FAST_INT; + gcc_assert (!((rws_insn[elt] >> bit) & 1)); + rws_insn[elt] |= (unsigned HOST_WIDEST_FAST_INT) 1 << bit; + } + + static inline int + rws_insn_test (unsigned int regno) + { + unsigned int elt = regno / HOST_BITS_PER_WIDEST_FAST_INT; + unsigned int bit = regno % HOST_BITS_PER_WIDEST_FAST_INT; + return (rws_insn[elt] >> bit) & 1; + } + #else + /* When not checking, track just REG_AR_CFM and REG_VOLATILE. */ + unsigned char rws_insn[2]; + + static inline void + rws_insn_set (int regno) + { + if (regno == REG_AR_CFM) + rws_insn[0] = 1; + else if (regno == REG_VOLATILE) + rws_insn[1] = 1; + } + + static inline int + rws_insn_test (int regno) + { + if (regno == REG_AR_CFM) + return rws_insn[0]; + if (regno == REG_VOLATILE) + return rws_insn[1]; + return 0; + } + #endif + + /* Indicates whether this is the first instruction after a stop bit, + in which case we don't need another stop bit. Without this, + ia64_variable_issue will die when scheduling an alloc. */ + static int first_instruction; + + /* Misc flags needed to compute RAW/WAW dependencies while we are traversing + RTL for one instruction. */ + struct reg_flags + { + unsigned int is_write : 1; /* Is register being written? */ + unsigned int is_fp : 1; /* Is register used as part of an fp op? */ + unsigned int is_branch : 1; /* Is register used as part of a branch? */ + unsigned int is_and : 1; /* Is register used as part of and.orcm? */ + unsigned int is_or : 1; /* Is register used as part of or.andcm? */ + unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */ + }; + + static void rws_update (int, struct reg_flags, int); + static int rws_access_regno (int, struct reg_flags, int); + static int rws_access_reg (rtx, struct reg_flags, int); + static void update_set_flags (rtx, struct reg_flags *); + static int set_src_needs_barrier (rtx, struct reg_flags, int); + static int rtx_needs_barrier (rtx, struct reg_flags, int); + static void init_insn_group_barriers (void); + static int group_barrier_needed (rtx_insn *); + static int safe_group_barrier_needed (rtx_insn *); + static int in_safe_group_barrier; + + /* Update *RWS for REGNO, which is being written by the current instruction, + with predicate PRED, and associated register flags in FLAGS. */ + + static void + rws_update (int regno, struct reg_flags flags, int pred) + { + if (pred) + rws_sum[regno].write_count++; + else + rws_sum[regno].write_count = 2; + rws_sum[regno].written_by_fp |= flags.is_fp; + /* ??? Not tracking and/or across differing predicates. */ + rws_sum[regno].written_by_and = flags.is_and; + rws_sum[regno].written_by_or = flags.is_or; + rws_sum[regno].first_pred = pred; + } + + /* Handle an access to register REGNO of type FLAGS using predicate register + PRED. Update rws_sum array. Return 1 if this access creates + a dependency with an earlier instruction in the same group. */ + + static int + rws_access_regno (int regno, struct reg_flags flags, int pred) + { + int need_barrier = 0; + + gcc_assert (regno < NUM_REGS); + + if (! 
PR_REGNO_P (regno)) + flags.is_and = flags.is_or = 0; + + if (flags.is_write) + { + int write_count; + + rws_insn_set (regno); + write_count = rws_sum[regno].write_count; + + switch (write_count) + { + case 0: + /* The register has not been written yet. */ + if (!in_safe_group_barrier) + rws_update (regno, flags, pred); + break; + + case 1: + /* The register has been written via a predicate. Treat + it like a unconditional write and do not try to check + for complementary pred reg in earlier write. */ + if (flags.is_and && rws_sum[regno].written_by_and) + ; + else if (flags.is_or && rws_sum[regno].written_by_or) + ; + else + need_barrier = 1; + if (!in_safe_group_barrier) + rws_update (regno, flags, pred); + break; + + case 2: + /* The register has been unconditionally written already. We + need a barrier. */ + if (flags.is_and && rws_sum[regno].written_by_and) + ; + else if (flags.is_or && rws_sum[regno].written_by_or) + ; + else + need_barrier = 1; + if (!in_safe_group_barrier) + { + rws_sum[regno].written_by_and = flags.is_and; + rws_sum[regno].written_by_or = flags.is_or; + } + break; + + default: + gcc_unreachable (); + } + } + else + { + if (flags.is_branch) + { + /* Branches have several RAW exceptions that allow to avoid + barriers. */ + + if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM) + /* RAW dependencies on branch regs are permissible as long + as the writer is a non-branch instruction. Since we + never generate code that uses a branch register written + by a branch instruction, handling this case is + easy. */ + return 0; + + if (REGNO_REG_CLASS (regno) == PR_REGS + && ! rws_sum[regno].written_by_fp) + /* The predicates of a branch are available within the + same insn group as long as the predicate was written by + something other than a floating-point instruction. */ + return 0; + } + + if (flags.is_and && rws_sum[regno].written_by_and) + return 0; + if (flags.is_or && rws_sum[regno].written_by_or) + return 0; + + switch (rws_sum[regno].write_count) + { + case 0: + /* The register has not been written yet. */ + break; + + case 1: + /* The register has been written via a predicate, assume we + need a barrier (don't check for complementary regs). */ + need_barrier = 1; + break; + + case 2: + /* The register has been unconditionally written already. We + need a barrier. */ + need_barrier = 1; + break; + + default: + gcc_unreachable (); + } + } + + return need_barrier; + } + + static int + rws_access_reg (rtx reg, struct reg_flags flags, int pred) + { + int regno = REGNO (reg); + int n = REG_NREGS (reg); + + if (n == 1) + return rws_access_regno (regno, flags, pred); + else + { + int need_barrier = 0; + while (--n >= 0) + need_barrier |= rws_access_regno (regno + n, flags, pred); + return need_barrier; + } + } + + /* Examine X, which is a SET rtx, and update the flags, the predicate, and + the condition, stored in *PFLAGS, *PPRED and *PCOND. */ + + static void + update_set_flags (rtx x, struct reg_flags *pflags) + { + rtx src = SET_SRC (x); + + switch (GET_CODE (src)) + { + case CALL: + return; + + case IF_THEN_ELSE: + /* There are four cases here: + (1) The destination is (pc), in which case this is a branch, + nothing here applies. + (2) The destination is ar.lc, in which case this is a + doloop_end_internal, + (3) The destination is an fp register, in which case this is + an fselect instruction. + (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case + this is a check load. + In all cases, nothing we do in this function applies. 
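/* Illustrative sketch, not part of the patch: a stripped-down model of the
   write-count tracking rws_access_regno () implements above.  The parallel
   and.orcm/or.andcm cases, the branch- and predicate-register exceptions and
   the FIRST_PRED bookkeeping are all omitted; this only shows when a stop bit
   becomes necessary within one instruction group.  Standalone C.  */
#include <stdio.h>

static int write_count;   /* 0 = untouched, 1 = predicated write, 2 = unconditional */

static int
access_reg (int is_write, int predicated)
{
  if (is_write)
    {
      if (write_count != 0)
        return 1;                      /* WAW within the group: need a stop bit */
      write_count = predicated ? 1 : 2;
      return 0;
    }
  return write_count != 0;             /* RAW within the group: need a stop bit */
}

int main (void)
{
  printf ("%d\n", access_reg (1, 0));  /* 0: first write, just recorded       */
  printf ("%d\n", access_reg (0, 0));  /* 1: read after write needs a stop bit */
  printf ("%d\n", access_reg (1, 1));  /* 1: second write needs a stop bit     */
  return 0;
}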
*/ + return; + + default: + if (COMPARISON_P (src) + && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0)))) + /* Set pflags->is_fp to 1 so that we know we're dealing + with a floating point comparison when processing the + destination of the SET. */ + pflags->is_fp = 1; + + /* Discover if this is a parallel comparison. We only handle + and.orcm and or.andcm at present, since we must retain a + strict inverse on the predicate pair. */ + else if (GET_CODE (src) == AND) + pflags->is_and = 1; + else if (GET_CODE (src) == IOR) + pflags->is_or = 1; + + break; + } + } + + /* Subroutine of rtx_needs_barrier; this function determines whether the + source of a given SET rtx found in X needs a barrier. FLAGS and PRED + are as in rtx_needs_barrier. COND is an rtx that holds the condition + for this insn. */ + + static int + set_src_needs_barrier (rtx x, struct reg_flags flags, int pred) + { + int need_barrier = 0; + rtx dst; + rtx src = SET_SRC (x); + + if (GET_CODE (src) == CALL) + /* We don't need to worry about the result registers that + get written by subroutine call. */ + return rtx_needs_barrier (src, flags, pred); + else if (SET_DEST (x) == pc_rtx) + { + /* X is a conditional branch. */ + /* ??? This seems redundant, as the caller sets this bit for + all JUMP_INSNs. */ + if (!ia64_spec_check_src_p (src)) + flags.is_branch = 1; + return rtx_needs_barrier (src, flags, pred); + } + + if (ia64_spec_check_src_p (src)) + /* Avoid checking one register twice (in condition + and in 'then' section) for ldc pattern. */ + { + gcc_assert (REG_P (XEXP (src, 2))); + need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred); + + /* We process MEM below. */ + src = XEXP (src, 1); + } + + need_barrier |= rtx_needs_barrier (src, flags, pred); + + dst = SET_DEST (x); + if (GET_CODE (dst) == ZERO_EXTRACT) + { + need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred); + need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred); + } + return need_barrier; + } + + /* Handle an access to rtx X of type FLAGS using predicate register + PRED. Return 1 if this access creates a dependency with an earlier + instruction in the same group. */ + + static int + rtx_needs_barrier (rtx x, struct reg_flags flags, int pred) + { + int i, j; + int is_complemented = 0; + int need_barrier = 0; + const char *format_ptr; + struct reg_flags new_flags; + rtx cond; + + if (! x) + return 0; + + new_flags = flags; + + switch (GET_CODE (x)) + { + case SET: + update_set_flags (x, &new_flags); + need_barrier = set_src_needs_barrier (x, new_flags, pred); + if (GET_CODE (SET_SRC (x)) != CALL) + { + new_flags.is_write = 1; + need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred); + } + break; + + case CALL: + new_flags.is_write = 0; + need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred); + + /* Avoid multiple register writes, in case this is a pattern with + multiple CALL rtx. This avoids a failure in rws_access_reg. */ + if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM)) + { + new_flags.is_write = 1; + need_barrier |= rws_access_regno (REG_RP, new_flags, pred); + need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred); + need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred); + } + break; + + case COND_EXEC: + /* X is a predicated instruction. 
*/ + + cond = COND_EXEC_TEST (x); + gcc_assert (!pred); + need_barrier = rtx_needs_barrier (cond, flags, 0); + + if (GET_CODE (cond) == EQ) + is_complemented = 1; + cond = XEXP (cond, 0); + gcc_assert (GET_CODE (cond) == REG + && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS); + pred = REGNO (cond); + if (is_complemented) + ++pred; + + need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred); + return need_barrier; + + case CLOBBER: + case USE: + /* Clobber & use are for earlier compiler-phases only. */ + break; + + case ASM_OPERANDS: + case ASM_INPUT: + /* We always emit stop bits for traditional asms. We emit stop bits + for volatile extended asms if TARGET_VOL_ASM_STOP is true. */ + if (GET_CODE (x) != ASM_OPERANDS + || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP)) + { + /* Avoid writing the register multiple times if we have multiple + asm outputs. This avoids a failure in rws_access_reg. */ + if (! rws_insn_test (REG_VOLATILE)) + { + new_flags.is_write = 1; + rws_access_regno (REG_VOLATILE, new_flags, pred); + } + return 1; + } + + /* For all ASM_OPERANDS, we must traverse the vector of input operands. + We cannot just fall through here since then we would be confused + by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate + traditional asms unlike their normal usage. */ + + for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i) + if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred)) + need_barrier = 1; + break; + + case PARALLEL: + for (i = XVECLEN (x, 0) - 1; i >= 0; --i) + { + rtx pat = XVECEXP (x, 0, i); + switch (GET_CODE (pat)) + { + case SET: + update_set_flags (pat, &new_flags); + need_barrier |= set_src_needs_barrier (pat, new_flags, pred); + break; + + case USE: + case CALL: + case ASM_OPERANDS: + case ASM_INPUT: + need_barrier |= rtx_needs_barrier (pat, flags, pred); + break; + + case CLOBBER: + if (REG_P (XEXP (pat, 0)) + && extract_asm_operands (x) != NULL_RTX + && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM) + { + new_flags.is_write = 1; + need_barrier |= rtx_needs_barrier (XEXP (pat, 0), + new_flags, pred); + new_flags = flags; + } + break; + + case RETURN: + break; + + default: + gcc_unreachable (); + } + } + for (i = XVECLEN (x, 0) - 1; i >= 0; --i) + { + rtx pat = XVECEXP (x, 0, i); + if (GET_CODE (pat) == SET) + { + if (GET_CODE (SET_SRC (pat)) != CALL) + { + new_flags.is_write = 1; + need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags, + pred); + } + } + else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN) + need_barrier |= rtx_needs_barrier (pat, flags, pred); + } + break; + + case SUBREG: + need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred); + break; + case REG: + if (REGNO (x) == AR_UNAT_REGNUM) + { + for (i = 0; i < 64; ++i) + need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred); + } + else + need_barrier = rws_access_reg (x, flags, pred); + break; + + case MEM: + /* Find the regs used in memory address computation. */ + new_flags.is_write = 0; + need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred); + break; + + case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR: + case SYMBOL_REF: case LABEL_REF: case CONST: + break; + + /* Operators with side-effects. 
*/ + case POST_INC: case POST_DEC: + gcc_assert (GET_CODE (XEXP (x, 0)) == REG); + + new_flags.is_write = 0; + need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred); + new_flags.is_write = 1; + need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred); + break; + + case POST_MODIFY: + gcc_assert (GET_CODE (XEXP (x, 0)) == REG); + + new_flags.is_write = 0; + need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred); + need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred); + new_flags.is_write = 1; + need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred); + break; + + /* Handle common unary and binary ops for efficiency. */ + case COMPARE: case PLUS: case MINUS: case MULT: case DIV: + case MOD: case UDIV: case UMOD: case AND: case IOR: + case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT: + case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX: + case NE: case EQ: case GE: case GT: case LE: + case LT: case GEU: case GTU: case LEU: case LTU: + need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred); + need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred); + break; + + case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND: + case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT: + case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS: + case SQRT: case FFS: case POPCOUNT: + need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred); + break; + + case VEC_SELECT: + /* VEC_SELECT's second argument is a PARALLEL with integers that + describe the elements selected. On ia64, those integers are + always constants. Avoid walking the PARALLEL so that we don't + get confused with "normal" parallels and then die. */ + need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred); + break; + + case UNSPEC: + switch (XINT (x, 1)) + { + case UNSPEC_LTOFF_DTPMOD: + case UNSPEC_LTOFF_DTPREL: + case UNSPEC_DTPREL: + case UNSPEC_LTOFF_TPREL: + case UNSPEC_TPREL: + case UNSPEC_PRED_REL_MUTEX: + case UNSPEC_PIC_CALL: + case UNSPEC_MF: + case UNSPEC_FETCHADD_ACQ: + case UNSPEC_FETCHADD_REL: + case UNSPEC_BSP_VALUE: + case UNSPEC_FLUSHRS: + case UNSPEC_BUNDLE_SELECTOR: + break; + + case UNSPEC_GR_SPILL: + case UNSPEC_GR_RESTORE: + { + HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1)); + HOST_WIDE_INT bit = (offset >> 3) & 63; + + need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); + new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL); + need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit, + new_flags, pred); + break; + } + + case UNSPEC_FR_SPILL: + case UNSPEC_FR_RESTORE: + case UNSPEC_GETF_EXP: + case UNSPEC_SETF_EXP: + case UNSPEC_ADDP4: + case UNSPEC_FR_SQRT_RECIP_APPROX: + case UNSPEC_FR_SQRT_RECIP_APPROX_RES: + case UNSPEC_LDA: + case UNSPEC_LDS: + case UNSPEC_LDS_A: + case UNSPEC_LDSA: + case UNSPEC_CHKACLR: + case UNSPEC_CHKS: + need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); + break; + + case UNSPEC_FR_RECIP_APPROX: + case UNSPEC_SHRP: + case UNSPEC_COPYSIGN: + case UNSPEC_FR_RECIP_APPROX_RES: + need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); + need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred); + break; + + case UNSPEC_CMPXCHG_ACQ: + case UNSPEC_CMPXCHG_REL: + need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred); + need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred); + break; + + default: + gcc_unreachable (); + } + break; + + case UNSPEC_VOLATILE: + switch (XINT (x, 1)) + { + case UNSPECV_ALLOC: + /* Alloc must always be 
the first instruction of a group. + We force this by always returning true. */ + /* ??? We might get better scheduling if we explicitly check for + input/local/output register dependencies, and modify the + scheduler so that alloc is always reordered to the start of + the current group. We could then eliminate all of the + first_instruction code. */ + rws_access_regno (AR_PFS_REGNUM, flags, pred); + + new_flags.is_write = 1; + rws_access_regno (REG_AR_CFM, new_flags, pred); + return 1; + + case UNSPECV_SET_BSP: + case UNSPECV_PROBE_STACK_RANGE: + need_barrier = 1; + break; + + case UNSPECV_BLOCKAGE: + case UNSPECV_INSN_GROUP_BARRIER: + case UNSPECV_BREAK: + case UNSPECV_PSAC_ALL: + case UNSPECV_PSAC_NORMAL: + return 0; + + case UNSPECV_PROBE_STACK_ADDRESS: + need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); + break; + + default: + gcc_unreachable (); + } + break; + + case RETURN: + new_flags.is_write = 0; + need_barrier = rws_access_regno (REG_RP, flags, pred); + need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred); + + new_flags.is_write = 1; + need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred); + need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred); + break; + + default: + format_ptr = GET_RTX_FORMAT (GET_CODE (x)); + for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) + switch (format_ptr[i]) + { + case '0': /* unused field */ + case 'i': /* integer */ + case 'n': /* note */ + case 'w': /* wide integer */ + case 's': /* pointer to string */ + case 'S': /* optional pointer to string */ + break; + + case 'e': + if (rtx_needs_barrier (XEXP (x, i), flags, pred)) + need_barrier = 1; + break; + + case 'E': + for (j = XVECLEN (x, i) - 1; j >= 0; --j) + if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred)) + need_barrier = 1; + break; + + default: + gcc_unreachable (); + } + break; + } + return need_barrier; + } + + /* Clear out the state for group_barrier_needed at the start of a + sequence of insns. */ + + static void + init_insn_group_barriers (void) + { + memset (rws_sum, 0, sizeof (rws_sum)); + first_instruction = 1; + } + + /* Given the current state, determine whether a group barrier (a stop bit) is + necessary before INSN. Return nonzero if so. This modifies the state to + include the effects of INSN as a side-effect. */ + + static int + group_barrier_needed (rtx_insn *insn) + { + rtx pat; + int need_barrier = 0; + struct reg_flags flags; + + memset (&flags, 0, sizeof (flags)); + switch (GET_CODE (insn)) + { + case NOTE: + case DEBUG_INSN: + break; + + case BARRIER: + /* A barrier doesn't imply an instruction group boundary. */ + break; + + case CODE_LABEL: + memset (rws_insn, 0, sizeof (rws_insn)); + return 1; + + case CALL_INSN: + flags.is_branch = 1; + flags.is_sibcall = SIBLING_CALL_P (insn); + memset (rws_insn, 0, sizeof (rws_insn)); + + /* Don't bundle a call following another call. */ + if ((pat = prev_active_insn (insn)) && CALL_P (pat)) + { + need_barrier = 1; + break; + } + + need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0); + break; + + case JUMP_INSN: + if (!ia64_spec_check_p (insn)) + flags.is_branch = 1; + + /* Don't bundle a jump following a call. */ + if ((pat = prev_active_insn (insn)) && CALL_P (pat)) + { + need_barrier = 1; + break; + } + /* FALLTHRU */ + + case INSN: + if (GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + /* Don't care about USE and CLOBBER "insns"---those are used to + indicate to the optimizer that it shouldn't get rid of + certain operations. 
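/* Illustrative sketch, not part of the patch: how the UNSPEC_GR_SPILL and
   UNSPEC_GR_RESTORE case of rtx_needs_barrier () above maps a spill offset to
   one of the 64 tracked ar.unat bits -- each 8-byte slot owns one bit, and
   the mapping wraps every 512 bytes.  Standalone C.  */
#include <stdio.h>

static int
unat_bit (long offset)
{
  return (int) ((offset >> 3) & 63);
}

int main (void)
{
  printf ("%d\n", unat_bit (0));    /* 0  */
  printf ("%d\n", unat_bit (8));    /* 1  */
  printf ("%d\n", unat_bit (504));  /* 63 */
  printf ("%d\n", unat_bit (512));  /* 0  (wraps around) */
  return 0;
}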
*/ + break; + + pat = PATTERN (insn); + + /* Ug. Hack hacks hacked elsewhere. */ + switch (recog_memoized (insn)) + { + /* We play dependency tricks with the epilogue in order + to get proper schedules. Undo this for dv analysis. */ + case CODE_FOR_epilogue_deallocate_stack: + case CODE_FOR_prologue_allocate_stack: + pat = XVECEXP (pat, 0, 0); + break; + + /* The pattern we use for br.cloop confuses the code above. + The second element of the vector is representative. */ + case CODE_FOR_doloop_end_internal: + pat = XVECEXP (pat, 0, 1); + break; + + /* Doesn't generate code. */ + case CODE_FOR_pred_rel_mutex: + case CODE_FOR_prologue_use: + return 0; + + default: + break; + } + + memset (rws_insn, 0, sizeof (rws_insn)); + need_barrier = rtx_needs_barrier (pat, flags, 0); + + /* Check to see if the previous instruction was a volatile + asm. */ + if (! need_barrier) + need_barrier = rws_access_regno (REG_VOLATILE, flags, 0); + + break; + + default: + gcc_unreachable (); + } + + if (first_instruction && important_for_bundling_p (insn)) + { + need_barrier = 0; + first_instruction = 0; + } + + return need_barrier; + } + + /* Like group_barrier_needed, but do not clobber the current state. */ + + static int + safe_group_barrier_needed (rtx_insn *insn) + { + int saved_first_instruction; + int t; + + saved_first_instruction = first_instruction; + in_safe_group_barrier = 1; + + t = group_barrier_needed (insn); + + first_instruction = saved_first_instruction; + in_safe_group_barrier = 0; + + return t; + } + + /* Scan the current function and insert stop bits as necessary to + eliminate dependencies. This function assumes that a final + instruction scheduling pass has been run which has already + inserted most of the necessary stop bits. This function only + inserts new ones at basic block boundaries, since these are + invisible to the scheduler. */ + + static void + emit_insn_group_barriers (FILE *dump) + { + rtx_insn *insn; + rtx_insn *last_label = 0; + int insns_since_last_label = 0; + + init_insn_group_barriers (); + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (LABEL_P (insn)) + { + if (insns_since_last_label) + last_label = insn; + insns_since_last_label = 0; + } + else if (NOTE_P (insn) + && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK) + { + if (insns_since_last_label) + last_label = insn; + insns_since_last_label = 0; + } + else if (NONJUMP_INSN_P (insn) + && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE + && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER) + { + init_insn_group_barriers (); + last_label = 0; + } + else if (NONDEBUG_INSN_P (insn)) + { + insns_since_last_label = 1; + + if (group_barrier_needed (insn)) + { + if (last_label) + { + if (dump) + fprintf (dump, "Emitting stop before label %d\n", + INSN_UID (last_label)); + emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label); + insn = last_label; + + init_insn_group_barriers (); + last_label = 0; + } + } + } + } + } + + /* Like emit_insn_group_barriers, but run if no final scheduling pass was run. + This function has to emit all necessary group barriers. */ + + static void + emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED) + { + rtx_insn *insn; + + init_insn_group_barriers (); + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (BARRIER_P (insn)) + { + rtx_insn *last = prev_active_insn (insn); + + if (! 
last) + continue; + if (JUMP_TABLE_DATA_P (last)) + last = prev_active_insn (last); + if (recog_memoized (last) != CODE_FOR_insn_group_barrier) + emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last); + + init_insn_group_barriers (); + } + else if (NONDEBUG_INSN_P (insn)) + { + if (recog_memoized (insn) == CODE_FOR_insn_group_barrier) + init_insn_group_barriers (); + else if (group_barrier_needed (insn)) + { + emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn); + init_insn_group_barriers (); + group_barrier_needed (insn); + } + } + } + } + + + + /* Instruction scheduling support. */ + + #define NR_BUNDLES 10 + + /* A list of names of all available bundles. */ + + static const char *bundle_name [NR_BUNDLES] = + { + ".mii", + ".mmi", + ".mfi", + ".mmf", + #if NR_BUNDLES == 10 + ".bbb", + ".mbb", + #endif + ".mib", + ".mmb", + ".mfb", + ".mlx" + }; + + /* Nonzero if we should insert stop bits into the schedule. */ + + int ia64_final_schedule = 0; + + /* Codes of the corresponding queried units: */ + + static int _0mii_, _0mmi_, _0mfi_, _0mmf_; + static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_; + + static int _1mii_, _1mmi_, _1mfi_, _1mmf_; + static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_; + + static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6; + + /* The following variable value is an insn group barrier. */ + + static rtx_insn *dfa_stop_insn; + + /* The following variable value is the last issued insn. */ + + static rtx_insn *last_scheduled_insn; + + /* The following variable value is pointer to a DFA state used as + temporary variable. */ + + static state_t temp_dfa_state = NULL; + + /* The following variable value is DFA state after issuing the last + insn. */ + + static state_t prev_cycle_state = NULL; + + /* The following array element values are TRUE if the corresponding + insn requires to add stop bits before it. */ + + static char *stops_p = NULL; + + /* The following variable is used to set up the mentioned above array. */ + + static int stop_before_p = 0; + + /* The following variable value is length of the arrays `clocks' and + `add_cycles'. */ + + static int clocks_length; + + /* The following variable value is number of data speculations in progress. */ + static int pending_data_specs = 0; + + /* Number of memory references on current and three future processor cycles. */ + static char mem_ops_in_group[4]; + + /* Number of current processor cycle (from scheduler's point of view). */ + static int current_cycle; + + static rtx ia64_single_set (rtx_insn *); + static void ia64_emit_insn_before (rtx, rtx_insn *); + + /* Map a bundle number to its pseudo-op. */ + + const char * + get_bundle_name (int b) + { + return bundle_name[b]; + } + + + /* Return the maximum number of instructions a cpu can issue. */ + + static int + ia64_issue_rate (void) + { + return 6; + } + + /* Helper function - like single_set, but look inside COND_EXEC. */ + + static rtx + ia64_single_set (rtx_insn *insn) + { + rtx x = PATTERN (insn), ret; + if (GET_CODE (x) == COND_EXEC) + x = COND_EXEC_CODE (x); + if (GET_CODE (x) == SET) + return x; + + /* Special case here prologue_allocate_stack and epilogue_deallocate_stack. + Although they are not classical single set, the second set is there just + to protect it from moving past FP-relative stack accesses. 
*/ + switch (recog_memoized (insn)) + { + case CODE_FOR_prologue_allocate_stack: + case CODE_FOR_prologue_allocate_stack_pr: + case CODE_FOR_epilogue_deallocate_stack: + case CODE_FOR_epilogue_deallocate_stack_pr: + ret = XVECEXP (x, 0, 0); + break; + + default: + ret = single_set_2 (insn, x); + break; + } + + return ret; + } + + /* Adjust the cost of a scheduling dependency. + Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN. + COST is the current cost, DW is dependency weakness. */ + static int + ia64_adjust_cost (rtx_insn *insn, int dep_type1, rtx_insn *dep_insn, + int cost, dw_t dw) + { + enum reg_note dep_type = (enum reg_note) dep_type1; + enum attr_itanium_class dep_class; + enum attr_itanium_class insn_class; + + insn_class = ia64_safe_itanium_class (insn); + dep_class = ia64_safe_itanium_class (dep_insn); + + /* Treat true memory dependencies separately. Ignore apparent true + dependence between store and call (call has a MEM inside a SYMBOL_REF). */ + if (dep_type == REG_DEP_TRUE + && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF) + && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL)) + return 0; + + if (dw == MIN_DEP_WEAK) + /* Store and load are likely to alias, use higher cost to avoid stall. */ + return param_sched_mem_true_dep_cost; + else if (dw > MIN_DEP_WEAK) + { + /* Store and load are less likely to alias. */ + if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF) + /* Assume there will be no cache conflict for floating-point data. + For integer data, L1 conflict penalty is huge (17 cycles), so we + never assume it will not cause a conflict. */ + return 0; + else + return cost; + } + + if (dep_type != REG_DEP_OUTPUT) + return cost; + + if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF + || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF) + return 0; + + return cost; + } + + /* Like emit_insn_before, but skip cycle_display notes. + ??? When cycle display notes are implemented, update this. */ + + static void + ia64_emit_insn_before (rtx insn, rtx_insn *before) + { + emit_insn_before (insn, before); + } + + /* The following function marks insns who produce addresses for load + and store insns. Such insns will be placed into M slots because it + decrease latency time for Itanium1 (see function + `ia64_produce_address_p' and the DFA descriptions). */ + + static void + ia64_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail) + { + rtx_insn *insn, *next, *next_tail; + + /* Before reload, which_alternative is not set, which means that + ia64_safe_itanium_class will produce wrong results for (at least) + move instructions. 
*/ + if (!reload_completed) + return; + + next_tail = NEXT_INSN (tail); + for (insn = head; insn != next_tail; insn = NEXT_INSN (insn)) + if (INSN_P (insn)) + insn->call = 0; + for (insn = head; insn != next_tail; insn = NEXT_INSN (insn)) + if (INSN_P (insn) + && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU) + { + sd_iterator_def sd_it; + dep_t dep; + bool has_mem_op_consumer_p = false; + + FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep) + { + enum attr_itanium_class c; + + if (DEP_TYPE (dep) != REG_DEP_TRUE) + continue; + + next = DEP_CON (dep); + c = ia64_safe_itanium_class (next); + if ((c == ITANIUM_CLASS_ST + || c == ITANIUM_CLASS_STF) + && ia64_st_address_bypass_p (insn, next)) + { + has_mem_op_consumer_p = true; + break; + } + else if ((c == ITANIUM_CLASS_LD + || c == ITANIUM_CLASS_FLD + || c == ITANIUM_CLASS_FLDP) + && ia64_ld_address_bypass_p (insn, next)) + { + has_mem_op_consumer_p = true; + break; + } + } + + insn->call = has_mem_op_consumer_p; + } + } + + /* We're beginning a new block. Initialize data structures as necessary. */ + + static void + ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED, + int sched_verbose ATTRIBUTE_UNUSED, + int max_ready ATTRIBUTE_UNUSED) + { + if (flag_checking && !sel_sched_p () && reload_completed) + { + for (rtx_insn *insn = NEXT_INSN (current_sched_info->prev_head); + insn != current_sched_info->next_tail; + insn = NEXT_INSN (insn)) + gcc_assert (!SCHED_GROUP_P (insn)); + } + last_scheduled_insn = NULL; + init_insn_group_barriers (); + + current_cycle = 0; + memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group)); + } + + /* We're beginning a scheduling pass. Check assertion. */ + + static void + ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED, + int sched_verbose ATTRIBUTE_UNUSED, + int max_ready ATTRIBUTE_UNUSED) + { + gcc_assert (pending_data_specs == 0); + } + + /* Scheduling pass is now finished. Free/reset static variable. */ + static void + ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED, + int sched_verbose ATTRIBUTE_UNUSED) + { + gcc_assert (pending_data_specs == 0); + } + + /* Return TRUE if INSN is a load (either normal or speculative, but not a + speculation check), FALSE otherwise. */ + static bool + is_load_p (rtx_insn *insn) + { + enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn); + + return + ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD) + && get_attr_check_load (insn) == CHECK_LOAD_NO); + } + + /* If INSN is a memory reference, memoize it in MEM_OPS_IN_GROUP global array + (taking account for 3-cycle cache reference postponing for stores: Intel + Itanium 2 Reference Manual for Software Development and Optimization, + 6.7.3.1). */ + static void + record_memory_reference (rtx_insn *insn) + { + enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn); + + switch (insn_class) { + case ITANIUM_CLASS_FLD: + case ITANIUM_CLASS_LD: + mem_ops_in_group[current_cycle % 4]++; + break; + case ITANIUM_CLASS_STF: + case ITANIUM_CLASS_ST: + mem_ops_in_group[(current_cycle + 3) % 4]++; + break; + default:; + } + } + + /* We are about to being issuing insns for this clock cycle. + Override the default sort algorithm to better slot instructions. 
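/* Illustrative sketch, not part of the patch: the modulo-4 cycle buckets used
   by record_memory_reference () above.  A load is charged to the cycle it
   issues on; a store is charged three cycles later to model the postponed
   cache access described in the Itanium 2 optimization manual.  Standalone C.  */
#include <stdio.h>

static char mem_ops_in_group[4];

static void note_load  (int cycle) { mem_ops_in_group[cycle % 4]++; }
static void note_store (int cycle) { mem_ops_in_group[(cycle + 3) % 4]++; }

int main (void)
{
  note_load (0);        /* charged to bucket 0               */
  note_store (0);       /* charged to bucket 3, i.e. cycle 3 */
  note_load (1);        /* charged to bucket 1               */

  for (int i = 0; i < 4; i++)
    printf ("bucket %d: %d\n", i, mem_ops_in_group[i]);
  /* bucket 0: 1, bucket 1: 1, bucket 2: 0, bucket 3: 1 */
  return 0;
}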
*/ + + static int + ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready, + int *pn_ready, int clock_var, + int reorder_type) + { + int n_asms; + int n_ready = *pn_ready; + rtx_insn **e_ready = ready + n_ready; + rtx_insn **insnp; + + if (sched_verbose) + fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type); + + if (reorder_type == 0) + { + /* First, move all USEs, CLOBBERs and other crud out of the way. */ + n_asms = 0; + for (insnp = ready; insnp < e_ready; insnp++) + if (insnp < e_ready) + { + rtx_insn *insn = *insnp; + enum attr_type t = ia64_safe_type (insn); + if (t == TYPE_UNKNOWN) + { + if (GET_CODE (PATTERN (insn)) == ASM_INPUT + || asm_noperands (PATTERN (insn)) >= 0) + { + rtx_insn *lowest = ready[n_asms]; + ready[n_asms] = insn; + *insnp = lowest; + n_asms++; + } + else + { + rtx_insn *highest = ready[n_ready - 1]; + ready[n_ready - 1] = insn; + *insnp = highest; + return 1; + } + } + } + + if (n_asms < n_ready) + { + /* Some normal insns to process. Skip the asms. */ + ready += n_asms; + n_ready -= n_asms; + } + else if (n_ready > 0) + return 1; + } + + if (ia64_final_schedule) + { + int deleted = 0; + int nr_need_stop = 0; + + for (insnp = ready; insnp < e_ready; insnp++) + if (safe_group_barrier_needed (*insnp)) + nr_need_stop++; + + if (reorder_type == 1 && n_ready == nr_need_stop) + return 0; + if (reorder_type == 0) + return 1; + insnp = e_ready; + /* Move down everything that needs a stop bit, preserving + relative order. */ + while (insnp-- > ready + deleted) + while (insnp >= ready + deleted) + { + rtx_insn *insn = *insnp; + if (! safe_group_barrier_needed (insn)) + break; + memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx)); + *ready = insn; + deleted++; + } + n_ready -= deleted; + ready += deleted; + } + + current_cycle = clock_var; + if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns) + { + int moved = 0; + + insnp = e_ready; + /* Move down loads/stores, preserving relative order. */ + while (insnp-- > ready + moved) + while (insnp >= ready + moved) + { + rtx_insn *insn = *insnp; + if (! is_load_p (insn)) + break; + memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx)); + *ready = insn; + moved++; + } + n_ready -= moved; + ready += moved; + } + + return 1; + } + + /* We are about to being issuing insns for this clock cycle. Override + the default sort algorithm to better slot instructions. */ + + static int + ia64_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready, + int *pn_ready, int clock_var) + { + return ia64_dfa_sched_reorder (dump, sched_verbose, ready, + pn_ready, clock_var, 0); + } + + /* Like ia64_sched_reorder, but called after issuing each insn. + Override the default sort algorithm to better slot instructions. */ + + static int + ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED, + int sched_verbose ATTRIBUTE_UNUSED, rtx_insn **ready, + int *pn_ready, int clock_var) + { + return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready, + clock_var, 1); + } + + /* We are about to issue INSN. Return the number of insns left on the + ready queue that can be issued this cycle. */ + + static int + ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED, + int sched_verbose ATTRIBUTE_UNUSED, + rtx_insn *insn, + int can_issue_more ATTRIBUTE_UNUSED) + { + if (sched_deps_info->generate_spec_deps && !sel_sched_p ()) + /* Modulo scheduling does not extend h_i_d when emitting + new instructions. Don't use h_i_d, if we don't have to. 
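/* Illustrative sketch, not part of the patch: the memmove rotation that
   ia64_dfa_sched_reorder () above uses to push insns needing a stop bit (or
   excess memory ops) to the low-priority front of the ready array while
   preserving the relative order of both groups.  Modeled here on ints with an
   "is odd" predicate instead of rtx_insn pointers; the caller then skips the
   first MOVED entries, just as the real code advances READY.  Standalone C.  */
#include <stdio.h>
#include <string.h>

static int is_odd (int v) { return v & 1; }

static int
move_matching_to_front (int *a, int n, int (*pred) (int))
{
  int moved = 0;
  int *p = a + n;

  while (p-- > a + moved)
    while (p >= a + moved)
      {
        int v = *p;
        if (!pred (v))
          break;
        memmove (a + 1, a, (p - a) * sizeof *a);  /* slide the prefix up by one */
        *a = v;                                   /* and park V at the front    */
        moved++;
      }
  return moved;
}

int main (void)
{
  int ready[] = { 1, 2, 3, 4, 5 };
  int moved = move_matching_to_front (ready, 5, is_odd);

  printf ("moved %d:", moved);
  for (int i = 0; i < 5; i++)
    printf (" %d", ready[i]);
  printf ("\n");   /* moved 3: 1 3 5 2 4 */
  return 0;
}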
*/ + { + if (DONE_SPEC (insn) & BEGIN_DATA) + pending_data_specs++; + if (CHECK_SPEC (insn) & BEGIN_DATA) + pending_data_specs--; + } + + if (DEBUG_INSN_P (insn)) + return 1; + + last_scheduled_insn = insn; + memcpy (prev_cycle_state, curr_state, dfa_state_size); + if (reload_completed) + { + int needed = group_barrier_needed (insn); + + gcc_assert (!needed); + if (CALL_P (insn)) + init_insn_group_barriers (); + stops_p [INSN_UID (insn)] = stop_before_p; + stop_before_p = 0; + + record_memory_reference (insn); + } + return 1; + } + + /* We are choosing insn from the ready queue. Return zero if INSN + can be chosen. */ + + static int + ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index) + { + gcc_assert (insn && INSN_P (insn)); + + /* Size of ALAT is 32. As far as we perform conservative + data speculation, we keep ALAT half-empty. */ + if (pending_data_specs >= 16 && (TODO_SPEC (insn) & BEGIN_DATA)) + return ready_index == 0 ? -1 : 1; + + if (ready_index == 0) + return 0; + + if ((!reload_completed + || !safe_group_barrier_needed (insn)) + && (!mflag_sched_mem_insns_hard_limit + || !is_load_p (insn) + || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns)) + return 0; + + return 1; + } + + /* The following variable value is pseudo-insn used by the DFA insn + scheduler to change the DFA state when the simulated clock is + increased. */ + + static rtx_insn *dfa_pre_cycle_insn; + + /* Returns 1 when a meaningful insn was scheduled between the last group + barrier and LAST. */ + static int + scheduled_good_insn (rtx_insn *last) + { + if (last && recog_memoized (last) >= 0) + return 1; + + for ( ; + last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last) + && !stops_p[INSN_UID (last)]; + last = PREV_INSN (last)) + /* We could hit a NOTE_INSN_DELETED here which is actually outside + the ebb we're scheduling. */ + if (INSN_P (last) && recog_memoized (last) >= 0) + return 1; + + return 0; + } + + /* We are about to being issuing INSN. Return nonzero if we cannot + issue it on given cycle CLOCK and return zero if we should not sort + the ready queue on the next clock start. */ + + static int + ia64_dfa_new_cycle (FILE *dump, int verbose, rtx_insn *insn, int last_clock, + int clock, int *sort_p) + { + gcc_assert (insn && INSN_P (insn)); + + if (DEBUG_INSN_P (insn)) + return 0; + + /* When a group barrier is needed for insn, last_scheduled_insn + should be set. */ + gcc_assert (!(reload_completed && safe_group_barrier_needed (insn)) + || last_scheduled_insn); + + if ((reload_completed + && (safe_group_barrier_needed (insn) + || (mflag_sched_stop_bits_after_every_cycle + && last_clock != clock + && last_scheduled_insn + && scheduled_good_insn (last_scheduled_insn)))) + || (last_scheduled_insn + && (CALL_P (last_scheduled_insn) + || unknown_for_bundling_p (last_scheduled_insn)))) + { + init_insn_group_barriers (); + + if (verbose && dump) + fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn), + last_clock == clock ? " + cycle advance" : ""); + + stop_before_p = 1; + current_cycle = clock; + mem_ops_in_group[current_cycle % 4] = 0; + + if (last_clock == clock) + { + state_transition (curr_state, dfa_stop_insn); + if (TARGET_EARLY_STOP_BITS) + *sort_p = (last_scheduled_insn == NULL_RTX + || ! 
CALL_P (last_scheduled_insn)); + else + *sort_p = 0; + return 1; + } + + if (last_scheduled_insn) + { + if (unknown_for_bundling_p (last_scheduled_insn)) + state_reset (curr_state); + else + { + memcpy (curr_state, prev_cycle_state, dfa_state_size); + state_transition (curr_state, dfa_stop_insn); + state_transition (curr_state, dfa_pre_cycle_insn); + state_transition (curr_state, NULL); + } + } + } + return 0; + } + + /* Implement targetm.sched.h_i_d_extended hook. + Extend internal data structures. */ + static void + ia64_h_i_d_extended (void) + { + if (stops_p != NULL) + { + int new_clocks_length = get_max_uid () * 3 / 2; + stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1); + clocks_length = new_clocks_length; + } + } + + + /* This structure describes the data used by the backend to guide scheduling. + When the current scheduling point is switched, this data should be saved + and restored later, if the scheduler returns to this point. */ + struct _ia64_sched_context + { + state_t prev_cycle_state; + rtx_insn *last_scheduled_insn; + struct reg_write_state rws_sum[NUM_REGS]; + struct reg_write_state rws_insn[NUM_REGS]; + int first_instruction; + int pending_data_specs; + int current_cycle; + char mem_ops_in_group[4]; + }; + typedef struct _ia64_sched_context *ia64_sched_context_t; + + /* Allocates a scheduling context. */ + static void * + ia64_alloc_sched_context (void) + { + return xmalloc (sizeof (struct _ia64_sched_context)); + } + + /* Initializes the _SC context with clean data, if CLEAN_P, and from + the global context otherwise. */ + static void + ia64_init_sched_context (void *_sc, bool clean_p) + { + ia64_sched_context_t sc = (ia64_sched_context_t) _sc; + + sc->prev_cycle_state = xmalloc (dfa_state_size); + if (clean_p) + { + state_reset (sc->prev_cycle_state); + sc->last_scheduled_insn = NULL; + memset (sc->rws_sum, 0, sizeof (rws_sum)); + memset (sc->rws_insn, 0, sizeof (rws_insn)); + sc->first_instruction = 1; + sc->pending_data_specs = 0; + sc->current_cycle = 0; + memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group)); + } + else + { + memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size); + sc->last_scheduled_insn = last_scheduled_insn; + memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum)); + memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn)); + sc->first_instruction = first_instruction; + sc->pending_data_specs = pending_data_specs; + sc->current_cycle = current_cycle; + memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group)); + } + } + + /* Sets the global scheduling context to the one pointed to by _SC. */ + static void + ia64_set_sched_context (void *_sc) + { + ia64_sched_context_t sc = (ia64_sched_context_t) _sc; + + gcc_assert (sc != NULL); + + memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size); + last_scheduled_insn = sc->last_scheduled_insn; + memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum)); + memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn)); + first_instruction = sc->first_instruction; + pending_data_specs = sc->pending_data_specs; + current_cycle = sc->current_cycle; + memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group)); + } + + /* Clears the data in the _SC scheduling context. */ + static void + ia64_clear_sched_context (void *_sc) + { + ia64_sched_context_t sc = (ia64_sched_context_t) _sc; + + free (sc->prev_cycle_state); + sc->prev_cycle_state = NULL; + } + + /* Frees the _SC scheduling context. 
*/ + static void + ia64_free_sched_context (void *_sc) + { + gcc_assert (_sc != NULL); + + free (_sc); + } + + typedef rtx (* gen_func_t) (rtx, rtx); + + /* Return a function that will generate a load of mode MODE_NO + with speculation types TS. */ + static gen_func_t + get_spec_load_gen_function (ds_t ts, int mode_no) + { + static gen_func_t gen_ld_[] = { + gen_movbi, + gen_movqi_internal, + gen_movhi_internal, + gen_movsi_internal, + gen_movdi_internal, + gen_movsf_internal, + gen_movdf_internal, + gen_movxf_internal, + gen_movti_internal, + gen_zero_extendqidi2, + gen_zero_extendhidi2, + gen_zero_extendsidi2, + }; + + static gen_func_t gen_ld_a[] = { + gen_movbi_advanced, + gen_movqi_advanced, + gen_movhi_advanced, + gen_movsi_advanced, + gen_movdi_advanced, + gen_movsf_advanced, + gen_movdf_advanced, + gen_movxf_advanced, + gen_movti_advanced, + gen_zero_extendqidi2_advanced, + gen_zero_extendhidi2_advanced, + gen_zero_extendsidi2_advanced, + }; + static gen_func_t gen_ld_s[] = { + gen_movbi_speculative, + gen_movqi_speculative, + gen_movhi_speculative, + gen_movsi_speculative, + gen_movdi_speculative, + gen_movsf_speculative, + gen_movdf_speculative, + gen_movxf_speculative, + gen_movti_speculative, + gen_zero_extendqidi2_speculative, + gen_zero_extendhidi2_speculative, + gen_zero_extendsidi2_speculative, + }; + static gen_func_t gen_ld_sa[] = { + gen_movbi_speculative_advanced, + gen_movqi_speculative_advanced, + gen_movhi_speculative_advanced, + gen_movsi_speculative_advanced, + gen_movdi_speculative_advanced, + gen_movsf_speculative_advanced, + gen_movdf_speculative_advanced, + gen_movxf_speculative_advanced, + gen_movti_speculative_advanced, + gen_zero_extendqidi2_speculative_advanced, + gen_zero_extendhidi2_speculative_advanced, + gen_zero_extendsidi2_speculative_advanced, + }; + static gen_func_t gen_ld_s_a[] = { + gen_movbi_speculative_a, + gen_movqi_speculative_a, + gen_movhi_speculative_a, + gen_movsi_speculative_a, + gen_movdi_speculative_a, + gen_movsf_speculative_a, + gen_movdf_speculative_a, + gen_movxf_speculative_a, + gen_movti_speculative_a, + gen_zero_extendqidi2_speculative_a, + gen_zero_extendhidi2_speculative_a, + gen_zero_extendsidi2_speculative_a, + }; + + gen_func_t *gen_ld; + + if (ts & BEGIN_DATA) + { + if (ts & BEGIN_CONTROL) + gen_ld = gen_ld_sa; + else + gen_ld = gen_ld_a; + } + else if (ts & BEGIN_CONTROL) + { + if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL) + || ia64_needs_block_p (ts)) + gen_ld = gen_ld_s; + else + gen_ld = gen_ld_s_a; + } + else if (ts == 0) + gen_ld = gen_ld_; + else + gcc_unreachable (); + + return gen_ld[mode_no]; + } + + /* Constants that help mapping 'machine_mode' to int. */ + enum SPEC_MODES + { + SPEC_MODE_INVALID = -1, + SPEC_MODE_FIRST = 0, + SPEC_MODE_FOR_EXTEND_FIRST = 1, + SPEC_MODE_FOR_EXTEND_LAST = 3, + SPEC_MODE_LAST = 8 + }; + + enum + { + /* Offset to reach ZERO_EXTEND patterns. */ + SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1 + }; + + /* Return index of the MODE. */ + static int + ia64_mode_to_int (machine_mode mode) + { + switch (mode) + { + case E_BImode: return 0; /* SPEC_MODE_FIRST */ + case E_QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */ + case E_HImode: return 2; + case E_SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */ + case E_DImode: return 4; + case E_SFmode: return 5; + case E_DFmode: return 6; + case E_XFmode: return 7; + case E_TImode: + /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not + mentioned in itanium[12].md. 
Predicate fp_register_operand also + needs to be defined. Bottom line: better disable for now. */ + return SPEC_MODE_INVALID; + default: return SPEC_MODE_INVALID; + } + } + + /* Provide information about speculation capabilities. */ + static void + ia64_set_sched_flags (spec_info_t spec_info) + { + unsigned int *flags = &(current_sched_info->flags); + + if (*flags & SCHED_RGN + || *flags & SCHED_EBB + || *flags & SEL_SCHED) + { + int mask = 0; + + if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0) + || (mflag_sched_ar_data_spec && reload_completed)) + { + mask |= BEGIN_DATA; + + if (!sel_sched_p () + && ((mflag_sched_br_in_data_spec && !reload_completed) + || (mflag_sched_ar_in_data_spec && reload_completed))) + mask |= BE_IN_DATA; + } + + if (mflag_sched_control_spec + && (!sel_sched_p () + || reload_completed)) + { + mask |= BEGIN_CONTROL; + + if (!sel_sched_p () && mflag_sched_in_control_spec) + mask |= BE_IN_CONTROL; + } + + spec_info->mask = mask; + + if (mask) + { + *flags |= USE_DEPS_LIST | DO_SPECULATION; + + if (mask & BE_IN_SPEC) + *flags |= NEW_BBS; + + spec_info->flags = 0; + + if ((mask & CONTROL_SPEC) + && sel_sched_p () && mflag_sel_sched_dont_check_control_spec) + spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL; + + if (sched_verbose >= 1) + spec_info->dump = sched_dump; + else + spec_info->dump = 0; + + if (mflag_sched_count_spec_in_critical_path) + spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH; + } + } + else + spec_info->mask = 0; + } + + /* If INSN is an appropriate load return its mode. + Return -1 otherwise. */ + static int + get_mode_no_for_insn (rtx_insn *insn) + { + rtx reg, mem, mode_rtx; + int mode_no; + bool extend_p; + + extract_insn_cached (insn); + + /* We use WHICH_ALTERNATIVE only after reload. This will + guarantee that reload won't touch a speculative insn. */ + + if (recog_data.n_operands != 2) + return -1; + + reg = recog_data.operand[0]; + mem = recog_data.operand[1]; + + /* We should use MEM's mode since REG's mode in presence of + ZERO_EXTEND will always be DImode. */ + if (get_attr_speculable1 (insn) == SPECULABLE1_YES) + /* Process non-speculative ld. */ + { + if (!reload_completed) + { + /* Do not speculate into regs like ar.lc. */ + if (!REG_P (reg) || AR_REGNO_P (REGNO (reg))) + return -1; + + if (!MEM_P (mem)) + return -1; + + { + rtx mem_reg = XEXP (mem, 0); + + if (!REG_P (mem_reg)) + return -1; + } + + mode_rtx = mem; + } + else if (get_attr_speculable2 (insn) == SPECULABLE2_YES) + { + gcc_assert (REG_P (reg) && MEM_P (mem)); + mode_rtx = mem; + } + else + return -1; + } + else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES + || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES + || get_attr_check_load (insn) == CHECK_LOAD_YES) + /* Process speculative ld or ld.c. */ + { + gcc_assert (REG_P (reg) && MEM_P (mem)); + mode_rtx = mem; + } + else + { + enum attr_itanium_class attr_class = get_attr_itanium_class (insn); + + if (attr_class == ITANIUM_CLASS_CHK_A + || attr_class == ITANIUM_CLASS_CHK_S_I + || attr_class == ITANIUM_CLASS_CHK_S_F) + /* Process chk. 
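The twelve-entry generator arrays in get_spec_load_gen_function above are indexed by the value that ia64_mode_to_int and get_mode_no_for_insn compute together: indices 0 through 8 are the plain modes (BImode through TImode), and indices 9 through 11 are reached by adding SPEC_GEN_EXTEND_OFFSET (8 - 1 + 1 = 8) for zero-extended QImode, HImode and SImode loads. A minimal standalone sketch of that index arithmetic follows; EXTEND_OFFSET and spec_array_index are illustrative stand-ins, and the constants simply mirror the SPEC_MODES enum shown above.

   #include <assert.h>

   enum spec_modes
   {
     SPEC_MODE_INVALID = -1,
     SPEC_MODE_FIRST = 0,
     SPEC_MODE_FOR_EXTEND_FIRST = 1,
     SPEC_MODE_FOR_EXTEND_LAST = 3,
     SPEC_MODE_LAST = 8
   };

   /* Offset to the ZERO_EXTEND entries at the tail of the twelve-entry
      generator arrays: 8 - 1 + 1 == 8.  */
   enum { EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1 };

   /* Combine a base mode index and an extend_p flag into an array index,
      the way get_mode_no_for_insn does after calling ia64_mode_to_int.  */
   static int
   spec_array_index (int mode_no, int extend_p)
   {
     if (!extend_p)
       return mode_no;                        /* 0 .. 8: plain loads.  */
     if (mode_no < SPEC_MODE_FOR_EXTEND_FIRST
         || mode_no > SPEC_MODE_FOR_EXTEND_LAST)
       return SPEC_MODE_INVALID;              /* only QI/HI/SI can extend.  */
     return mode_no + EXTEND_OFFSET;          /* 9 .. 11: zero-extends.  */
   }

   int
   main (void)
   {
     assert (spec_array_index (1, 1) == 9);   /* QImode + zero-extend.  */
     assert (spec_array_index (3, 1) == 11);  /* SImode + zero-extend.  */
     assert (spec_array_index (4, 0) == 4);   /* plain DImode load.  */
     assert (spec_array_index (4, 1) == SPEC_MODE_INVALID);
     return 0;
   }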
*/ + mode_rtx = reg; + else + return -1; + } + + mode_no = ia64_mode_to_int (GET_MODE (mode_rtx)); + + if (mode_no == SPEC_MODE_INVALID) + return -1; + + extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx)); + + if (extend_p) + { + if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no + && mode_no <= SPEC_MODE_FOR_EXTEND_LAST)) + return -1; + + mode_no += SPEC_GEN_EXTEND_OFFSET; + } + + return mode_no; + } + + /* If X is an unspec part of a speculative load, return its code. + Return -1 otherwise. */ + static int + get_spec_unspec_code (const_rtx x) + { + if (GET_CODE (x) != UNSPEC) + return -1; + + { + int code; + + code = XINT (x, 1); + + switch (code) + { + case UNSPEC_LDA: + case UNSPEC_LDS: + case UNSPEC_LDS_A: + case UNSPEC_LDSA: + return code; + + default: + return -1; + } + } + } + + /* Implement skip_rtx_p hook. */ + static bool + ia64_skip_rtx_p (const_rtx x) + { + return get_spec_unspec_code (x) != -1; + } + + /* If INSN is a speculative load, return its UNSPEC code. + Return -1 otherwise. */ + static int + get_insn_spec_code (const_rtx insn) + { + rtx pat, reg, mem; + + pat = PATTERN (insn); + + if (GET_CODE (pat) == COND_EXEC) + pat = COND_EXEC_CODE (pat); + + if (GET_CODE (pat) != SET) + return -1; + + reg = SET_DEST (pat); + if (!REG_P (reg)) + return -1; + + mem = SET_SRC (pat); + if (GET_CODE (mem) == ZERO_EXTEND) + mem = XEXP (mem, 0); + + return get_spec_unspec_code (mem); + } + + /* If INSN is a speculative load, return a ds with the speculation types. + Otherwise [if INSN is a normal instruction] return 0. */ + static ds_t + ia64_get_insn_spec_ds (rtx_insn *insn) + { + int code = get_insn_spec_code (insn); + + switch (code) + { + case UNSPEC_LDA: + return BEGIN_DATA; + + case UNSPEC_LDS: + case UNSPEC_LDS_A: + return BEGIN_CONTROL; + + case UNSPEC_LDSA: + return BEGIN_DATA | BEGIN_CONTROL; + + default: + return 0; + } + } + + /* If INSN is a speculative load return a ds with the speculation types that + will be checked. + Otherwise [if INSN is a normal instruction] return 0. */ + static ds_t + ia64_get_insn_checked_ds (rtx_insn *insn) + { + int code = get_insn_spec_code (insn); + + switch (code) + { + case UNSPEC_LDA: + return BEGIN_DATA | BEGIN_CONTROL; + + case UNSPEC_LDS: + return BEGIN_CONTROL; + + case UNSPEC_LDS_A: + case UNSPEC_LDSA: + return BEGIN_DATA | BEGIN_CONTROL; + + default: + return 0; + } + } + + /* If GEN_P is true, calculate the index of needed speculation check and return + speculative pattern for INSN with speculative mode TS, machine mode + MODE_NO and with ZERO_EXTEND (if EXTEND_P is true). + If GEN_P is false, just calculate the index of needed speculation check. */ + static rtx + ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no) + { + rtx pat, new_pat; + gen_func_t gen_load; + + gen_load = get_spec_load_gen_function (ts, mode_no); + + new_pat = gen_load (copy_rtx (recog_data.operand[0]), + copy_rtx (recog_data.operand[1])); + + pat = PATTERN (insn); + if (GET_CODE (pat) == COND_EXEC) + new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)), + new_pat); + + return new_pat; + } + + static bool + insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED, + ds_t ds ATTRIBUTE_UNUSED) + { + return false; + } + + /* Implement targetm.sched.speculate_insn hook. + Check if the INSN can be TS speculative. + If 'no' - return -1. + If 'yes' - generate speculative pattern in the NEW_PAT and return 1. + If current pattern of the INSN already provides TS speculation, + return 0. 
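Taken together with get_spec_load_gen_function above, ia64_get_insn_spec_ds and ia64_get_insn_checked_ds encode the usual pairing between a requested speculation kind and the IA-64 load form that implements it. A minimal standalone sketch of that pairing follows; SPEC_DATA and SPEC_CONTROL are illustrative stand-ins for the scheduler's BEGIN_DATA and BEGIN_CONTROL flags, with made-up bit values, and spec_load_form is not a GCC function.

   #include <stdio.h>

   /* Made-up bit values; stand-ins for BEGIN_DATA / BEGIN_CONTROL.  */
   enum { SPEC_DATA = 1 << 0, SPEC_CONTROL = 1 << 1 };

   /* Which load form the dispatch in get_spec_load_gen_function selects
      for a requested speculation kind.  */
   static const char *
   spec_load_form (unsigned ds)
   {
     if ((ds & SPEC_DATA) && (ds & SPEC_CONTROL))
       return "ld.sa";  /* data + control speculative; checked by chk.a or ld.c.  */
     if (ds & SPEC_DATA)
       return "ld.a";   /* advanced (data-speculative) load.  */
     if (ds & SPEC_CONTROL)
       return "ld.s";   /* control-speculative; the dispatch may instead pick
                           the "speculative_a" variant when the later check can
                           be a plain ld.c.  */
     return "ld";       /* ordinary, non-speculative load.  */
   }

   int
   main (void)
   {
     printf ("%s %s %s %s\n",
             spec_load_form (SPEC_DATA | SPEC_CONTROL),  /* ld.sa */
             spec_load_form (SPEC_DATA),                 /* ld.a  */
             spec_load_form (SPEC_CONTROL),              /* ld.s  */
             spec_load_form (0));                        /* ld    */
     return 0;
   }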
*/ + static int + ia64_speculate_insn (rtx_insn *insn, ds_t ts, rtx *new_pat) + { + int mode_no; + int res; + + gcc_assert (!(ts & ~SPECULATIVE)); + + if (ia64_spec_check_p (insn)) + return -1; + + if ((ts & BE_IN_SPEC) + && !insn_can_be_in_speculative_p (insn, ts)) + return -1; + + mode_no = get_mode_no_for_insn (insn); + + if (mode_no != SPEC_MODE_INVALID) + { + if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts)) + res = 0; + else + { + res = 1; + *new_pat = ia64_gen_spec_load (insn, ts, mode_no); + } + } + else + res = -1; + + return res; + } + + /* Return a function that will generate a check for speculation TS with mode + MODE_NO. + If simple check is needed, pass true for SIMPLE_CHECK_P. + If clearing check is needed, pass true for CLEARING_CHECK_P. */ + static gen_func_t + get_spec_check_gen_function (ds_t ts, int mode_no, + bool simple_check_p, bool clearing_check_p) + { + static gen_func_t gen_ld_c_clr[] = { + gen_movbi_clr, + gen_movqi_clr, + gen_movhi_clr, + gen_movsi_clr, + gen_movdi_clr, + gen_movsf_clr, + gen_movdf_clr, + gen_movxf_clr, + gen_movti_clr, + gen_zero_extendqidi2_clr, + gen_zero_extendhidi2_clr, + gen_zero_extendsidi2_clr, + }; + static gen_func_t gen_ld_c_nc[] = { + gen_movbi_nc, + gen_movqi_nc, + gen_movhi_nc, + gen_movsi_nc, + gen_movdi_nc, + gen_movsf_nc, + gen_movdf_nc, + gen_movxf_nc, + gen_movti_nc, + gen_zero_extendqidi2_nc, + gen_zero_extendhidi2_nc, + gen_zero_extendsidi2_nc, + }; + static gen_func_t gen_chk_a_clr[] = { + gen_advanced_load_check_clr_bi, + gen_advanced_load_check_clr_qi, + gen_advanced_load_check_clr_hi, + gen_advanced_load_check_clr_si, + gen_advanced_load_check_clr_di, + gen_advanced_load_check_clr_sf, + gen_advanced_load_check_clr_df, + gen_advanced_load_check_clr_xf, + gen_advanced_load_check_clr_ti, + gen_advanced_load_check_clr_di, + gen_advanced_load_check_clr_di, + gen_advanced_load_check_clr_di, + }; + static gen_func_t gen_chk_a_nc[] = { + gen_advanced_load_check_nc_bi, + gen_advanced_load_check_nc_qi, + gen_advanced_load_check_nc_hi, + gen_advanced_load_check_nc_si, + gen_advanced_load_check_nc_di, + gen_advanced_load_check_nc_sf, + gen_advanced_load_check_nc_df, + gen_advanced_load_check_nc_xf, + gen_advanced_load_check_nc_ti, + gen_advanced_load_check_nc_di, + gen_advanced_load_check_nc_di, + gen_advanced_load_check_nc_di, + }; + static gen_func_t gen_chk_s[] = { + gen_speculation_check_bi, + gen_speculation_check_qi, + gen_speculation_check_hi, + gen_speculation_check_si, + gen_speculation_check_di, + gen_speculation_check_sf, + gen_speculation_check_df, + gen_speculation_check_xf, + gen_speculation_check_ti, + gen_speculation_check_di, + gen_speculation_check_di, + gen_speculation_check_di, + }; + + gen_func_t *gen_check; + + if (ts & BEGIN_DATA) + { + /* We don't need recovery because even if this is ld.sa + ALAT entry will be allocated only if NAT bit is set to zero. + So it is enough to use ld.c here. */ + + if (simple_check_p) + { + gcc_assert (mflag_sched_spec_ldc); + + if (clearing_check_p) + gen_check = gen_ld_c_clr; + else + gen_check = gen_ld_c_nc; + } + else + { + if (clearing_check_p) + gen_check = gen_chk_a_clr; + else + gen_check = gen_chk_a_nc; + } + } + else if (ts & BEGIN_CONTROL) + { + if (simple_check_p) + /* We might want to use ld.sa -> ld.c instead of + ld.s -> chk.s. 
*/ + { + gcc_assert (!ia64_needs_block_p (ts)); + + if (clearing_check_p) + gen_check = gen_ld_c_clr; + else + gen_check = gen_ld_c_nc; + } + else + { + gen_check = gen_chk_s; + } + } + else + gcc_unreachable (); + + gcc_assert (mode_no >= 0); + return gen_check[mode_no]; + } + + /* Return nonzero, if INSN needs branchy recovery check. */ + static bool + ia64_needs_block_p (ds_t ts) + { + if (ts & BEGIN_DATA) + return !mflag_sched_spec_ldc; + + gcc_assert ((ts & BEGIN_CONTROL) != 0); + + return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc); + } + + /* Generate (or regenerate) a recovery check for INSN. */ + static rtx + ia64_gen_spec_check (rtx_insn *insn, rtx_insn *label, ds_t ds) + { + rtx op1, pat, check_pat; + gen_func_t gen_check; + int mode_no; + + mode_no = get_mode_no_for_insn (insn); + gcc_assert (mode_no >= 0); + + if (label) + op1 = label; + else + { + gcc_assert (!ia64_needs_block_p (ds)); + op1 = copy_rtx (recog_data.operand[1]); + } + + gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX, + true); + + check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1); + + pat = PATTERN (insn); + if (GET_CODE (pat) == COND_EXEC) + check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)), + check_pat); + + return check_pat; + } + + /* Return nonzero, if X is branchy recovery check. */ + static int + ia64_spec_check_p (rtx x) + { + x = PATTERN (x); + if (GET_CODE (x) == COND_EXEC) + x = COND_EXEC_CODE (x); + if (GET_CODE (x) == SET) + return ia64_spec_check_src_p (SET_SRC (x)); + return 0; + } + + /* Return nonzero, if SRC belongs to recovery check. */ + static int + ia64_spec_check_src_p (rtx src) + { + if (GET_CODE (src) == IF_THEN_ELSE) + { + rtx t; + + t = XEXP (src, 0); + if (GET_CODE (t) == NE) + { + t = XEXP (t, 0); + + if (GET_CODE (t) == UNSPEC) + { + int code; + + code = XINT (t, 1); + + if (code == UNSPEC_LDCCLR + || code == UNSPEC_LDCNC + || code == UNSPEC_CHKACLR + || code == UNSPEC_CHKANC + || code == UNSPEC_CHKS) + { + gcc_assert (code != 0); + return code; + } + } + } + } + return 0; + } + + + /* The following page contains abstract data `bundle states' which are + used for bundling insns (inserting nops and template generation). */ + + /* The following describes state of insn bundling. */ + + struct bundle_state + { + /* Unique bundle state number to identify them in the debugging + output */ + int unique_num; + rtx_insn *insn; /* corresponding insn, NULL for the 1st and the last state */ + /* number nops before and after the insn */ + short before_nops_num, after_nops_num; + int insn_num; /* insn number (0 - for initial state, 1 - for the 1st + insn */ + int cost; /* cost of the state in cycles */ + int accumulated_insns_num; /* number of all previous insns including + nops. L is considered as 2 insns */ + int branch_deviation; /* deviation of previous branches from 3rd slots */ + int middle_bundle_stops; /* number of stop bits in the middle of bundles */ + struct bundle_state *next; /* next state with the same insn_num */ + struct bundle_state *originator; /* originator (previous insn state) */ + /* All bundle states are in the following chain. */ + struct bundle_state *allocated_states_chain; + /* The DFA State after issuing the insn and the nops. */ + state_t dfa_state; + }; + + /* The following is map insn number to the corresponding bundle state. */ + + static struct bundle_state **index_to_bundle_states; + + /* The unique number of next bundle state. 
*/ + + static int bundle_states_num; + + /* All allocated bundle states are in the following chain. */ + + static struct bundle_state *allocated_bundle_states_chain; + + /* All allocated but not used bundle states are in the following + chain. */ + + static struct bundle_state *free_bundle_state_chain; + + + /* The following function returns a free bundle state. */ + + static struct bundle_state * + get_free_bundle_state (void) + { + struct bundle_state *result; + + if (free_bundle_state_chain != NULL) + { + result = free_bundle_state_chain; + free_bundle_state_chain = result->next; + } + else + { + result = XNEW (struct bundle_state); + result->dfa_state = xmalloc (dfa_state_size); + result->allocated_states_chain = allocated_bundle_states_chain; + allocated_bundle_states_chain = result; + } + result->unique_num = bundle_states_num++; + return result; + + } + + /* The following function frees given bundle state. */ + + static void + free_bundle_state (struct bundle_state *state) + { + state->next = free_bundle_state_chain; + free_bundle_state_chain = state; + } + + /* Start work with abstract data `bundle states'. */ + + static void + initiate_bundle_states (void) + { + bundle_states_num = 0; + free_bundle_state_chain = NULL; + allocated_bundle_states_chain = NULL; + } + + /* Finish work with abstract data `bundle states'. */ + + static void + finish_bundle_states (void) + { + struct bundle_state *curr_state, *next_state; + + for (curr_state = allocated_bundle_states_chain; + curr_state != NULL; + curr_state = next_state) + { + next_state = curr_state->allocated_states_chain; + free (curr_state->dfa_state); + free (curr_state); + } + } + + /* Hashtable helpers. */ + + struct bundle_state_hasher : nofree_ptr_hash + { + static inline hashval_t hash (const bundle_state *); + static inline bool equal (const bundle_state *, const bundle_state *); + }; + + /* The function returns hash of BUNDLE_STATE. */ + + inline hashval_t + bundle_state_hasher::hash (const bundle_state *state) + { + unsigned result, i; + + for (result = i = 0; i < dfa_state_size; i++) + result += (((unsigned char *) state->dfa_state) [i] + << ((i % CHAR_BIT) * 3 + CHAR_BIT)); + return result + state->insn_num; + } + + /* The function returns nonzero if the bundle state keys are equal. */ + + inline bool + bundle_state_hasher::equal (const bundle_state *state1, + const bundle_state *state2) + { + return (state1->insn_num == state2->insn_num + && memcmp (state1->dfa_state, state2->dfa_state, + dfa_state_size) == 0); + } + + /* Hash table of the bundle states. The key is dfa_state and insn_num + of the bundle states. */ + + static hash_table *bundle_state_table; + + /* The function inserts the BUNDLE_STATE into the hash table. The + function returns nonzero if the bundle has been inserted into the + table. The table contains the best bundle state with given key. 
*/ + + static int + insert_bundle_state (struct bundle_state *bundle_state) + { + struct bundle_state **entry_ptr; + + entry_ptr = bundle_state_table->find_slot (bundle_state, INSERT); + if (*entry_ptr == NULL) + { + bundle_state->next = index_to_bundle_states [bundle_state->insn_num]; + index_to_bundle_states [bundle_state->insn_num] = bundle_state; + *entry_ptr = bundle_state; + return TRUE; + } + else if (bundle_state->cost < (*entry_ptr)->cost + || (bundle_state->cost == (*entry_ptr)->cost + && ((*entry_ptr)->accumulated_insns_num + > bundle_state->accumulated_insns_num + || ((*entry_ptr)->accumulated_insns_num + == bundle_state->accumulated_insns_num + && ((*entry_ptr)->branch_deviation + > bundle_state->branch_deviation + || ((*entry_ptr)->branch_deviation + == bundle_state->branch_deviation + && (*entry_ptr)->middle_bundle_stops + > bundle_state->middle_bundle_stops)))))) + + { + struct bundle_state temp; + + temp = **entry_ptr; + **entry_ptr = *bundle_state; + (*entry_ptr)->next = temp.next; + *bundle_state = temp; + } + return FALSE; + } + + /* Start work with the hash table. */ + + static void + initiate_bundle_state_table (void) + { + bundle_state_table = new hash_table (50); + } + + /* Finish work with the hash table. */ + + static void + finish_bundle_state_table (void) + { + delete bundle_state_table; + bundle_state_table = NULL; + } + + + + /* The following variable is a insn `nop' used to check bundle states + with different number of inserted nops. */ + + static rtx_insn *ia64_nop; + + /* The following function tries to issue NOPS_NUM nops for the current + state without advancing processor cycle. If it failed, the + function returns FALSE and frees the current state. */ + + static int + try_issue_nops (struct bundle_state *curr_state, int nops_num) + { + int i; + + for (i = 0; i < nops_num; i++) + if (state_transition (curr_state->dfa_state, ia64_nop) >= 0) + { + free_bundle_state (curr_state); + return FALSE; + } + return TRUE; + } + + /* The following function tries to issue INSN for the current + state without advancing processor cycle. If it failed, the + function returns FALSE and frees the current state. */ + + static int + try_issue_insn (struct bundle_state *curr_state, rtx insn) + { + if (insn && state_transition (curr_state->dfa_state, insn) >= 0) + { + free_bundle_state (curr_state); + return FALSE; + } + return TRUE; + } + + /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN + starting with ORIGINATOR without advancing processor cycle. If + TRY_BUNDLE_END_P is TRUE, the function also/only (if + ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill all bundle. + If it was successful, the function creates new bundle state and + insert into the hash table and into `index_to_bundle_states'. 
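When a new state collides with an existing one on the same (dfa_state, insn_num) key, insert_bundle_state above keeps the better of the two under a lexicographic order: lower cost first, then fewer accumulated insns and nops, then smaller branch deviation, then fewer mid-bundle stops. The nested condition is equivalent to the flattened comparison sketched below; struct bundle_key_costs and better_bundle_state_p are illustrative stand-ins, not GCC types.

   #include <assert.h>
   #include <stdbool.h>

   /* Mirrors only the fields that take part in the tie-breaking;
      not GCC's struct bundle_state.  */
   struct bundle_key_costs
   {
     int cost;                    /* cycles */
     int accumulated_insns_num;   /* insns including nops; an L insn counts as 2 */
     int branch_deviation;        /* how far branches sit from 3rd slots */
     int middle_bundle_stops;     /* stop bits in the middle of bundles */
   };

   /* True if NEW_S should replace OLD_S for the same (dfa_state, insn_num)
      key: compare cost, then issued insns/nops, then branch placement,
      then mid-bundle stops, in that order.  */
   static bool
   better_bundle_state_p (const struct bundle_key_costs *new_s,
                          const struct bundle_key_costs *old_s)
   {
     if (new_s->cost != old_s->cost)
       return new_s->cost < old_s->cost;
     if (new_s->accumulated_insns_num != old_s->accumulated_insns_num)
       return new_s->accumulated_insns_num < old_s->accumulated_insns_num;
     if (new_s->branch_deviation != old_s->branch_deviation)
       return new_s->branch_deviation < old_s->branch_deviation;
     return new_s->middle_bundle_stops < old_s->middle_bundle_stops;
   }

   int
   main (void)
   {
     struct bundle_key_costs a = { 3, 9, 0, 0 };
     struct bundle_key_costs b = { 3, 9, 1, 0 };
     assert (better_bundle_state_p (&a, &b));   /* same cost and insns, better slots.  */
     assert (!better_bundle_state_p (&b, &a));
     return 0;
   }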
*/ + + static void + issue_nops_and_insn (struct bundle_state *originator, int before_nops_num, + rtx_insn *insn, int try_bundle_end_p, + int only_bundle_end_p) + { + struct bundle_state *curr_state; + + curr_state = get_free_bundle_state (); + memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size); + curr_state->insn = insn; + curr_state->insn_num = originator->insn_num + 1; + curr_state->cost = originator->cost; + curr_state->originator = originator; + curr_state->before_nops_num = before_nops_num; + curr_state->after_nops_num = 0; + curr_state->accumulated_insns_num + = originator->accumulated_insns_num + before_nops_num; + curr_state->branch_deviation = originator->branch_deviation; + curr_state->middle_bundle_stops = originator->middle_bundle_stops; + gcc_assert (insn); + if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier) + { + gcc_assert (GET_MODE (insn) != TImode); + if (!try_issue_nops (curr_state, before_nops_num)) + return; + if (!try_issue_insn (curr_state, insn)) + return; + memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size); + if (curr_state->accumulated_insns_num % 3 != 0) + curr_state->middle_bundle_stops++; + if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0 + && curr_state->accumulated_insns_num % 3 != 0) + { + free_bundle_state (curr_state); + return; + } + } + else if (GET_MODE (insn) != TImode) + { + if (!try_issue_nops (curr_state, before_nops_num)) + return; + if (!try_issue_insn (curr_state, insn)) + return; + curr_state->accumulated_insns_num++; + gcc_assert (!unknown_for_bundling_p (insn)); + + if (ia64_safe_type (insn) == TYPE_L) + curr_state->accumulated_insns_num++; + } + else + { + /* If this is an insn that must be first in a group, then don't allow + nops to be emitted before it. Currently, alloc is the only such + supported instruction. */ + /* ??? The bundling automatons should handle this for us, but they do + not yet have support for the first_insn attribute. */ + if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES) + { + free_bundle_state (curr_state); + return; + } + + state_transition (curr_state->dfa_state, dfa_pre_cycle_insn); + state_transition (curr_state->dfa_state, NULL); + curr_state->cost++; + if (!try_issue_nops (curr_state, before_nops_num)) + return; + if (!try_issue_insn (curr_state, insn)) + return; + curr_state->accumulated_insns_num++; + if (unknown_for_bundling_p (insn)) + { + /* Finish bundle containing asm insn. 
*/ + curr_state->after_nops_num + = 3 - curr_state->accumulated_insns_num % 3; + curr_state->accumulated_insns_num + += 3 - curr_state->accumulated_insns_num % 3; + } + else if (ia64_safe_type (insn) == TYPE_L) + curr_state->accumulated_insns_num++; + } + if (ia64_safe_type (insn) == TYPE_B) + curr_state->branch_deviation + += 2 - (curr_state->accumulated_insns_num - 1) % 3; + if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0) + { + if (!only_bundle_end_p && insert_bundle_state (curr_state)) + { + state_t dfa_state; + struct bundle_state *curr_state1; + struct bundle_state *allocated_states_chain; + + curr_state1 = get_free_bundle_state (); + dfa_state = curr_state1->dfa_state; + allocated_states_chain = curr_state1->allocated_states_chain; + *curr_state1 = *curr_state; + curr_state1->dfa_state = dfa_state; + curr_state1->allocated_states_chain = allocated_states_chain; + memcpy (curr_state1->dfa_state, curr_state->dfa_state, + dfa_state_size); + curr_state = curr_state1; + } + if (!try_issue_nops (curr_state, + 3 - curr_state->accumulated_insns_num % 3)) + return; + curr_state->after_nops_num + = 3 - curr_state->accumulated_insns_num % 3; + curr_state->accumulated_insns_num + += 3 - curr_state->accumulated_insns_num % 3; + } + if (!insert_bundle_state (curr_state)) + free_bundle_state (curr_state); + return; + } + + /* The following function returns position in the two window bundle + for given STATE. */ + + static int + get_max_pos (state_t state) + { + if (cpu_unit_reservation_p (state, pos_6)) + return 6; + else if (cpu_unit_reservation_p (state, pos_5)) + return 5; + else if (cpu_unit_reservation_p (state, pos_4)) + return 4; + else if (cpu_unit_reservation_p (state, pos_3)) + return 3; + else if (cpu_unit_reservation_p (state, pos_2)) + return 2; + else if (cpu_unit_reservation_p (state, pos_1)) + return 1; + else + return 0; + } + + /* The function returns code of a possible template for given position + and state. The function should be called only with 2 values of + position equal to 3 or 6. We avoid generating F NOPs by putting + templates containing F insns at the end of the template search + because undocumented anomaly in McKinley derived cores which can + cause stalls if an F-unit insn (including a NOP) is issued within a + six-cycle window after reading certain application registers (such + as ar.bsp). Furthermore, power-considerations also argue against + the use of F-unit instructions unless they're really needed. 
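For reference, the template numbers returned by get_template below can be read off the cpu-unit names it tests: 0 MII, 1 MMI, 2 MFI, 3 MMF, 4 BBB, 5 MBB, 6 MIB, 7 MMB, 8 MFB, 9 MLX (M memory, I integer, F floating point, B branch, LX long immediate). The standalone sketch below lists them in the order the function probes the reservations, with the F-bearing templates tried late, matching the comment above; template_slots and probe_order are illustrative arrays inferred from those unit names, not tables copied from the GCC sources.

   #include <stdio.h>

   /* Slot letters read off the unit names (e.g. _0mmi_): M memory,
      I integer, F floating point, B branch, LX long immediate.  */
   static const char *const template_slots[] =
   {
     "MII",  /* 0 */  "MMI",  /* 1 */  "MFI",  /* 2 */  "MMF",  /* 3 */
     "BBB",  /* 4 */  "MBB",  /* 5 */  "MIB",  /* 6 */  "MMB",  /* 7 */
     "MFB",  /* 8 */  "MLX",  /* 9 */
   };

   /* The order in which get_template probes the reservations; templates
      with an F slot come late, per the comment above about avoiding
      F-unit nops.  */
   static const int probe_order[] = { 1, 0, 7, 6, 5, 4, 3, 2, 8, 9 };

   int
   main (void)
   {
     for (unsigned i = 0; i < sizeof probe_order / sizeof probe_order[0]; i++)
       printf ("try template %d (%s)\n",
               probe_order[i], template_slots[probe_order[i]]);
     return 0;
   }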
*/ + + static int + get_template (state_t state, int pos) + { + switch (pos) + { + case 3: + if (cpu_unit_reservation_p (state, _0mmi_)) + return 1; + else if (cpu_unit_reservation_p (state, _0mii_)) + return 0; + else if (cpu_unit_reservation_p (state, _0mmb_)) + return 7; + else if (cpu_unit_reservation_p (state, _0mib_)) + return 6; + else if (cpu_unit_reservation_p (state, _0mbb_)) + return 5; + else if (cpu_unit_reservation_p (state, _0bbb_)) + return 4; + else if (cpu_unit_reservation_p (state, _0mmf_)) + return 3; + else if (cpu_unit_reservation_p (state, _0mfi_)) + return 2; + else if (cpu_unit_reservation_p (state, _0mfb_)) + return 8; + else if (cpu_unit_reservation_p (state, _0mlx_)) + return 9; + else + gcc_unreachable (); + case 6: + if (cpu_unit_reservation_p (state, _1mmi_)) + return 1; + else if (cpu_unit_reservation_p (state, _1mii_)) + return 0; + else if (cpu_unit_reservation_p (state, _1mmb_)) + return 7; + else if (cpu_unit_reservation_p (state, _1mib_)) + return 6; + else if (cpu_unit_reservation_p (state, _1mbb_)) + return 5; + else if (cpu_unit_reservation_p (state, _1bbb_)) + return 4; + else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_)) + return 3; + else if (cpu_unit_reservation_p (state, _1mfi_)) + return 2; + else if (cpu_unit_reservation_p (state, _1mfb_)) + return 8; + else if (cpu_unit_reservation_p (state, _1mlx_)) + return 9; + else + gcc_unreachable (); + default: + gcc_unreachable (); + } + } + + /* True when INSN is important for bundling. */ + + static bool + important_for_bundling_p (rtx_insn *insn) + { + return (INSN_P (insn) + && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE + && GET_CODE (PATTERN (insn)) != USE + && GET_CODE (PATTERN (insn)) != CLOBBER); + } + + /* The following function returns an insn important for insn bundling + followed by INSN and before TAIL. */ + + static rtx_insn * + get_next_important_insn (rtx_insn *insn, rtx_insn *tail) + { + for (; insn && insn != tail; insn = NEXT_INSN (insn)) + if (important_for_bundling_p (insn)) + return insn; + return NULL; + } + + /* True when INSN is unknown, but important, for bundling. */ + + static bool + unknown_for_bundling_p (rtx_insn *insn) + { + return (INSN_P (insn) + && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_UNKNOWN + && GET_CODE (PATTERN (insn)) != USE + && GET_CODE (PATTERN (insn)) != CLOBBER); + } + + /* Add a bundle selector TEMPLATE0 before INSN. */ + + static void + ia64_add_bundle_selector_before (int template0, rtx_insn *insn) + { + rtx b = gen_bundle_selector (GEN_INT (template0)); + + ia64_emit_insn_before (b, insn); + #if NR_BUNDLES == 10 + if ((template0 == 4 || template0 == 5) + && ia64_except_unwind_info (&global_options) == UI_TARGET) + { + int i; + rtx note = NULL_RTX; + + /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the + first or second slot. If it is and has REG_EH_NOTE set, copy it + to following nops, as br.call sets rp to the address of following + bundle and therefore an EH region end must be on a bundle + boundary. 
*/ + insn = PREV_INSN (insn); + for (i = 0; i < 3; i++) + { + do + insn = next_active_insn (insn); + while (NONJUMP_INSN_P (insn) + && get_attr_empty (insn) == EMPTY_YES); + if (CALL_P (insn)) + note = find_reg_note (insn, REG_EH_REGION, NULL_RTX); + else if (note) + { + int code; + + gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop + || code == CODE_FOR_nop_b); + if (find_reg_note (insn, REG_EH_REGION, NULL_RTX)) + note = NULL_RTX; + else + add_reg_note (insn, REG_EH_REGION, XEXP (note, 0)); + } + } + } + #endif + } + + /* The following function does insn bundling. Bundling means + inserting templates and nop insns to fit insn groups into permitted + templates. Instruction scheduling uses NDFA (non-deterministic + finite automata) encoding informations about the templates and the + inserted nops. Nondeterminism of the automata permits follows + all possible insn sequences very fast. + + Unfortunately it is not possible to get information about inserting + nop insns and used templates from the automata states. The + automata only says that we can issue an insn possibly inserting + some nops before it and using some template. Therefore insn + bundling in this function is implemented by using DFA + (deterministic finite automata). We follow all possible insn + sequences by inserting 0-2 nops (that is what the NDFA describe for + insn scheduling) before/after each insn being bundled. We know the + start of simulated processor cycle from insn scheduling (insn + starting a new cycle has TImode). + + Simple implementation of insn bundling would create enormous + number of possible insn sequences satisfying information about new + cycle ticks taken from the insn scheduling. To make the algorithm + practical we use dynamic programming. Each decision (about + inserting nops and implicitly about previous decisions) is described + by structure bundle_state (see above). If we generate the same + bundle state (key is automaton state after issuing the insns and + nops for it), we reuse already generated one. As consequence we + reject some decisions which cannot improve the solution and + reduce memory for the algorithm. + + When we reach the end of EBB (extended basic block), we choose the + best sequence and then, moving back in EBB, insert templates for + the best alternative. The templates are taken from querying + automaton state for each insn in chosen bundle states. + + So the algorithm makes two (forward and backward) passes through + EBB. */ + + static void + bundling (FILE *dump, int verbose, rtx_insn *prev_head_insn, rtx_insn *tail) + { + struct bundle_state *curr_state, *next_state, *best_state; + rtx_insn *insn, *next_insn; + int insn_num; + int i, bundle_end_p, only_bundle_end_p, asm_p; + int pos = 0, max_pos, template0, template1; + rtx_insn *b; + enum attr_type type; + + insn_num = 0; + /* Count insns in the EBB. */ + for (insn = NEXT_INSN (prev_head_insn); + insn && insn != tail; + insn = NEXT_INSN (insn)) + if (INSN_P (insn)) + insn_num++; + if (insn_num == 0) + return; + bundling_p = 1; + dfa_clean_insn_cache (); + initiate_bundle_state_table (); + index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2); + /* First (forward) pass -- generation of bundle states. 
*/ + curr_state = get_free_bundle_state (); + curr_state->insn = NULL; + curr_state->before_nops_num = 0; + curr_state->after_nops_num = 0; + curr_state->insn_num = 0; + curr_state->cost = 0; + curr_state->accumulated_insns_num = 0; + curr_state->branch_deviation = 0; + curr_state->middle_bundle_stops = 0; + curr_state->next = NULL; + curr_state->originator = NULL; + state_reset (curr_state->dfa_state); + index_to_bundle_states [0] = curr_state; + insn_num = 0; + /* Shift cycle mark if it is put on insn which could be ignored. */ + for (insn = NEXT_INSN (prev_head_insn); + insn != tail; + insn = NEXT_INSN (insn)) + if (INSN_P (insn) + && !important_for_bundling_p (insn) + && GET_MODE (insn) == TImode) + { + PUT_MODE (insn, VOIDmode); + for (next_insn = NEXT_INSN (insn); + next_insn != tail; + next_insn = NEXT_INSN (next_insn)) + if (important_for_bundling_p (next_insn) + && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier) + { + PUT_MODE (next_insn, TImode); + break; + } + } + /* Forward pass: generation of bundle states. */ + for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail); + insn != NULL_RTX; + insn = next_insn) + { + gcc_assert (important_for_bundling_p (insn)); + type = ia64_safe_type (insn); + next_insn = get_next_important_insn (NEXT_INSN (insn), tail); + insn_num++; + index_to_bundle_states [insn_num] = NULL; + for (curr_state = index_to_bundle_states [insn_num - 1]; + curr_state != NULL; + curr_state = next_state) + { + pos = curr_state->accumulated_insns_num % 3; + next_state = curr_state->next; + /* We must fill up the current bundle in order to start a + subsequent asm insn in a new bundle. Asm insn is always + placed in a separate bundle. */ + only_bundle_end_p + = (next_insn != NULL_RTX + && INSN_CODE (insn) == CODE_FOR_insn_group_barrier + && unknown_for_bundling_p (next_insn)); + /* We may fill up the current bundle if it is the cycle end + without a group barrier. */ + bundle_end_p + = (only_bundle_end_p || next_insn == NULL_RTX + || (GET_MODE (next_insn) == TImode + && INSN_CODE (insn) != CODE_FOR_insn_group_barrier)); + if (type == TYPE_F || type == TYPE_B || type == TYPE_L + || type == TYPE_S) + issue_nops_and_insn (curr_state, 2, insn, bundle_end_p, + only_bundle_end_p); + issue_nops_and_insn (curr_state, 1, insn, bundle_end_p, + only_bundle_end_p); + issue_nops_and_insn (curr_state, 0, insn, bundle_end_p, + only_bundle_end_p); + } + gcc_assert (index_to_bundle_states [insn_num]); + for (curr_state = index_to_bundle_states [insn_num]; + curr_state != NULL; + curr_state = curr_state->next) + if (verbose >= 2 && dump) + { + /* This structure is taken from generated code of the + pipeline hazard recognizer (see file insn-attrtab.cc). + Please don't forget to change the structure if a new + automaton is added to .md file. */ + struct DFA_chip + { + unsigned short one_automaton_state; + unsigned short oneb_automaton_state; + unsigned short two_automaton_state; + unsigned short twob_automaton_state; + }; + + fprintf + (dump, + "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n", + curr_state->unique_num, + (curr_state->originator == NULL + ? 
-1 : curr_state->originator->unique_num), + curr_state->cost, + curr_state->before_nops_num, curr_state->after_nops_num, + curr_state->accumulated_insns_num, curr_state->branch_deviation, + curr_state->middle_bundle_stops, + ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state, + INSN_UID (insn)); + } + } + + /* We should find a solution because the 2nd insn scheduling has + found one. */ + gcc_assert (index_to_bundle_states [insn_num]); + /* Find a state corresponding to the best insn sequence. */ + best_state = NULL; + for (curr_state = index_to_bundle_states [insn_num]; + curr_state != NULL; + curr_state = curr_state->next) + /* We are just looking at the states with fully filled up last + bundle. The first we prefer insn sequences with minimal cost + then with minimal inserted nops and finally with branch insns + placed in the 3rd slots. */ + if (curr_state->accumulated_insns_num % 3 == 0 + && (best_state == NULL || best_state->cost > curr_state->cost + || (best_state->cost == curr_state->cost + && (curr_state->accumulated_insns_num + < best_state->accumulated_insns_num + || (curr_state->accumulated_insns_num + == best_state->accumulated_insns_num + && (curr_state->branch_deviation + < best_state->branch_deviation + || (curr_state->branch_deviation + == best_state->branch_deviation + && curr_state->middle_bundle_stops + < best_state->middle_bundle_stops))))))) + best_state = curr_state; + /* Second (backward) pass: adding nops and templates. */ + gcc_assert (best_state); + insn_num = best_state->before_nops_num; + template0 = template1 = -1; + for (curr_state = best_state; + curr_state->originator != NULL; + curr_state = curr_state->originator) + { + insn = curr_state->insn; + asm_p = unknown_for_bundling_p (insn); + insn_num++; + if (verbose >= 2 && dump) + { + struct DFA_chip + { + unsigned short one_automaton_state; + unsigned short oneb_automaton_state; + unsigned short two_automaton_state; + unsigned short twob_automaton_state; + }; + + fprintf + (dump, + "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n", + curr_state->unique_num, + (curr_state->originator == NULL + ? -1 : curr_state->originator->unique_num), + curr_state->cost, + curr_state->before_nops_num, curr_state->after_nops_num, + curr_state->accumulated_insns_num, curr_state->branch_deviation, + curr_state->middle_bundle_stops, + ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state, + INSN_UID (insn)); + } + /* Find the position in the current bundle window. The window can + contain at most two bundles. Two bundle window means that + the processor will make two bundle rotation. */ + max_pos = get_max_pos (curr_state->dfa_state); + if (max_pos == 6 + /* The following (negative template number) means that the + processor did one bundle rotation. */ + || (max_pos == 3 && template0 < 0)) + { + /* We are at the end of the window -- find template(s) for + its bundle(s). */ + pos = max_pos; + if (max_pos == 3) + template0 = get_template (curr_state->dfa_state, 3); + else + { + template1 = get_template (curr_state->dfa_state, 3); + template0 = get_template (curr_state->dfa_state, 6); + } + } + if (max_pos > 3 && template1 < 0) + /* It may happen when we have the stop inside a bundle. */ + { + gcc_assert (pos <= 3); + template1 = get_template (curr_state->dfa_state, 3); + pos += 3; + } + if (!asm_p) + /* Emit nops after the current insn. 
*/ + for (i = 0; i < curr_state->after_nops_num; i++) + { + rtx nop_pat = gen_nop (); + rtx_insn *nop = emit_insn_after (nop_pat, insn); + pos--; + gcc_assert (pos >= 0); + if (pos % 3 == 0) + { + /* We are at the start of a bundle: emit the template + (it should be defined). */ + gcc_assert (template0 >= 0); + ia64_add_bundle_selector_before (template0, nop); + /* If we have two bundle window, we make one bundle + rotation. Otherwise template0 will be undefined + (negative value). */ + template0 = template1; + template1 = -1; + } + } + /* Move the position backward in the window. Group barrier has + no slot. Asm insn takes all bundle. */ + if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier + && !unknown_for_bundling_p (insn)) + pos--; + /* Long insn takes 2 slots. */ + if (ia64_safe_type (insn) == TYPE_L) + pos--; + gcc_assert (pos >= 0); + if (pos % 3 == 0 + && INSN_CODE (insn) != CODE_FOR_insn_group_barrier + && !unknown_for_bundling_p (insn)) + { + /* The current insn is at the bundle start: emit the + template. */ + gcc_assert (template0 >= 0); + ia64_add_bundle_selector_before (template0, insn); + b = PREV_INSN (insn); + insn = b; + /* See comment above in analogous place for emitting nops + after the insn. */ + template0 = template1; + template1 = -1; + } + /* Emit nops after the current insn. */ + for (i = 0; i < curr_state->before_nops_num; i++) + { + rtx nop_pat = gen_nop (); + ia64_emit_insn_before (nop_pat, insn); + rtx_insn *nop = PREV_INSN (insn); + insn = nop; + pos--; + gcc_assert (pos >= 0); + if (pos % 3 == 0) + { + /* See comment above in analogous place for emitting nops + after the insn. */ + gcc_assert (template0 >= 0); + ia64_add_bundle_selector_before (template0, insn); + b = PREV_INSN (insn); + insn = b; + template0 = template1; + template1 = -1; + } + } + } + + if (flag_checking) + { + /* Assert right calculation of middle_bundle_stops. */ + int num = best_state->middle_bundle_stops; + bool start_bundle = true, end_bundle = false; + + for (insn = NEXT_INSN (prev_head_insn); + insn && insn != tail; + insn = NEXT_INSN (insn)) + { + if (!INSN_P (insn)) + continue; + if (recog_memoized (insn) == CODE_FOR_bundle_selector) + start_bundle = true; + else + { + rtx_insn *next_insn; + + for (next_insn = NEXT_INSN (insn); + next_insn && next_insn != tail; + next_insn = NEXT_INSN (next_insn)) + if (INSN_P (next_insn) + && (ia64_safe_itanium_class (next_insn) + != ITANIUM_CLASS_IGNORE + || recog_memoized (next_insn) + == CODE_FOR_bundle_selector) + && GET_CODE (PATTERN (next_insn)) != USE + && GET_CODE (PATTERN (next_insn)) != CLOBBER) + break; + + end_bundle = next_insn == NULL_RTX + || next_insn == tail + || (INSN_P (next_insn) + && recog_memoized (next_insn) == CODE_FOR_bundle_selector); + if (recog_memoized (insn) == CODE_FOR_insn_group_barrier + && !start_bundle && !end_bundle + && next_insn + && !unknown_for_bundling_p (next_insn)) + num--; + + start_bundle = false; + } + } + + gcc_assert (num == 0); + } + + free (index_to_bundle_states); + finish_bundle_state_table (); + bundling_p = 0; + dfa_clean_insn_cache (); + } + + /* The following function is called at the end of scheduling BB or + EBB. After reload, it inserts stop bits and does insn bundling. 
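The slot bookkeeping that issue_nops_and_insn and the backward pass of bundling above rely on reduces to two small formulas: a partly filled bundle needs 3 - (accumulated % 3) trailing nops, and a branch whose issue brings the accumulated count to N sits 2 - (N - 1) % 3 slots away from the preferred third slot. A standalone sketch of just that arithmetic follows; nops_to_close_bundle and branch_slot_deviation are illustrative helpers, not GCC functions.

   #include <assert.h>

   /* Nops needed to pad a partly filled bundle out to its boundary
      (three slots per bundle).  */
   static int
   nops_to_close_bundle (int accumulated_insns_num)
   {
     int rem = accumulated_insns_num % 3;
     return rem == 0 ? 0 : 3 - rem;
   }

   /* Distance of the insn just issued (the one that brought the count to
      ACCUMULATED_INSNS_NUM) from the third slot of its bundle; this is the
      amount added to branch_deviation for a B-type insn.  */
   static int
   branch_slot_deviation (int accumulated_insns_num)
   {
     return 2 - (accumulated_insns_num - 1) % 3;
   }

   int
   main (void)
   {
     assert (nops_to_close_bundle (6) == 0);   /* already on a boundary.  */
     assert (nops_to_close_bundle (7) == 2);   /* one slot used, pad two.  */
     assert (nops_to_close_bundle (8) == 1);   /* two slots used, pad one.  */
     assert (branch_slot_deviation (3) == 0);  /* branch in the third slot.  */
     assert (branch_slot_deviation (1) == 2);  /* branch in the first slot.  */
     return 0;
   }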
*/ + + static void + ia64_sched_finish (FILE *dump, int sched_verbose) + { + if (sched_verbose) + fprintf (dump, "// Finishing schedule.\n"); + if (!reload_completed) + return; + if (reload_completed) + { + final_emit_insn_group_barriers (dump); + bundling (dump, sched_verbose, current_sched_info->prev_head, + current_sched_info->next_tail); + if (sched_verbose && dump) + fprintf (dump, "// finishing %d-%d\n", + INSN_UID (NEXT_INSN (current_sched_info->prev_head)), + INSN_UID (PREV_INSN (current_sched_info->next_tail))); + + return; + } + } + + /* The following function inserts stop bits in scheduled BB or EBB. */ + + static void + final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED) + { + rtx_insn *insn; + int need_barrier_p = 0; + int seen_good_insn = 0; + + init_insn_group_barriers (); + + for (insn = NEXT_INSN (current_sched_info->prev_head); + insn != current_sched_info->next_tail; + insn = NEXT_INSN (insn)) + { + if (BARRIER_P (insn)) + { + rtx_insn *last = prev_active_insn (insn); + + if (! last) + continue; + if (JUMP_TABLE_DATA_P (last)) + last = prev_active_insn (last); + if (recog_memoized (last) != CODE_FOR_insn_group_barrier) + emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last); + + init_insn_group_barriers (); + seen_good_insn = 0; + need_barrier_p = 0; + } + else if (NONDEBUG_INSN_P (insn)) + { + if (recog_memoized (insn) == CODE_FOR_insn_group_barrier) + { + init_insn_group_barriers (); + seen_good_insn = 0; + need_barrier_p = 0; + } + else if (need_barrier_p || group_barrier_needed (insn) + || (mflag_sched_stop_bits_after_every_cycle + && GET_MODE (insn) == TImode + && seen_good_insn)) + { + if (TARGET_EARLY_STOP_BITS) + { + rtx_insn *last; + + for (last = insn; + last != current_sched_info->prev_head; + last = PREV_INSN (last)) + if (INSN_P (last) && GET_MODE (last) == TImode + && stops_p [INSN_UID (last)]) + break; + if (last == current_sched_info->prev_head) + last = insn; + last = prev_active_insn (last); + if (last + && recog_memoized (last) != CODE_FOR_insn_group_barrier) + emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), + last); + init_insn_group_barriers (); + for (last = NEXT_INSN (last); + last != insn; + last = NEXT_INSN (last)) + if (INSN_P (last)) + { + group_barrier_needed (last); + if (recog_memoized (last) >= 0 + && important_for_bundling_p (last)) + seen_good_insn = 1; + } + } + else + { + emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), + insn); + init_insn_group_barriers (); + seen_good_insn = 0; + } + group_barrier_needed (insn); + if (recog_memoized (insn) >= 0 + && important_for_bundling_p (insn)) + seen_good_insn = 1; + } + else if (recog_memoized (insn) >= 0 + && important_for_bundling_p (insn)) + seen_good_insn = 1; + need_barrier_p = (CALL_P (insn) || unknown_for_bundling_p (insn)); + } + } + } + + + + /* If the following function returns TRUE, we will use the DFA + insn scheduler. */ + + static int + ia64_first_cycle_multipass_dfa_lookahead (void) + { + return (reload_completed ? 6 : 4); + } + + /* The following function initiates variable `dfa_pre_cycle_insn'. 
*/ + + static void + ia64_init_dfa_pre_cycle_insn (void) + { + if (temp_dfa_state == NULL) + { + dfa_state_size = state_size (); + temp_dfa_state = xmalloc (dfa_state_size); + prev_cycle_state = xmalloc (dfa_state_size); + } + dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ()); + SET_PREV_INSN (dfa_pre_cycle_insn) = SET_NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX; + recog_memoized (dfa_pre_cycle_insn); + dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3))); + SET_PREV_INSN (dfa_stop_insn) = SET_NEXT_INSN (dfa_stop_insn) = NULL_RTX; + recog_memoized (dfa_stop_insn); + } + + /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN + used by the DFA insn scheduler. */ + + static rtx + ia64_dfa_pre_cycle_insn (void) + { + return dfa_pre_cycle_insn; + } + + /* The following function returns TRUE if PRODUCER (of type ilog or + ld) produces address for CONSUMER (of type st or stf). */ + + int + ia64_st_address_bypass_p (rtx_insn *producer, rtx_insn *consumer) + { + rtx dest, reg, mem; + + gcc_assert (producer && consumer); + dest = ia64_single_set (producer); + gcc_assert (dest); + reg = SET_DEST (dest); + gcc_assert (reg); + if (GET_CODE (reg) == SUBREG) + reg = SUBREG_REG (reg); + gcc_assert (GET_CODE (reg) == REG); + + dest = ia64_single_set (consumer); + gcc_assert (dest); + mem = SET_DEST (dest); + gcc_assert (mem && GET_CODE (mem) == MEM); + return reg_mentioned_p (reg, mem); + } + + /* The following function returns TRUE if PRODUCER (of type ilog or + ld) produces address for CONSUMER (of type ld or fld). */ + + int + ia64_ld_address_bypass_p (rtx_insn *producer, rtx_insn *consumer) + { + rtx dest, src, reg, mem; + + gcc_assert (producer && consumer); + dest = ia64_single_set (producer); + gcc_assert (dest); + reg = SET_DEST (dest); + gcc_assert (reg); + if (GET_CODE (reg) == SUBREG) + reg = SUBREG_REG (reg); + gcc_assert (GET_CODE (reg) == REG); + + src = ia64_single_set (consumer); + gcc_assert (src); + mem = SET_SRC (src); + gcc_assert (mem); + + if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0) + mem = XVECEXP (mem, 0, 0); + else if (GET_CODE (mem) == IF_THEN_ELSE) + /* ??? Is this bypass necessary for ld.c? */ + { + gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR); + mem = XEXP (mem, 1); + } + + while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND) + mem = XEXP (mem, 0); + + if (GET_CODE (mem) == UNSPEC) + { + int c = XINT (mem, 1); + + gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A + || c == UNSPEC_LDSA); + mem = XVECEXP (mem, 0, 0); + } + + /* Note that LO_SUM is used for GOT loads. */ + gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM); + + return reg_mentioned_p (reg, mem); + } + + /* The following function returns TRUE if INSN produces address for a + load/store insn. We will place such insns into M slot because it + decreases its latency time. */ + + int + ia64_produce_address_p (rtx insn) + { + return insn->call; + } + + + /* Emit pseudo-ops for the assembler to describe predicate relations. + At present this assumes that we only consider predicate pairs to + be mutex, and that the assembler can deduce proper values from + straight-line code. */ + + static void + emit_predicate_relation_info (void) + { + basic_block bb; + + FOR_EACH_BB_REVERSE_FN (bb, cfun) + { + int r; + rtx_insn *head = BB_HEAD (bb); + + /* We only need such notes at code labels. */ + if (! 
LABEL_P (head)) + continue; + if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head))) + head = NEXT_INSN (head); + + /* Skip p0, which may be thought to be live due to (reg:DI p0) + grabbing the entire block of predicate registers. */ + for (r = PR_REG (2); r < PR_REG (64); r += 2) + if (REGNO_REG_SET_P (df_get_live_in (bb), r)) + { + rtx p = gen_rtx_REG (BImode, r); + rtx_insn *n = emit_insn_after (gen_pred_rel_mutex (p), head); + if (head == BB_END (bb)) + BB_END (bb) = n; + head = n; + } + } + + /* Look for conditional calls that do not return, and protect predicate + relations around them. Otherwise the assembler will assume the call + returns, and complain about uses of call-clobbered predicates after + the call. */ + FOR_EACH_BB_REVERSE_FN (bb, cfun) + { + rtx_insn *insn = BB_HEAD (bb); + + while (1) + { + if (CALL_P (insn) + && GET_CODE (PATTERN (insn)) == COND_EXEC + && find_reg_note (insn, REG_NORETURN, NULL_RTX)) + { + rtx_insn *b = + emit_insn_before (gen_safe_across_calls_all (), insn); + rtx_insn *a = emit_insn_after (gen_safe_across_calls_normal (), insn); + if (BB_HEAD (bb) == insn) + BB_HEAD (bb) = b; + if (BB_END (bb) == insn) + BB_END (bb) = a; + } + + if (insn == BB_END (bb)) + break; + insn = NEXT_INSN (insn); + } + } + } + + /* Perform machine dependent operations on the rtl chain INSNS. */ + + static void + ia64_reorg (void) + { + /* We are freeing block_for_insn in the toplev to keep compatibility + with old MDEP_REORGS that are not CFG based. Recompute it now. */ + compute_bb_for_insn (); + + /* If optimizing, we'll have split before scheduling. */ + if (optimize == 0) + split_all_insns (); + + if (optimize && flag_schedule_insns_after_reload + && dbg_cnt (ia64_sched2)) + { + basic_block bb; + timevar_push (TV_SCHED2); + ia64_final_schedule = 1; + + /* We can't let modulo-sched prevent us from scheduling any bbs, + since we need the final schedule to produce bundle information. 
*/ + FOR_EACH_BB_FN (bb, cfun) + bb->flags &= ~BB_DISABLE_SCHEDULE; + + initiate_bundle_states (); + ia64_nop = make_insn_raw (gen_nop ()); + SET_PREV_INSN (ia64_nop) = SET_NEXT_INSN (ia64_nop) = NULL_RTX; + recog_memoized (ia64_nop); + clocks_length = get_max_uid () + 1; + stops_p = XCNEWVEC (char, clocks_length); + + if (ia64_tune == PROCESSOR_ITANIUM2) + { + pos_1 = get_cpu_unit_code ("2_1"); + pos_2 = get_cpu_unit_code ("2_2"); + pos_3 = get_cpu_unit_code ("2_3"); + pos_4 = get_cpu_unit_code ("2_4"); + pos_5 = get_cpu_unit_code ("2_5"); + pos_6 = get_cpu_unit_code ("2_6"); + _0mii_ = get_cpu_unit_code ("2b_0mii."); + _0mmi_ = get_cpu_unit_code ("2b_0mmi."); + _0mfi_ = get_cpu_unit_code ("2b_0mfi."); + _0mmf_ = get_cpu_unit_code ("2b_0mmf."); + _0bbb_ = get_cpu_unit_code ("2b_0bbb."); + _0mbb_ = get_cpu_unit_code ("2b_0mbb."); + _0mib_ = get_cpu_unit_code ("2b_0mib."); + _0mmb_ = get_cpu_unit_code ("2b_0mmb."); + _0mfb_ = get_cpu_unit_code ("2b_0mfb."); + _0mlx_ = get_cpu_unit_code ("2b_0mlx."); + _1mii_ = get_cpu_unit_code ("2b_1mii."); + _1mmi_ = get_cpu_unit_code ("2b_1mmi."); + _1mfi_ = get_cpu_unit_code ("2b_1mfi."); + _1mmf_ = get_cpu_unit_code ("2b_1mmf."); + _1bbb_ = get_cpu_unit_code ("2b_1bbb."); + _1mbb_ = get_cpu_unit_code ("2b_1mbb."); + _1mib_ = get_cpu_unit_code ("2b_1mib."); + _1mmb_ = get_cpu_unit_code ("2b_1mmb."); + _1mfb_ = get_cpu_unit_code ("2b_1mfb."); + _1mlx_ = get_cpu_unit_code ("2b_1mlx."); + } + else + { + pos_1 = get_cpu_unit_code ("1_1"); + pos_2 = get_cpu_unit_code ("1_2"); + pos_3 = get_cpu_unit_code ("1_3"); + pos_4 = get_cpu_unit_code ("1_4"); + pos_5 = get_cpu_unit_code ("1_5"); + pos_6 = get_cpu_unit_code ("1_6"); + _0mii_ = get_cpu_unit_code ("1b_0mii."); + _0mmi_ = get_cpu_unit_code ("1b_0mmi."); + _0mfi_ = get_cpu_unit_code ("1b_0mfi."); + _0mmf_ = get_cpu_unit_code ("1b_0mmf."); + _0bbb_ = get_cpu_unit_code ("1b_0bbb."); + _0mbb_ = get_cpu_unit_code ("1b_0mbb."); + _0mib_ = get_cpu_unit_code ("1b_0mib."); + _0mmb_ = get_cpu_unit_code ("1b_0mmb."); + _0mfb_ = get_cpu_unit_code ("1b_0mfb."); + _0mlx_ = get_cpu_unit_code ("1b_0mlx."); + _1mii_ = get_cpu_unit_code ("1b_1mii."); + _1mmi_ = get_cpu_unit_code ("1b_1mmi."); + _1mfi_ = get_cpu_unit_code ("1b_1mfi."); + _1mmf_ = get_cpu_unit_code ("1b_1mmf."); + _1bbb_ = get_cpu_unit_code ("1b_1bbb."); + _1mbb_ = get_cpu_unit_code ("1b_1mbb."); + _1mib_ = get_cpu_unit_code ("1b_1mib."); + _1mmb_ = get_cpu_unit_code ("1b_1mmb."); + _1mfb_ = get_cpu_unit_code ("1b_1mfb."); + _1mlx_ = get_cpu_unit_code ("1b_1mlx."); + } + + if (flag_selective_scheduling2 + && !maybe_skip_selective_scheduling ()) + run_selective_scheduling (); + else + schedule_ebbs (); + + /* Redo alignment computation, as it might gone wrong. */ + compute_alignments (); + + /* We cannot reuse this one because it has been corrupted by the + evil glat. */ + finish_bundle_states (); + free (stops_p); + stops_p = NULL; + emit_insn_group_barriers (dump_file); + + ia64_final_schedule = 0; + timevar_pop (TV_SCHED2); + } + else + emit_all_insn_group_barriers (dump_file); + + df_analyze (); + + /* A call must not be the last instruction in a function, so that the + return address is still within the function, so that unwinding works + properly. Note that IA-64 differs from dwarf2 on this point. */ + if (ia64_except_unwind_info (&global_options) == UI_TARGET) + { + rtx_insn *insn; + int saw_stop = 0; + + insn = get_last_insn (); + if (! INSN_P (insn)) + insn = prev_active_insn (insn); + if (insn) + { + /* Skip over insns that expand to nothing. 
*/ + while (NONJUMP_INSN_P (insn) + && get_attr_empty (insn) == EMPTY_YES) + { + if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE + && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER) + saw_stop = 1; + insn = prev_active_insn (insn); + } + if (CALL_P (insn)) + { + if (! saw_stop) + emit_insn (gen_insn_group_barrier (GEN_INT (3))); + emit_insn (gen_break_f ()); + emit_insn (gen_insn_group_barrier (GEN_INT (3))); + } + } + } + + emit_predicate_relation_info (); + + if (flag_var_tracking) + { + timevar_push (TV_VAR_TRACKING); + variable_tracking_main (); + timevar_pop (TV_VAR_TRACKING); + } + df_finish_pass (false); + } + + /* Return true if REGNO is used by the epilogue. */ + + int + ia64_epilogue_uses (int regno) + { + switch (regno) + { + case R_GR (1): + /* With a call to a function in another module, we will write a new + value to "gp". After returning from such a call, we need to make + sure the function restores the original gp-value, even if the + function itself does not use the gp anymore. */ + return !(TARGET_AUTO_PIC || TARGET_NO_PIC); + + case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3): + case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7): + /* For functions defined with the syscall_linkage attribute, all + input registers are marked as live at all function exits. This + prevents the register allocator from using the input registers, + which in turn makes it possible to restart a system call after + an interrupt without having to save/restore the input registers. + This also prevents kernel data from leaking to application code. */ + return lookup_attribute ("syscall_linkage", + TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL; + + case R_BR (0): + /* Conditional return patterns can't represent the use of `b0' as + the return address, so we force the value live this way. */ + return 1; + + case AR_PFS_REGNUM: + /* Likewise for ar.pfs, which is used by br.ret. */ + return 1; + + default: + return 0; + } + } + + /* Return true if REGNO is used by the frame unwinder. */ + + int + ia64_eh_uses (int regno) + { + unsigned int r; + + if (! reload_completed) + return 0; + + if (regno == 0) + return 0; + + for (r = reg_save_b0; r <= reg_save_ar_lc; r++) + if (regno == current_frame_info.r[r] + || regno == emitted_frame_related_regs[r]) + return 1; + + return 0; + } + + /* Return true if this goes in small data/bss. */ + + /* ??? We could also support own long data here. Generating movl/add/ld8 + instead of addl,ld8/ld8. This makes the code bigger, but should make the + code faster because there is one less load. This also includes incomplete + types which can't go in sdata/sbss. */ + + static bool + ia64_in_small_data_p (const_tree exp) + { + if (TARGET_NO_SDATA) + return false; + + /* We want to merge strings, so we never consider them small data. */ + if (TREE_CODE (exp) == STRING_CST) + return false; + + /* Functions are never small data. 
*/ + if (TREE_CODE (exp) == FUNCTION_DECL) + return false; + + if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp)) + { + const char *section = DECL_SECTION_NAME (exp); + + if (strcmp (section, ".sdata") == 0 + || startswith (section, ".sdata.") + || startswith (section, ".gnu.linkonce.s.") + || strcmp (section, ".sbss") == 0 + || startswith (section, ".sbss.") + || startswith (section, ".gnu.linkonce.sb.")) + return true; + } + else + { + HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); + + /* If this is an incomplete type with size 0, then we can't put it + in sdata because it might be too big when completed. */ + if (size > 0 && size <= ia64_section_threshold) + return true; + } + + return false; + } + + /* Output assembly directives for prologue regions. */ + + /* The current basic block number. */ + + static bool last_block; + + /* True if we need a copy_state command at the start of the next block. */ + + static bool need_copy_state; + + #ifndef MAX_ARTIFICIAL_LABEL_BYTES + # define MAX_ARTIFICIAL_LABEL_BYTES 30 + #endif + + /* The function emits unwind directives for the start of an epilogue. */ + + static void + process_epilogue (FILE *out_file, rtx insn ATTRIBUTE_UNUSED, + bool unwind, bool frame ATTRIBUTE_UNUSED) + { + /* If this isn't the last block of the function, then we need to label the + current state, and copy it back in at the start of the next block. */ + + if (!last_block) + { + if (unwind) + fprintf (out_file, "\t.label_state %d\n", + ++cfun->machine->state_num); + need_copy_state = true; + } + + if (unwind) + fprintf (out_file, "\t.restore sp\n"); + } + + /* This function processes a SET pattern for REG_CFA_ADJUST_CFA. */ + + static void + process_cfa_adjust_cfa (FILE *out_file, rtx pat, rtx insn, + bool unwind, bool frame) + { + rtx dest = SET_DEST (pat); + rtx src = SET_SRC (pat); + + if (dest == stack_pointer_rtx) + { + if (GET_CODE (src) == PLUS) + { + rtx op0 = XEXP (src, 0); + rtx op1 = XEXP (src, 1); + + gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT); + + if (INTVAL (op1) < 0) + { + gcc_assert (!frame_pointer_needed); + if (unwind) + fprintf (out_file, + "\t.fframe " HOST_WIDE_INT_PRINT_DEC"\n", + -INTVAL (op1)); + } + else + process_epilogue (out_file, insn, unwind, frame); + } + else + { + gcc_assert (src == hard_frame_pointer_rtx); + process_epilogue (out_file, insn, unwind, frame); + } + } + else if (dest == hard_frame_pointer_rtx) + { + gcc_assert (src == stack_pointer_rtx); + gcc_assert (frame_pointer_needed); + + if (unwind) + fprintf (out_file, "\t.vframe r%d\n", + ia64_dbx_register_number (REGNO (dest))); + } + else + gcc_unreachable (); + } + + /* This function processes a SET pattern for REG_CFA_REGISTER. */ + + static void + process_cfa_register (FILE *out_file, rtx pat, bool unwind) + { + rtx dest = SET_DEST (pat); + rtx src = SET_SRC (pat); + int dest_regno = REGNO (dest); + int src_regno; + + if (src == pc_rtx) + { + /* Saving return address pointer. 
*/ + if (unwind) + fprintf (out_file, "\t.save rp, r%d\n", + ia64_dbx_register_number (dest_regno)); + return; + } + + src_regno = REGNO (src); + + switch (src_regno) + { + case PR_REG (0): + gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]); + if (unwind) + fprintf (out_file, "\t.save pr, r%d\n", + ia64_dbx_register_number (dest_regno)); + break; + + case AR_UNAT_REGNUM: + gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]); + if (unwind) + fprintf (out_file, "\t.save ar.unat, r%d\n", + ia64_dbx_register_number (dest_regno)); + break; + + case AR_LC_REGNUM: + gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]); + if (unwind) + fprintf (out_file, "\t.save ar.lc, r%d\n", + ia64_dbx_register_number (dest_regno)); + break; + + default: + /* Everything else should indicate being stored to memory. */ + gcc_unreachable (); + } + } + + /* This function processes a SET pattern for REG_CFA_OFFSET. */ + + static void + process_cfa_offset (FILE *out_file, rtx pat, bool unwind) + { + rtx dest = SET_DEST (pat); + rtx src = SET_SRC (pat); + int src_regno = REGNO (src); + const char *saveop; + HOST_WIDE_INT off; + rtx base; + + gcc_assert (MEM_P (dest)); + if (GET_CODE (XEXP (dest, 0)) == REG) + { + base = XEXP (dest, 0); + off = 0; + } + else + { + gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS + && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT); + base = XEXP (XEXP (dest, 0), 0); + off = INTVAL (XEXP (XEXP (dest, 0), 1)); + } + + if (base == hard_frame_pointer_rtx) + { + saveop = ".savepsp"; + off = - off; + } + else + { + gcc_assert (base == stack_pointer_rtx); + saveop = ".savesp"; + } + + src_regno = REGNO (src); + switch (src_regno) + { + case BR_REG (0): + gcc_assert (!current_frame_info.r[reg_save_b0]); + if (unwind) + fprintf (out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n", + saveop, off); + break; + + case PR_REG (0): + gcc_assert (!current_frame_info.r[reg_save_pr]); + if (unwind) + fprintf (out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n", + saveop, off); + break; + + case AR_LC_REGNUM: + gcc_assert (!current_frame_info.r[reg_save_ar_lc]); + if (unwind) + fprintf (out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n", + saveop, off); + break; + + case AR_PFS_REGNUM: + gcc_assert (!current_frame_info.r[reg_save_ar_pfs]); + if (unwind) + fprintf (out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n", + saveop, off); + break; + + case AR_UNAT_REGNUM: + gcc_assert (!current_frame_info.r[reg_save_ar_unat]); + if (unwind) + fprintf (out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n", + saveop, off); + break; + + case GR_REG (4): + case GR_REG (5): + case GR_REG (6): + case GR_REG (7): + if (unwind) + fprintf (out_file, "\t.save.g 0x%x\n", + 1 << (src_regno - GR_REG (4))); + break; + + case BR_REG (1): + case BR_REG (2): + case BR_REG (3): + case BR_REG (4): + case BR_REG (5): + if (unwind) + fprintf (out_file, "\t.save.b 0x%x\n", + 1 << (src_regno - BR_REG (1))); + break; + + case FR_REG (2): + case FR_REG (3): + case FR_REG (4): + case FR_REG (5): + if (unwind) + fprintf (out_file, "\t.save.f 0x%x\n", + 1 << (src_regno - FR_REG (2))); + break; + + case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19): + case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23): + case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27): + case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31): + if (unwind) + fprintf (out_file, "\t.save.gf 0x0, 0x%x\n", + 1 << (src_regno - FR_REG (12))); + break; + + 
default: + /* ??? For some reason we mark other general registers, even those + we can't represent in the unwind info. Ignore them. */ + break; + } + } + + /* This function looks at a single insn and emits any directives + required to unwind this insn. */ + + static void + ia64_asm_unwind_emit (FILE *out_file, rtx_insn *insn) + { + bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET; + bool frame = dwarf2out_do_frame (); + rtx note, pat; + bool handled_one; + + if (!unwind && !frame) + return; + + if (NOTE_INSN_BASIC_BLOCK_P (insn)) + { + last_block = NOTE_BASIC_BLOCK (insn)->next_bb + == EXIT_BLOCK_PTR_FOR_FN (cfun); + + /* Restore unwind state from immediately before the epilogue. */ + if (need_copy_state) + { + if (unwind) + { + fprintf (out_file, "\t.body\n"); + fprintf (out_file, "\t.copy_state %d\n", + cfun->machine->state_num); + } + need_copy_state = false; + } + } + + if (NOTE_P (insn) || ! RTX_FRAME_RELATED_P (insn)) + return; + + /* Look for the ALLOC insn. */ + if (INSN_CODE (insn) == CODE_FOR_alloc) + { + rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0)); + int dest_regno = REGNO (dest); + + /* If this is the final destination for ar.pfs, then this must + be the alloc in the prologue. */ + if (dest_regno == current_frame_info.r[reg_save_ar_pfs]) + { + if (unwind) + fprintf (out_file, "\t.save ar.pfs, r%d\n", + ia64_dbx_register_number (dest_regno)); + } + else + { + /* This must be an alloc before a sibcall. We must drop the + old frame info. The easiest way to drop the old frame + info is to ensure we had a ".restore sp" directive + followed by a new prologue. If the procedure doesn't + have a memory-stack frame, we'll issue a dummy ".restore + sp" now. */ + if (current_frame_info.total_size == 0 && !frame_pointer_needed) + /* if haven't done process_epilogue() yet, do it now */ + process_epilogue (out_file, insn, unwind, frame); + if (unwind) + fprintf (out_file, "\t.prologue\n"); + } + return; + } + + handled_one = false; + for (note = REG_NOTES (insn); note; note = XEXP (note, 1)) + switch (REG_NOTE_KIND (note)) + { + case REG_CFA_ADJUST_CFA: + pat = XEXP (note, 0); + if (pat == NULL) + pat = PATTERN (insn); + process_cfa_adjust_cfa (out_file, pat, insn, unwind, frame); + handled_one = true; + break; + + case REG_CFA_OFFSET: + pat = XEXP (note, 0); + if (pat == NULL) + pat = PATTERN (insn); + process_cfa_offset (out_file, pat, unwind); + handled_one = true; + break; + + case REG_CFA_REGISTER: + pat = XEXP (note, 0); + if (pat == NULL) + pat = PATTERN (insn); + process_cfa_register (out_file, pat, unwind); + handled_one = true; + break; + + case REG_FRAME_RELATED_EXPR: + case REG_CFA_DEF_CFA: + case REG_CFA_EXPRESSION: + case REG_CFA_RESTORE: + case REG_CFA_SET_VDRAP: + /* Not used in the ia64 port. */ + gcc_unreachable (); + + default: + /* Not a frame-related note. */ + break; + } + + /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the + explicit action to take. No guessing required. */ + gcc_assert (handled_one); + } + + /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */ + + static void + ia64_asm_emit_except_personality (rtx personality) + { + fputs ("\t.personality\t", asm_out_file); + output_addr_const (asm_out_file, personality); + fputc ('\n', asm_out_file); + } + + /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */ + + static void + ia64_asm_init_sections (void) + { + exception_section = get_unnamed_section (0, output_section_asm_op, + "\t.handlerdata"); + } + + /* Implement TARGET_DEBUG_UNWIND_INFO. 
*/ + + static enum unwind_info_type + ia64_debug_unwind_info (void) + { + return UI_TARGET; + } + + enum ia64_builtins + { + IA64_BUILTIN_BSP, + IA64_BUILTIN_COPYSIGNQ, + IA64_BUILTIN_FABSQ, + IA64_BUILTIN_FLUSHRS, + IA64_BUILTIN_INFQ, + IA64_BUILTIN_HUGE_VALQ, + IA64_BUILTIN_NANQ, + IA64_BUILTIN_NANSQ, + IA64_BUILTIN_max + }; + + static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max]; + + void + ia64_init_builtins (void) + { + tree fpreg_type; + tree float80_type; + tree decl; + + /* The __fpreg type. */ + fpreg_type = make_node (REAL_TYPE); + TYPE_PRECISION (fpreg_type) = 82; + layout_type (fpreg_type); + (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg"); + + /* The __float80 type. */ + if (float64x_type_node != NULL_TREE + && TYPE_MODE (float64x_type_node) == XFmode) + float80_type = float64x_type_node; + else + { + float80_type = make_node (REAL_TYPE); + TYPE_PRECISION (float80_type) = 80; + layout_type (float80_type); + } + (*lang_hooks.types.register_builtin_type) (float80_type, "__float80"); + + /* The __float128 type. */ + if (!TARGET_HPUX) + { + tree ftype; + tree const_string_type + = build_pointer_type (build_qualified_type + (char_type_node, TYPE_QUAL_CONST)); + + (*lang_hooks.types.register_builtin_type) (float128_type_node, + "__float128"); + + /* TFmode support builtins. */ + ftype = build_function_type_list (float128_type_node, NULL_TREE); + decl = add_builtin_function ("__builtin_infq", ftype, + IA64_BUILTIN_INFQ, BUILT_IN_MD, + NULL, NULL_TREE); + ia64_builtins[IA64_BUILTIN_INFQ] = decl; + + decl = add_builtin_function ("__builtin_huge_valq", ftype, + IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD, + NULL, NULL_TREE); + ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl; + + ftype = build_function_type_list (float128_type_node, + const_string_type, + NULL_TREE); + decl = add_builtin_function ("__builtin_nanq", ftype, + IA64_BUILTIN_NANQ, BUILT_IN_MD, + "nanq", NULL_TREE); + TREE_READONLY (decl) = 1; + ia64_builtins[IA64_BUILTIN_NANQ] = decl; + + decl = add_builtin_function ("__builtin_nansq", ftype, + IA64_BUILTIN_NANSQ, BUILT_IN_MD, + "nansq", NULL_TREE); + TREE_READONLY (decl) = 1; + ia64_builtins[IA64_BUILTIN_NANSQ] = decl; + + ftype = build_function_type_list (float128_type_node, + float128_type_node, + NULL_TREE); + decl = add_builtin_function ("__builtin_fabsq", ftype, + IA64_BUILTIN_FABSQ, BUILT_IN_MD, + "__fabstf2", NULL_TREE); + TREE_READONLY (decl) = 1; + ia64_builtins[IA64_BUILTIN_FABSQ] = decl; + + ftype = build_function_type_list (float128_type_node, + float128_type_node, + float128_type_node, + NULL_TREE); + decl = add_builtin_function ("__builtin_copysignq", ftype, + IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD, + "__copysigntf3", NULL_TREE); + TREE_READONLY (decl) = 1; + ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl; + } + else + /* Under HPUX, this is a synonym for "long double". */ + (*lang_hooks.types.register_builtin_type) (long_double_type_node, + "__float128"); + + /* Fwrite on VMS is non-standard. 
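A hedged usage sketch (not part of the commit; variable names illustrative) of the TFmode builtins registered above for non-HP-UX targets:

  void
  example_tfmode_builtins (void)
  {
    __float128 pos_inf = __builtin_infq ();             /* +infinity  */
    __float128 qnan = __builtin_nanq ("");              /* quiet NaN  */
    __float128 mag = __builtin_fabsq (qnan);            /* expands via __fabstf2  */
    __float128 s = __builtin_copysignq (mag, pos_inf);  /* expands via __copysigntf3  */
    (void) s;
  }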
*/ + #if TARGET_ABI_OPEN_VMS + vms_patch_builtins (); + #endif + + #define def_builtin(name, type, code) \ + add_builtin_function ((name), (type), (code), BUILT_IN_MD, \ + NULL, NULL_TREE) + + decl = def_builtin ("__builtin_ia64_bsp", + build_function_type_list (ptr_type_node, NULL_TREE), + IA64_BUILTIN_BSP); + ia64_builtins[IA64_BUILTIN_BSP] = decl; + + decl = def_builtin ("__builtin_ia64_flushrs", + build_function_type_list (void_type_node, NULL_TREE), + IA64_BUILTIN_FLUSHRS); + ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl; + + #undef def_builtin + + if (TARGET_HPUX) + { + if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE) + set_user_assembler_name (decl, "_Isfinite"); + if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE) + set_user_assembler_name (decl, "_Isfinitef"); + if ((decl = builtin_decl_explicit (BUILT_IN_FINITEL)) != NULL_TREE) + set_user_assembler_name (decl, "_Isfinitef128"); + } + } + + static tree + ia64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, + tree *args, bool ignore ATTRIBUTE_UNUSED) + { + if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD) + { + enum ia64_builtins fn_code + = (enum ia64_builtins) DECL_MD_FUNCTION_CODE (fndecl); + switch (fn_code) + { + case IA64_BUILTIN_NANQ: + case IA64_BUILTIN_NANSQ: + { + tree type = TREE_TYPE (TREE_TYPE (fndecl)); + const char *str = c_getstr (*args); + int quiet = fn_code == IA64_BUILTIN_NANQ; + REAL_VALUE_TYPE real; + + if (str && real_nan (&real, str, quiet, TYPE_MODE (type))) + return build_real (type, real); + return NULL_TREE; + } + + default: + break; + } + } + + #ifdef SUBTARGET_FOLD_BUILTIN + return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore); + #endif + + return NULL_TREE; + } + + rtx + ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, + machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) + { + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl); + + switch (fcode) + { + case IA64_BUILTIN_BSP: + if (! target || ! register_operand (target, DImode)) + target = gen_reg_rtx (DImode); + emit_insn (gen_bsp_value (target)); + #ifdef POINTERS_EXTEND_UNSIGNED + target = convert_memory_address (ptr_mode, target); + #endif + return target; + + case IA64_BUILTIN_FLUSHRS: + emit_insn (gen_flushrs ()); + return const0_rtx; + + case IA64_BUILTIN_INFQ: + case IA64_BUILTIN_HUGE_VALQ: + { + machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp)); + REAL_VALUE_TYPE inf; + rtx tmp; + + real_inf (&inf); + tmp = const_double_from_real_value (inf, target_mode); + + tmp = validize_mem (force_const_mem (target_mode, tmp)); + + if (target == 0) + target = gen_reg_rtx (target_mode); + + emit_move_insn (target, tmp); + return target; + } + + case IA64_BUILTIN_NANQ: + case IA64_BUILTIN_NANSQ: + case IA64_BUILTIN_FABSQ: + case IA64_BUILTIN_COPYSIGNQ: + return expand_call (exp, target, ignore); + + default: + gcc_unreachable (); + } + + return NULL_RTX; + } + + /* Return the ia64 builtin for CODE. */ + + static tree + ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) + { + if (code >= IA64_BUILTIN_max) + return error_mark_node; + + return ia64_builtins[code]; + } + + /* Implement TARGET_FUNCTION_ARG_PADDING. + + For the HP-UX IA64 aggregate parameters are passed stored in the + most significant bits of the stack slot. */ + + static pad_direction + ia64_function_arg_padding (machine_mode mode, const_tree type) + { + /* Exception to normal case for structures/unions/etc. 
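A small hypothetical use of the two register-stack builtins defined above (the wrapper name is illustrative):

  /* Flush dirty stacked registers to the backing store, then read the
     current backing store pointer.  */
  void *
  example_backing_store_top (void)
  {
    __builtin_ia64_flushrs ();
    return __builtin_ia64_bsp ();
  }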
*/ + if (TARGET_HPUX + && type + && AGGREGATE_TYPE_P (type) + && int_size_in_bytes (type) < UNITS_PER_WORD) + return PAD_UPWARD; + + /* Fall back to the default. */ + return default_function_arg_padding (mode, type); + } + + /* Emit text to declare externally defined variables and functions, because + the Intel assembler does not support undefined externals. */ + + void + ia64_asm_output_external (FILE *file, tree decl, const char *name) + { + /* We output the name if and only if TREE_SYMBOL_REFERENCED is + set in order to avoid putting out names that are never really + used. */ + if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl))) + { + /* maybe_assemble_visibility will return 1 if the assembler + visibility directive is output. */ + int need_visibility = ((*targetm.binds_local_p) (decl) + && maybe_assemble_visibility (decl)); + + /* GNU as does not need anything here, but the HP linker does + need something for external functions. */ + if ((TARGET_HPUX_LD || !TARGET_GNU_AS) + && TREE_CODE (decl) == FUNCTION_DECL) + (*targetm.asm_out.globalize_decl_name) (file, decl); + else if (need_visibility && !TARGET_GNU_AS) + (*targetm.asm_out.globalize_label) (file, name); + } + } + + /* Set SImode div/mod functions, init_integral_libfuncs only initializes + modes of word_mode and larger. Rename the TFmode libfuncs using the + HPUX conventions. __divtf3 is used for XFmode. We need to keep it for + backward compatibility. */ + + static void + ia64_init_libfuncs (void) + { + set_optab_libfunc (sdiv_optab, SImode, "__divsi3"); + set_optab_libfunc (udiv_optab, SImode, "__udivsi3"); + set_optab_libfunc (smod_optab, SImode, "__modsi3"); + set_optab_libfunc (umod_optab, SImode, "__umodsi3"); + + set_optab_libfunc (add_optab, TFmode, "_U_Qfadd"); + set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub"); + set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy"); + set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv"); + set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg"); + + set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad"); + set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad"); + set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad"); + set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl"); + set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl"); + set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80"); + + set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl"); + set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl"); + set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad"); + set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl"); + set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl"); + + set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad"); + set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad"); + set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad"); + /* HP-UX 11.23 libc does not have a function for unsigned + SImode-to-TFmode conversion. */ + set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad"); + } + + /* Rename all the TFmode libfuncs using the HPUX conventions. */ + + static void + ia64_hpux_init_libfuncs (void) + { + ia64_init_libfuncs (); + + /* The HP SI millicode division and mod functions expect DI arguments. 
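To make the optab-to-libfunc renaming concrete, a hedged sketch (function name illustrative): with the table above, a quad-precision divide is expected to be emitted as a call to _U_Qfdiv rather than libgcc's __divtf3.

  /* Expected to lower to a call to _U_Qfdiv under the mappings above.  */
  __float128
  example_quad_div (__float128 x, __float128 y)
  {
    return x / y;
  }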
+ By turning them off completely we avoid using both libgcc and the + non-standard millicode routines and use the HP DI millicode routines + instead. */ + + set_optab_libfunc (sdiv_optab, SImode, 0); + set_optab_libfunc (udiv_optab, SImode, 0); + set_optab_libfunc (smod_optab, SImode, 0); + set_optab_libfunc (umod_optab, SImode, 0); + + set_optab_libfunc (sdiv_optab, DImode, "__milli_divI"); + set_optab_libfunc (udiv_optab, DImode, "__milli_divU"); + set_optab_libfunc (smod_optab, DImode, "__milli_remI"); + set_optab_libfunc (umod_optab, DImode, "__milli_remU"); + + /* HP-UX libc has TF min/max/abs routines in it. */ + set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin"); + set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax"); + set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs"); + + /* ia64_expand_compare uses this. */ + cmptf_libfunc = init_one_libfunc ("_U_Qfcmp"); + + /* These should never be used. */ + set_optab_libfunc (eq_optab, TFmode, 0); + set_optab_libfunc (ne_optab, TFmode, 0); + set_optab_libfunc (gt_optab, TFmode, 0); + set_optab_libfunc (ge_optab, TFmode, 0); + set_optab_libfunc (lt_optab, TFmode, 0); + set_optab_libfunc (le_optab, TFmode, 0); + } + + /* Rename the division and modulus functions in VMS. */ + + static void + ia64_vms_init_libfuncs (void) + { + set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I"); + set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L"); + set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI"); + set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL"); + set_optab_libfunc (smod_optab, SImode, "OTS$REM_I"); + set_optab_libfunc (smod_optab, DImode, "OTS$REM_L"); + set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI"); + set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL"); + #ifdef MEM_LIBFUNCS_INIT + MEM_LIBFUNCS_INIT; + #endif + } + + /* Rename the TFmode libfuncs available from soft-fp in glibc using + the HPUX conventions. */ + + static void + ia64_sysv4_init_libfuncs (void) + { + ia64_init_libfuncs (); + + /* These functions are not part of the HPUX TFmode interface. We + use them instead of _U_Qfcmp, which doesn't work the way we + expect. */ + set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq"); + set_optab_libfunc (ne_optab, TFmode, "_U_Qfne"); + set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt"); + set_optab_libfunc (ge_optab, TFmode, "_U_Qfge"); + set_optab_libfunc (lt_optab, TFmode, "_U_Qflt"); + set_optab_libfunc (le_optab, TFmode, "_U_Qfle"); + + /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in + glibc doesn't have them. */ + } + + /* Use soft-fp. */ + + static void + ia64_soft_fp_init_libfuncs (void) + { + } + + static bool + ia64_vms_valid_pointer_mode (scalar_int_mode mode) + { + return (mode == SImode || mode == DImode); + } + + /* For HPUX, it is illegal to have relocations in shared segments. */ + + static int + ia64_hpux_reloc_rw_mask (void) + { + return 3; + } + + /* For others, relax this so that relocations to local data goes in + read-only segments, but we still cannot allow global relocations + in read-only segments. */ + + static int + ia64_reloc_rw_mask (void) + { + return flag_pic ? 3 : 2; + } + + /* Return the section to use for X. The only special thing we do here + is to honor small data. 
*/ + + static section * + ia64_select_rtx_section (machine_mode mode, rtx x, + unsigned HOST_WIDE_INT align) + { + if (GET_MODE_SIZE (mode) > 0 + && GET_MODE_SIZE (mode) <= ia64_section_threshold + && !TARGET_NO_SDATA) + return sdata_section; + else + return default_elf_select_rtx_section (mode, x, align); + } + + static unsigned int + ia64_section_type_flags (tree decl, const char *name, int reloc) + { + unsigned int flags = 0; + + if (strcmp (name, ".sdata") == 0 + || startswith (name, ".sdata.") + || startswith (name, ".gnu.linkonce.s.") + || startswith (name, ".sdata2.") + || startswith (name, ".gnu.linkonce.s2.") + || strcmp (name, ".sbss") == 0 + || startswith (name, ".sbss.") + || startswith (name, ".gnu.linkonce.sb.")) + flags = SECTION_SMALL; + + flags |= default_section_type_flags (decl, name, reloc); + return flags; + } + + /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a + structure type and that the address of that type should be passed + in out0, rather than in r8. */ + + static bool + ia64_struct_retval_addr_is_first_parm_p (tree fntype) + { + tree ret_type = TREE_TYPE (fntype); + + /* The Itanium C++ ABI requires that out0, rather than r8, be used + as the structure return address parameter, if the return value + type has a non-trivial copy constructor or destructor. It is not + clear if this same convention should be used for other + programming languages. Until G++ 3.4, we incorrectly used r8 for + these return values. */ + return (abi_version_at_least (2) + && ret_type + && TYPE_MODE (ret_type) == BLKmode + && TREE_ADDRESSABLE (ret_type) + && lang_GNU_CXX ()); + } + + /* Output the assembler code for a thunk function. THUNK_DECL is the + declaration for the thunk function itself, FUNCTION is the decl for + the target function. DELTA is an immediate constant offset to be + added to THIS. If VCALL_OFFSET is nonzero, the word at + *(*this + vcall_offset) should be added to THIS. */ + + static void + ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, + tree function) + { + const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk)); + rtx this_rtx, funexp; + rtx_insn *insn; + unsigned int this_parmno; + unsigned int this_regno; + rtx delta_rtx; + + reload_completed = 1; + epilogue_completed = 1; + + /* Set things up as ia64_expand_prologue might. */ + last_scratch_gr_reg = 15; + + memset (&current_frame_info, 0, sizeof (current_frame_info)); + current_frame_info.spill_cfa_off = -16; + current_frame_info.n_input_regs = 1; + current_frame_info.need_regstk = (TARGET_REG_NAMES != 0); + + /* Mark the end of the (empty) prologue. */ + emit_note (NOTE_INSN_PROLOGUE_END); + + /* Figure out whether "this" will be the first parameter (the + typical case) or the second parameter (as happens when the + virtual function returns certain class objects). */ + this_parmno + = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk)) + ? 1 : 0); + this_regno = IN_REG (this_parmno); + if (!TARGET_REG_NAMES) + reg_names[this_regno] = ia64_reg_numbers[this_parmno]; + + this_rtx = gen_rtx_REG (Pmode, this_regno); + + /* Apply the constant offset, if required.
*/ + delta_rtx = GEN_INT (delta); + if (TARGET_ILP32) + { + rtx tmp = gen_rtx_REG (ptr_mode, this_regno); + REG_POINTER (tmp) = 1; + if (delta && satisfies_constraint_I (delta_rtx)) + { + emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx)); + delta = 0; + } + else + emit_insn (gen_ptr_extend (this_rtx, tmp)); + } + if (delta) + { + if (!satisfies_constraint_I (delta_rtx)) + { + rtx tmp = gen_rtx_REG (Pmode, 2); + emit_move_insn (tmp, delta_rtx); + delta_rtx = tmp; + } + emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx)); + } + + /* Apply the offset from the vtable, if required. */ + if (vcall_offset) + { + rtx vcall_offset_rtx = GEN_INT (vcall_offset); + rtx tmp = gen_rtx_REG (Pmode, 2); + + if (TARGET_ILP32) + { + rtx t = gen_rtx_REG (ptr_mode, 2); + REG_POINTER (t) = 1; + emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx)); + if (satisfies_constraint_I (vcall_offset_rtx)) + { + emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx)); + vcall_offset = 0; + } + else + emit_insn (gen_ptr_extend (tmp, t)); + } + else + emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx)); + + if (vcall_offset) + { + if (!satisfies_constraint_J (vcall_offset_rtx)) + { + rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ()); + emit_move_insn (tmp2, vcall_offset_rtx); + vcall_offset_rtx = tmp2; + } + emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx)); + } + + if (TARGET_ILP32) + emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp))); + else + emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp)); + + emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp)); + } + + /* Generate a tail call to the target function. */ + if (! TREE_USED (function)) + { + assemble_external (function); + TREE_USED (function) = 1; + } + funexp = XEXP (DECL_RTL (function), 0); + funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); + ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1); + insn = get_last_insn (); + SIBLING_CALL_P (insn) = 1; + + /* Code generation for calls relies on splitting. */ + reload_completed = 1; + epilogue_completed = 1; + try_split (PATTERN (insn), insn, 0); + + emit_barrier (); + + /* Run just enough of rest_of_compilation to get the insns emitted. + There's not really enough bulk here to make other passes such as + instruction scheduling worth while. */ + + emit_all_insn_group_barriers (NULL); + insn = get_insns (); + shorten_branches (insn); + assemble_start_function (thunk, fnname); + final_start_function (insn, file, 1); + final (insn, file, 1); + final_end_function (); + assemble_end_function (thunk, fnname); + + reload_completed = 0; + epilogue_completed = 0; + } + + /* Worker function for TARGET_STRUCT_VALUE_RTX. */ + + static rtx + ia64_struct_value_rtx (tree fntype, + int incoming ATTRIBUTE_UNUSED) + { + if (TARGET_ABI_OPEN_VMS || + (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))) + return NULL_RTX; + return gen_rtx_REG (Pmode, GR_REG (8)); + } + + static bool + ia64_scalar_mode_supported_p (scalar_mode mode) + { + switch (mode) + { + case E_QImode: + case E_HImode: + case E_SImode: + case E_DImode: + case E_TImode: + return true; + + case E_SFmode: + case E_DFmode: + case E_XFmode: + case E_RFmode: + return true; + + case E_TFmode: + return true; + + default: + return false; + } + } + + static bool + ia64_vector_mode_supported_p (machine_mode mode) + { + switch (mode) + { + case E_V8QImode: + case E_V4HImode: + case E_V2SImode: + return true; + + case E_V2SFmode: + return true; + + default: + return false; + } + } + + /* Implement the FUNCTION_PROFILER macro. 
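For readers unfamiliar with DELTA/VCALL_OFFSET in the thunk emitter above, a hypothetical C++ fragment (class names illustrative) that makes the front end request such a this-adjusting thunk:

  struct A { virtual void f (); long a_field; };
  struct B { virtual void g (); long b_field; };

  /* Overriding B::g from the non-primary base requires a thunk that adds a
     constant DELTA to `this' before transferring to C::g; with virtual
     bases, a VCALL_OFFSET loaded from the vtable is added as well.  */
  struct C : A, B { void g () override; };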
*/ + + void + ia64_output_function_profiler (FILE *file, int labelno) + { + bool indirect_call; + + /* If the function needs a static chain and the static chain + register is r15, we use an indirect call so as to bypass + the PLT stub in case the executable is dynamically linked, + because the stub clobbers r15 as per 5.3.6 of the psABI. + We don't need to do that in non canonical PIC mode. */ + + if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC) + { + gcc_assert (STATIC_CHAIN_REGNUM == 15); + indirect_call = true; + } + else + indirect_call = false; + + if (TARGET_GNU_AS) + fputs ("\t.prologue 4, r40\n", file); + else + fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file); + fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file); + + if (NO_PROFILE_COUNTERS) + fputs ("\tmov out3 = r0\n", file); + else + { + char buf[20]; + ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno); + + if (TARGET_AUTO_PIC) + fputs ("\tmovl out3 = @gprel(", file); + else + fputs ("\taddl out3 = @ltoff(", file); + assemble_name (file, buf); + if (TARGET_AUTO_PIC) + fputs (")\n", file); + else + fputs ("), r1\n", file); + } + + if (indirect_call) + fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file); + fputs ("\t;;\n", file); + + fputs ("\t.save rp, r42\n", file); + fputs ("\tmov out2 = b0\n", file); + if (indirect_call) + fputs ("\tld8 r14 = [r14]\n\t;;\n", file); + fputs ("\t.body\n", file); + fputs ("\tmov out1 = r1\n", file); + if (indirect_call) + { + fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file); + fputs ("\tmov b6 = r16\n", file); + fputs ("\tld8 r1 = [r14]\n", file); + fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file); + } + else + fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file); + } + + static GTY(()) rtx mcount_func_rtx; + static rtx + gen_mcount_func_rtx (void) + { + if (!mcount_func_rtx) + mcount_func_rtx = init_one_libfunc ("_mcount"); + return mcount_func_rtx; + } + + void + ia64_profile_hook (int labelno) + { + rtx label, ip; + + if (NO_PROFILE_COUNTERS) + label = const0_rtx; + else + { + char buf[30]; + const char *label_name; + ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno); + label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf)); + label = gen_rtx_SYMBOL_REF (Pmode, label_name); + SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL; + } + ip = gen_reg_rtx (Pmode); + emit_insn (gen_ip_value (ip)); + emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL, + VOIDmode, + gen_rtx_REG (Pmode, BR_REG (0)), Pmode, + ip, Pmode, + label, Pmode); + } + + /* Return the mangling of TYPE if it is an extended fundamental type. */ + + static const char * + ia64_mangle_type (const_tree type) + { + type = TYPE_MAIN_VARIANT (type); + + if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE + && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE) + return NULL; + + /* On HP-UX, "long double" is mangled as "e" so __float128 is + mangled as "e". */ + if (!TARGET_HPUX && TYPE_MODE (type) == TFmode) + return "g"; + /* On HP-UX, "e" is not available as a mangling of __float80 so use + an extended mangling. Elsewhere, "e" is available since long + double is 80 bits. */ + if (TYPE_MODE (type) == XFmode) + return TARGET_HPUX ? "u9__float80" : "e"; + if (TYPE_MODE (type) == RFmode) + return "u7__fpreg"; + return NULL; + } + + /* Return the diagnostic message string if conversion from FROMTYPE to + TOTYPE is not allowed, NULL otherwise. 
*/ + static const char * + ia64_invalid_conversion (const_tree fromtype, const_tree totype) + { + /* Reject nontrivial conversion to or from __fpreg. */ + if (TYPE_MODE (fromtype) == RFmode + && TYPE_MODE (totype) != RFmode + && TYPE_MODE (totype) != VOIDmode) + return N_("invalid conversion from %<__fpreg%>"); + if (TYPE_MODE (totype) == RFmode + && TYPE_MODE (fromtype) != RFmode) + return N_("invalid conversion to %<__fpreg%>"); + return NULL; + } + + /* Return the diagnostic message string if the unary operation OP is + not permitted on TYPE, NULL otherwise. */ + static const char * + ia64_invalid_unary_op (int op, const_tree type) + { + /* Reject operations on __fpreg other than unary + or &. */ + if (TYPE_MODE (type) == RFmode + && op != CONVERT_EXPR + && op != ADDR_EXPR) + return N_("invalid operation on %<__fpreg%>"); + return NULL; + } + + /* Return the diagnostic message string if the binary operation OP is + not permitted on TYPE1 and TYPE2, NULL otherwise. */ + static const char * + ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2) + { + /* Reject operations on __fpreg. */ + if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode) + return N_("invalid operation on %<__fpreg%>"); + return NULL; + } + + /* HP-UX version_id attribute. + For object foo, if the version_id is set to 1234 put out an alias + of '.alias foo "foo{1234}" We can't use "foo{1234}" in anything + other than an alias statement because it is an illegal symbol name. */ + + static tree + ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED, + tree name ATTRIBUTE_UNUSED, + tree args, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) + { + tree arg = TREE_VALUE (args); + + if (TREE_CODE (arg) != STRING_CST) + { + error("version attribute is not a string"); + *no_add_attrs = true; + return NULL_TREE; + } + return NULL_TREE; + } + + /* Target hook for c_mode_for_suffix. */ + + static machine_mode + ia64_c_mode_for_suffix (char suffix) + { + if (suffix == 'q') + return TFmode; + if (suffix == 'w') + return XFmode; + + return VOIDmode; + } + + static GTY(()) rtx ia64_dconst_0_5_rtx; + + rtx + ia64_dconst_0_5 (void) + { + if (! ia64_dconst_0_5_rtx) + { + REAL_VALUE_TYPE rv; + real_from_string (&rv, "0.5"); + ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode); + } + return ia64_dconst_0_5_rtx; + } + + static GTY(()) rtx ia64_dconst_0_375_rtx; + + rtx + ia64_dconst_0_375 (void) + { + if (! ia64_dconst_0_375_rtx) + { + REAL_VALUE_TYPE rv; + real_from_string (&rv, "0.375"); + ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode); + } + return ia64_dconst_0_375_rtx; + } + + static fixed_size_mode + ia64_get_reg_raw_mode (int regno) + { + if (FR_REGNO_P (regno)) + return XFmode; + return default_get_reg_raw_mode(regno); + } + + /* Implement TARGET_MEMBER_TYPE_FORCES_BLK. ??? Might not be needed + anymore. */ + + bool + ia64_member_type_forces_blk (const_tree, machine_mode mode) + { + return TARGET_HPUX && mode == TFmode; + } + + /* Always default to .text section until HP-UX linker is fixed. */ + + ATTRIBUTE_UNUSED static section * + ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED, + enum node_frequency freq ATTRIBUTE_UNUSED, + bool startup ATTRIBUTE_UNUSED, + bool exit ATTRIBUTE_UNUSED) + { + return NULL; + } + + /* Construct (set target (vec_select op0 (parallel perm))) and + return true if that's a valid instruction in the active ISA. 
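A hypothetical snippet (identifiers illustrative) showing what the diagnostic hooks and the 'q'/'w' suffix handling above accept and reject:

  void
  example_fpreg_rules (void)
  {
    __float128 q = 1.0q;   /* 'q' suffix -> TFmode (ia64_c_mode_for_suffix).  */
    __float80 w = 2.0w;    /* 'w' suffix -> XFmode.  */
    __fpreg r;
    __fpreg *p = &r;       /* Unary & on __fpreg is allowed.  */
    /* r = r + r;             would be rejected: "invalid operation on __fpreg".  */
    (void) q; (void) w; (void) p;
  }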
*/ + + static bool + expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt) + { + rtx rperm[MAX_VECT_LEN], x; + unsigned i; + + for (i = 0; i < nelt; ++i) + rperm[i] = GEN_INT (perm[i]); + + x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm)); + x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x); + x = gen_rtx_SET (target, x); + + rtx_insn *insn = emit_insn (x); + if (recog_memoized (insn) < 0) + { + remove_insn (insn); + return false; + } + return true; + } + + /* Similar, but generate a vec_concat from op0 and op1 as well. */ + + static bool + expand_vselect_vconcat (rtx target, rtx op0, rtx op1, + const unsigned char *perm, unsigned nelt) + { + machine_mode v2mode; + rtx x; + + if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode)) + return false; + x = gen_rtx_VEC_CONCAT (v2mode, op0, op1); + return expand_vselect (target, x, perm, nelt); + } + + /* Try to expand a no-op permutation. */ + + static bool + expand_vec_perm_identity (struct expand_vec_perm_d *d) + { + unsigned i, nelt = d->nelt; + + for (i = 0; i < nelt; ++i) + if (d->perm[i] != i) + return false; + + if (!d->testing_p) + emit_move_insn (d->target, d->op0); + + return true; + } + + /* Try to expand D via a shrp instruction. */ + + static bool + expand_vec_perm_shrp (struct expand_vec_perm_d *d) + { + unsigned i, nelt = d->nelt, shift, mask; + rtx tmp, hi, lo; + + /* ??? Don't force V2SFmode into the integer registers. */ + if (d->vmode == V2SFmode) + return false; + + mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1); + + shift = d->perm[0]; + if (BYTES_BIG_ENDIAN && shift > nelt) + return false; + + for (i = 1; i < nelt; ++i) + if (d->perm[i] != ((shift + i) & mask)) + return false; + + if (d->testing_p) + return true; + + hi = shift < nelt ? d->op1 : d->op0; + lo = shift < nelt ? d->op0 : d->op1; + + shift %= nelt; + + shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT; + + /* We've eliminated the shift 0 case via expand_vec_perm_identity. */ + gcc_assert (IN_RANGE (shift, 1, 63)); + + /* Recall that big-endian elements are numbered starting at the top of + the register. Ideally we'd have a shift-left-pair. But since we + don't, convert to a shift the other direction. */ + if (BYTES_BIG_ENDIAN) + shift = 64 - shift; + + tmp = gen_reg_rtx (DImode); + hi = gen_lowpart (DImode, hi); + lo = gen_lowpart (DImode, lo); + emit_insn (gen_shrp (tmp, hi, lo, GEN_INT (shift))); + + emit_move_insn (d->target, gen_lowpart (d->vmode, tmp)); + return true; + } + + /* Try to instantiate D in a single instruction. */ + + static bool + expand_vec_perm_1 (struct expand_vec_perm_d *d) + { + unsigned i, nelt = d->nelt; + unsigned char perm2[MAX_VECT_LEN]; + + /* Try single-operand selections. */ + if (d->one_operand_p) + { + if (expand_vec_perm_identity (d)) + return true; + if (expand_vselect (d->target, d->op0, d->perm, nelt)) + return true; + } + + /* Try two operand selections. */ + if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt)) + return true; + + /* Recognize interleave style patterns with reversed operands. */ + if (!d->one_operand_p) + { + for (i = 0; i < nelt; ++i) + { + unsigned e = d->perm[i]; + if (e >= nelt) + e -= nelt; + else + e += nelt; + perm2[i] = e; + } + + if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt)) + return true; + } + + if (expand_vec_perm_shrp (d)) + return true; + + /* ??? 
Look for deposit-like permutations where most of the result + comes from one vector unchanged and the rest comes from a + sequential hunk of the other vector. */ + + return false; + } + + /* Pattern match broadcast permutations. */ + + static bool + expand_vec_perm_broadcast (struct expand_vec_perm_d *d) + { + unsigned i, elt, nelt = d->nelt; + unsigned char perm2[2]; + rtx temp; + bool ok; + + if (!d->one_operand_p) + return false; + + elt = d->perm[0]; + for (i = 1; i < nelt; ++i) + if (d->perm[i] != elt) + return false; + + switch (d->vmode) + { + case E_V2SImode: + case E_V2SFmode: + /* Implementable by interleave. */ + perm2[0] = elt; + perm2[1] = elt + 2; + ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2); + gcc_assert (ok); + break; + + case E_V8QImode: + /* Implementable by extract + broadcast. */ + if (BYTES_BIG_ENDIAN) + elt = 7 - elt; + elt *= BITS_PER_UNIT; + temp = gen_reg_rtx (DImode); + emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0), + GEN_INT (8), GEN_INT (elt))); + emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp))); + break; + + case E_V4HImode: + /* Should have been matched directly by vec_select. */ + default: + gcc_unreachable (); + } + + return true; + } + + /* A subroutine of ia64_expand_vec_perm_const_1. Try to simplify a + two vector permutation into a single vector permutation by using + an interleave operation to merge the vectors. */ + + static bool + expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d) + { + struct expand_vec_perm_d dremap, dfinal; + unsigned char remap[2 * MAX_VECT_LEN]; + unsigned contents, i, nelt, nelt2; + unsigned h0, h1, h2, h3; + rtx_insn *seq; + bool ok; + + if (d->one_operand_p) + return false; + + nelt = d->nelt; + nelt2 = nelt / 2; + + /* Examine from whence the elements come. */ + contents = 0; + for (i = 0; i < nelt; ++i) + contents |= 1u << d->perm[i]; + + memset (remap, 0xff, sizeof (remap)); + dremap = *d; + + h0 = (1u << nelt2) - 1; + h1 = h0 << nelt2; + h2 = h0 << nelt; + h3 = h0 << (nelt + nelt2); + + if ((contents & (h0 | h2)) == contents) /* punpck even halves */ + { + for (i = 0; i < nelt; ++i) + { + unsigned which = i / 2 + (i & 1 ? nelt : 0); + remap[which] = i; + dremap.perm[i] = which; + } + } + else if ((contents & (h1 | h3)) == contents) /* punpck odd halves */ + { + for (i = 0; i < nelt; ++i) + { + unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0); + remap[which] = i; + dremap.perm[i] = which; + } + } + else if ((contents & 0x5555) == contents) /* mix even elements */ + { + for (i = 0; i < nelt; ++i) + { + unsigned which = (i & ~1) + (i & 1 ? nelt : 0); + remap[which] = i; + dremap.perm[i] = which; + } + } + else if ((contents & 0xaaaa) == contents) /* mix odd elements */ + { + for (i = 0; i < nelt; ++i) + { + unsigned which = (i | 1) + (i & 1 ? nelt : 0); + remap[which] = i; + dremap.perm[i] = which; + } + } + else if (floor_log2 (contents) - ctz_hwi (contents) < (int)nelt) /* shrp */ + { + unsigned shift = ctz_hwi (contents); + for (i = 0; i < nelt; ++i) + { + unsigned which = (i + shift) & (2 * nelt - 1); + remap[which] = i; + dremap.perm[i] = which; + } + } + else + return false; + + /* Use the remapping array set up above to move the elements from their + swizzled locations into their final destinations. 
*/ + dfinal = *d; + for (i = 0; i < nelt; ++i) + { + unsigned e = remap[d->perm[i]]; + gcc_assert (e < nelt); + dfinal.perm[i] = e; + } + if (d->testing_p) + dfinal.op0 = gen_raw_REG (dfinal.vmode, LAST_VIRTUAL_REGISTER + 1); + else + dfinal.op0 = gen_reg_rtx (dfinal.vmode); + dfinal.op1 = dfinal.op0; + dfinal.one_operand_p = true; + dremap.target = dfinal.op0; + + /* Test if the final remap can be done with a single insn. For V4HImode + this *will* succeed. For V8QImode or V2SImode it may not. */ + start_sequence (); + ok = expand_vec_perm_1 (&dfinal); + seq = get_insns (); + end_sequence (); + if (!ok) + return false; + if (d->testing_p) + return true; + + ok = expand_vec_perm_1 (&dremap); + gcc_assert (ok); + + emit_insn (seq); + return true; + } + + /* A subroutine of ia64_expand_vec_perm_const_1. Emit a full V4HImode + constant permutation via two mux2 and a merge. */ + + static bool + expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d) + { + unsigned char perm2[4]; + rtx rmask[4]; + unsigned i; + rtx t0, t1, mask, x; + bool ok; + + if (d->vmode != V4HImode || d->one_operand_p) + return false; + if (d->testing_p) + return true; + + for (i = 0; i < 4; ++i) + { + perm2[i] = d->perm[i] & 3; + rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx); + } + mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask)); + mask = force_reg (V4HImode, mask); + + t0 = gen_reg_rtx (V4HImode); + t1 = gen_reg_rtx (V4HImode); + + ok = expand_vselect (t0, d->op0, perm2, 4); + gcc_assert (ok); + ok = expand_vselect (t1, d->op1, perm2, 4); + gcc_assert (ok); + + x = gen_rtx_AND (V4HImode, mask, t0); + emit_insn (gen_rtx_SET (t0, x)); + + x = gen_rtx_NOT (V4HImode, mask); + x = gen_rtx_AND (V4HImode, x, t1); + emit_insn (gen_rtx_SET (t1, x)); + + x = gen_rtx_IOR (V4HImode, t0, t1); + emit_insn (gen_rtx_SET (d->target, x)); + + return true; + } + + /* The guts of ia64_expand_vec_perm_const, also used by the ok hook. + With all of the interface bits taken care of, perform the expansion + in D and return true on success. */ + + static bool + ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) + { + if (expand_vec_perm_1 (d)) + return true; + if (expand_vec_perm_broadcast (d)) + return true; + if (expand_vec_perm_interleave_2 (d)) + return true; + if (expand_vec_perm_v4hi_5 (d)) + return true; + return false; + } + + /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ + + static bool + ia64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, + rtx op1, const vec_perm_indices &sel) + { + struct expand_vec_perm_d d; + unsigned char perm[MAX_VECT_LEN]; + unsigned int i, nelt, which; + + d.target = target; + if (op0) + { + rtx nop0 = force_reg (vmode, op0); + if (op0 == op1) + op1 = nop0; + op0 = nop0; + } + if (op1) + op1 = force_reg (vmode, op1); + d.op0 = op0; + d.op1 = op1; + + d.vmode = vmode; + gcc_assert (VECTOR_MODE_P (d.vmode)); + d.nelt = nelt = GET_MODE_NUNITS (d.vmode); + d.testing_p = !target; + + gcc_assert (sel.length () == nelt); + gcc_checking_assert (sizeof (d.perm) == sizeof (perm)); + + for (i = which = 0; i < nelt; ++i) + { + unsigned int ei = sel[i] & (2 * nelt - 1); + + which |= (ei < nelt ? 1 : 2); + d.perm[i] = ei; + perm[i] = ei; + } + + switch (which) + { + default: + gcc_unreachable(); + + case 3: + if (d.testing_p || !rtx_equal_p (d.op0, d.op1)) + { + d.one_operand_p = false; + break; + } + + /* The elements of PERM do not suggest that only the first operand + is used, but both operands are identical. 
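These expanders sit behind TARGET_VECTORIZE_VEC_PERM_CONST; a hedged GNU C sketch (types and selector illustrative) of a constant two-operand V4HImode shuffle that would be routed through them:

  typedef short v4hi __attribute__ ((vector_size (8)));

  v4hi
  example_perm (v4hi a, v4hi b)
  {
    /* Elements 0-3 of the selector pick from A, 4-7 pick from B; a constant
       selector lets the middle end query and expand the permutation via the
       hook above.  */
    const v4hi sel = { 0, 5, 2, 7 };
    return __builtin_shuffle (a, b, sel);
  }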
Allow easier matching + of the permutation by folding the permutation into the single + input vector. */ + for (i = 0; i < nelt; ++i) + if (d.perm[i] >= nelt) + d.perm[i] -= nelt; + /* FALLTHRU */ + + case 1: + d.op1 = d.op0; + d.one_operand_p = true; + break; + + case 2: + for (i = 0; i < nelt; ++i) + d.perm[i] -= nelt; + d.op0 = d.op1; + d.one_operand_p = true; + break; + } + + if (d.testing_p) + { + /* We have to go through the motions and see if we can + figure out how to generate the requested permutation. */ + d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); + d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); + if (!d.one_operand_p) + d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); + + start_sequence (); + bool ret = ia64_expand_vec_perm_const_1 (&d); + end_sequence (); + + return ret; + } + + if (ia64_expand_vec_perm_const_1 (&d)) + return true; + + /* If the mask says both arguments are needed, but they are the same, + the above tried to expand with one_operand_p true. If that didn't + work, retry with one_operand_p false, as that's what we used in _ok. */ + if (which == 3 && d.one_operand_p) + { + memcpy (d.perm, perm, sizeof (perm)); + d.one_operand_p = false; + return ia64_expand_vec_perm_const_1 (&d); + } + + return false; + } + + void + ia64_expand_vec_setv2sf (rtx operands[3]) + { + struct expand_vec_perm_d d; + unsigned int which; + bool ok; + + d.target = operands[0]; + d.op0 = operands[0]; + d.op1 = gen_reg_rtx (V2SFmode); + d.vmode = V2SFmode; + d.nelt = 2; + d.one_operand_p = false; + d.testing_p = false; + + which = INTVAL (operands[2]); + gcc_assert (which <= 1); + d.perm[0] = 1 - which; + d.perm[1] = which + 2; + + emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode))); + + ok = ia64_expand_vec_perm_const_1 (&d); + gcc_assert (ok); + } + + void + ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd) + { + struct expand_vec_perm_d d; + machine_mode vmode = GET_MODE (target); + unsigned int i, nelt = GET_MODE_NUNITS (vmode); + bool ok; + + d.target = target; + d.op0 = op0; + d.op1 = op1; + d.vmode = vmode; + d.nelt = nelt; + d.one_operand_p = false; + d.testing_p = false; + + for (i = 0; i < nelt; ++i) + d.perm[i] = i * 2 + odd; + + ok = ia64_expand_vec_perm_const_1 (&d); + gcc_assert (ok); + } + + /* Implement TARGET_CAN_CHANGE_MODE_CLASS. + + In BR regs, we can't change the DImode at all. + In FP regs, we can't change FP values to integer values and vice versa, + but we can change e.g. DImode to SImode, and V2SFmode into DImode. */ + + static bool + ia64_can_change_mode_class (machine_mode from, machine_mode to, + reg_class_t rclass) + { + if (reg_classes_intersect_p (rclass, BR_REGS)) + return from == to; + if (SCALAR_FLOAT_MODE_P (from) != SCALAR_FLOAT_MODE_P (to)) + return !reg_classes_intersect_p (rclass, FR_REGS); + return true; + } + ++static void ++ia64_linux_file_end (void) ++{ ++ int saved_trampolines_created = trampolines_created; ++ trampolines_created = 0; ++ file_end_indicate_exec_stack (); ++ trampolines_created = saved_trampolines_created; ++} ++ + #include "gt-ia64.h" diff --cc gcc/config/rs6000/rs6000.cc index 00000000000,7a4ef5e6c0a..3b21fdc8bb0 mode 000000,100644..100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@@ -1,0 -1,28925 +1,28942 @@@ + // SPDX-License-Identifier: GPL-3.0-or-later + /* Subroutines used for code generation on IBM RS/6000. + Copyright (C) 1991-2022 Free Software Foundation, Inc. 
+ Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + + #define IN_TARGET_CODE 1 + + #include "config.h" + #include "system.h" + #include "coretypes.h" + #include "backend.h" + #include "rtl.h" + #include "tree.h" + #include "memmodel.h" + #include "gimple.h" + #include "cfghooks.h" + #include "cfgloop.h" + #include "df.h" + #include "tm_p.h" + #include "stringpool.h" + #include "expmed.h" + #include "optabs.h" + #include "regs.h" + #include "ira.h" + #include "recog.h" + #include "cgraph.h" + #include "diagnostic-core.h" + #include "insn-attr.h" + #include "flags.h" + #include "alias.h" + #include "fold-const.h" + #include "attribs.h" + #include "stor-layout.h" + #include "calls.h" + #include "print-tree.h" + #include "varasm.h" + #include "explow.h" + #include "expr.h" + #include "output.h" + #include "common/common-target.h" + #include "langhooks.h" + #include "reload.h" + #include "sched-int.h" + #include "gimplify.h" + #include "gimple-fold.h" + #include "gimple-iterator.h" + #include "gimple-walk.h" + #include "ssa.h" + #include "tree-vectorizer.h" + #include "tree-ssa-propagate.h" + #include "intl.h" + #include "tm-constrs.h" + #include "target-globals.h" + #include "builtins.h" + #include "tree-vector-builder.h" + #include "context.h" + #include "tree-pass.h" + #include "symbol-summary.h" + #include "ipa-prop.h" + #include "ipa-fnsummary.h" + #include "except.h" + #if TARGET_XCOFF + #include "xcoffout.h" /* get declarations of xcoff_*_section_name */ + #endif + #include "case-cfn-macros.h" + #include "ppc-auxv.h" + #include "rs6000-internal.h" + #include "opts.h" + + /* This file should be included last. */ + #include "target-def.h" + + /* Set -mabi=ieeelongdouble on some old targets. In the future, power server + systems will also set long double to be IEEE 128-bit. AIX and Darwin + explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so + those systems will not pick up this default. This needs to be after all + of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are + properly defined. */ + #ifndef TARGET_IEEEQUAD_DEFAULT + #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD) + #define TARGET_IEEEQUAD_DEFAULT 1 + #else + #define TARGET_IEEEQUAD_DEFAULT 0 + #endif + #endif + + /* Don't enable PC-relative addressing if the target does not support it. */ + #ifndef PCREL_SUPPORTED_BY_OS + #define PCREL_SUPPORTED_BY_OS 0 + #endif + + /* Support targetm.vectorize.builtin_mask_for_load. */ + tree altivec_builtin_mask_for_load; + + #ifdef USING_ELFOS_H + /* Counter for labels which are to be placed in .fixup. */ + int fixuplabelno = 0; + #endif + + /* Whether to use variant of AIX ABI for PowerPC64 Linux. */ + int dot_symbols; + + /* Specify the machine mode that pointers have. After generation of rtl, the + compiler makes no further distinction between pointers and any other objects + of this machine mode. 
*/ + scalar_int_mode rs6000_pmode; + + #if TARGET_ELF + /* Note whether IEEE 128-bit floating point was passed or returned, either as + the __float128/_Float128 explicit type, or when long double is IEEE 128-bit + floating point. We changed the default C++ mangling for these types and we + may want to generate a weak alias of the old mangling (U10__float128) to the + new mangling (u9__ieee128). */ + bool rs6000_passes_ieee128 = false; + #endif + + /* Track use of r13 in 64bit AIX TLS. */ + static bool xcoff_tls_exec_model_detected = false; + + /* Generate the manged name (i.e. U10__float128) used in GCC 8.1, and not the + name used in current releases (i.e. u9__ieee128). */ + static bool ieee128_mangling_gcc_8_1; + + /* Width in bits of a pointer. */ + unsigned rs6000_pointer_size; + + #ifdef HAVE_AS_GNU_ATTRIBUTE + # ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE + # define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0 + # endif + /* Flag whether floating point values have been passed/returned. + Note that this doesn't say whether fprs are used, since the + Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls + should be set for soft-float values passed in gprs and ieee128 + values passed in vsx registers. */ + bool rs6000_passes_float = false; + bool rs6000_passes_long_double = false; + /* Flag whether vector values have been passed/returned. */ + bool rs6000_passes_vector = false; + /* Flag whether small (<= 8 byte) structures have been returned. */ + bool rs6000_returns_struct = false; + #endif + + /* Value is TRUE if register/mode pair is acceptable. */ + static bool rs6000_hard_regno_mode_ok_p + [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER]; + + /* Maximum number of registers needed for a given register class and mode. */ + unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES]; + + /* How many registers are needed for a given register and mode. */ + unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER]; + + /* Map register number to register class. */ + enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER]; + + static int dbg_cost_ctrl; + + /* Built in types. */ + tree rs6000_builtin_types[RS6000_BTI_MAX]; + + /* Flag to say the TOC is initialized */ + int toc_initialized, need_toc_init; + char toc_label_name[10]; + + /* Cached value of rs6000_variable_issue. This is cached in + rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */ + static short cached_can_issue_more; + + static GTY(()) section *read_only_data_section; + static GTY(()) section *private_data_section; + static GTY(()) section *tls_data_section; + static GTY(()) section *tls_private_data_section; + static GTY(()) section *read_only_private_data_section; + static GTY(()) section *sdata2_section; + + section *toc_section = 0; + + /* Describe the vector unit used for modes. */ + enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES]; + enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES]; + + /* Register classes for various constraints that are based on the target + switches. */ + enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX]; + + /* Describe the alignment of a vector. */ + int rs6000_vector_align[NUM_MACHINE_MODES]; + + /* Map selected modes to types for builtins. */ + tree builtin_mode_to_type[MAX_MACHINE_MODE][2]; + + /* What modes to automatically generate reciprocal divide estimate (fre) and + reciprocal sqrt (frsqrte) for. 
*/ + unsigned char rs6000_recip_bits[MAX_MACHINE_MODE]; + + /* Masks to determine which reciprocal esitmate instructions to generate + automatically. */ + enum rs6000_recip_mask { + RECIP_SF_DIV = 0x001, /* Use divide estimate */ + RECIP_DF_DIV = 0x002, + RECIP_V4SF_DIV = 0x004, + RECIP_V2DF_DIV = 0x008, + + RECIP_SF_RSQRT = 0x010, /* Use reciprocal sqrt estimate. */ + RECIP_DF_RSQRT = 0x020, + RECIP_V4SF_RSQRT = 0x040, + RECIP_V2DF_RSQRT = 0x080, + + /* Various combination of flags for -mrecip=xxx. */ + RECIP_NONE = 0, + RECIP_ALL = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV + | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT + | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT), + + RECIP_HIGH_PRECISION = RECIP_ALL, + + /* On low precision machines like the power5, don't enable double precision + reciprocal square root estimate, since it isn't accurate enough. */ + RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT)) + }; + + /* -mrecip options. */ + static struct + { + const char *string; /* option name */ + unsigned int mask; /* mask bits to set */ + } recip_options[] = { + { "all", RECIP_ALL }, + { "none", RECIP_NONE }, + { "div", (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV + | RECIP_V2DF_DIV) }, + { "divf", (RECIP_SF_DIV | RECIP_V4SF_DIV) }, + { "divd", (RECIP_DF_DIV | RECIP_V2DF_DIV) }, + { "rsqrt", (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT + | RECIP_V2DF_RSQRT) }, + { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) }, + { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) }, + }; + + /* On PowerPC, we have a limited number of target clones that we care about + which means we can use an array to hold the options, rather than having more + elaborate data structures to identify each possible variation. Order the + clones from the default to the highest ISA. */ + enum { + CLONE_DEFAULT = 0, /* default clone. */ + CLONE_ISA_2_05, /* ISA 2.05 (power6). */ + CLONE_ISA_2_06, /* ISA 2.06 (power7). */ + CLONE_ISA_2_07, /* ISA 2.07 (power8). */ + CLONE_ISA_3_00, /* ISA 3.0 (power9). */ + CLONE_ISA_3_1, /* ISA 3.1 (power10). */ + CLONE_MAX + }; + + /* Map compiler ISA bits into HWCAP names. */ + struct clone_map { + HOST_WIDE_INT isa_mask; /* rs6000_isa mask */ + const char *name; /* name to use in __builtin_cpu_supports. */ + }; + + static const struct clone_map rs6000_clone_map[CLONE_MAX] = { + { 0, "" }, /* Default options. */ + { OPTION_MASK_CMPB, "arch_2_05" }, /* ISA 2.05 (power6). */ + { OPTION_MASK_POPCNTD, "arch_2_06" }, /* ISA 2.06 (power7). */ + { OPTION_MASK_P8_VECTOR, "arch_2_07" }, /* ISA 2.07 (power8). */ + { OPTION_MASK_P9_VECTOR, "arch_3_00" }, /* ISA 3.0 (power9). */ + { OPTION_MASK_POWER10, "arch_3_1" }, /* ISA 3.1 (power10). */ + }; + + + /* Newer LIBCs explicitly export this symbol to declare that they provide + the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a + reference to this symbol whenever we expand a CPU builtin, so that + we never link against an old LIBC. */ + const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform"; + + /* True if we have expanded a CPU builtin. */ + bool cpu_builtin_p = false; + + /* Pointer to function (in rs6000-c.cc) that can define or undefine target + macros that have changed. Languages that don't support the preprocessor + don't link in rs6000-c.cc, so we can't call it directly. */ + void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT); + + /* Simplfy register classes into simpler classifications. 
We assume + GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range + check for standard register classes (gpr/floating/altivec/vsx) and + floating/vector classes (float/altivec/vsx). */ + + enum rs6000_reg_type { + NO_REG_TYPE, + PSEUDO_REG_TYPE, + GPR_REG_TYPE, + VSX_REG_TYPE, + ALTIVEC_REG_TYPE, + FPR_REG_TYPE, + SPR_REG_TYPE, + CR_REG_TYPE + }; + + /* Map register class to register type. */ + static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES]; + + /* First/last register type for the 'normal' register types (i.e. general + purpose, floating point, altivec, and VSX registers). */ + #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE) + + #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE) + + + /* Register classes we care about in secondary reload or go if legitimate + address. We only need to worry about GPR, FPR, and Altivec registers here, + along an ANY field that is the OR of the 3 register classes. */ + + enum rs6000_reload_reg_type { + RELOAD_REG_GPR, /* General purpose registers. */ + RELOAD_REG_FPR, /* Traditional floating point regs. */ + RELOAD_REG_VMX, /* Altivec (VMX) registers. */ + RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. */ + N_RELOAD_REG + }; + + /* For setting up register classes, loop through the 3 register classes mapping + into real registers, and skip the ANY class, which is just an OR of the + bits. */ + #define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR + #define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX + + /* Map reload register type to a register in the register class. */ + struct reload_reg_map_type { + const char *name; /* Register class name. */ + int reg; /* Register in the register class. */ + }; + + static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = { + { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */ + { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */ + { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */ + { "Any", -1 }, /* RELOAD_REG_ANY. */ + }; + + /* Mask bits for each register class, indexed per mode. Historically the + compiler has been more restrictive which types can do PRE_MODIFY instead of + PRE_INC and PRE_DEC, so keep track of sepaate bits for these two. */ + typedef unsigned char addr_mask_type; + + #define RELOAD_REG_VALID 0x01 /* Mode valid in register.. */ + #define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */ + #define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */ + #define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */ + #define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */ + #define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */ + #define RELOAD_REG_AND_M16 0x40 /* AND -16 addressing. */ + #define RELOAD_REG_QUAD_OFFSET 0x80 /* quad offset is limited. */ + + /* Register type masks based on the type, of valid addressing modes. */ + struct rs6000_reg_addr { + enum insn_code reload_load; /* INSN to reload for loading. */ + enum insn_code reload_store; /* INSN to reload for storing. */ + enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */ + enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */ + enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */ + addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */ + bool scalar_in_vmx_p; /* Scalar value can go in VMX. */ + }; + + static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES]; + + /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. 
*/ + static inline bool + mode_supports_pre_incdec_p (machine_mode mode) + { + return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC) + != 0); + } + + /* Helper function to say whether a mode supports PRE_MODIFY. */ + static inline bool + mode_supports_pre_modify_p (machine_mode mode) + { + return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY) + != 0); + } + + /* Return true if we have D-form addressing in altivec registers. */ + static inline bool + mode_supports_vmx_dform (machine_mode mode) + { + return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0); + } + + /* Return true if we have D-form addressing in VSX registers. This addressing + is more limited than normal d-form addressing in that the offset must be + aligned on a 16-byte boundary. */ + static inline bool + mode_supports_dq_form (machine_mode mode) + { + return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET) + != 0); + } + + /* Given that there exists at least one variable that is set (produced) + by OUT_INSN and read (consumed) by IN_INSN, return true iff + IN_INSN represents one or more memory store operations and none of + the variables set by OUT_INSN is used by IN_INSN as the address of a + store operation. If either IN_INSN or OUT_INSN does not represent + a "single" RTL SET expression (as loosely defined by the + implementation of the single_set function) or a PARALLEL with only + SETs, CLOBBERs, and USEs inside, this function returns false. + + This rs6000-specific version of store_data_bypass_p checks for + certain conditions that result in assertion failures (and internal + compiler errors) in the generic store_data_bypass_p function and + returns false rather than calling store_data_bypass_p if one of the + problematic conditions is detected. */ + + int + rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn) + { + rtx out_set, in_set; + rtx out_pat, in_pat; + rtx out_exp, in_exp; + int i, j; + + in_set = single_set (in_insn); + if (in_set) + { + if (MEM_P (SET_DEST (in_set))) + { + out_set = single_set (out_insn); + if (!out_set) + { + out_pat = PATTERN (out_insn); + if (GET_CODE (out_pat) == PARALLEL) + { + for (i = 0; i < XVECLEN (out_pat, 0); i++) + { + out_exp = XVECEXP (out_pat, 0, i); + if ((GET_CODE (out_exp) == CLOBBER) + || (GET_CODE (out_exp) == USE)) + continue; + else if (GET_CODE (out_exp) != SET) + return false; + } + } + } + } + } + else + { + in_pat = PATTERN (in_insn); + if (GET_CODE (in_pat) != PARALLEL) + return false; + + for (i = 0; i < XVECLEN (in_pat, 0); i++) + { + in_exp = XVECEXP (in_pat, 0, i); + if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE)) + continue; + else if (GET_CODE (in_exp) != SET) + return false; + + if (MEM_P (SET_DEST (in_exp))) + { + out_set = single_set (out_insn); + if (!out_set) + { + out_pat = PATTERN (out_insn); + if (GET_CODE (out_pat) != PARALLEL) + return false; + for (j = 0; j < XVECLEN (out_pat, 0); j++) + { + out_exp = XVECEXP (out_pat, 0, j); + if ((GET_CODE (out_exp) == CLOBBER) + || (GET_CODE (out_exp) == USE)) + continue; + else if (GET_CODE (out_exp) != SET) + return false; + } + } + } + } + } + return store_data_bypass_p (out_insn, in_insn); + } + + + /* Processor costs (relative to an add) */ + + const struct processor_costs *rs6000_cost; + + /* Instruction size costs on 32bit processors. 
*/ + static const + struct processor_costs size32_cost = { + COSTS_N_INSNS (1), /* mulsi */ + COSTS_N_INSNS (1), /* mulsi_const */ + COSTS_N_INSNS (1), /* mulsi_const9 */ + COSTS_N_INSNS (1), /* muldi */ + COSTS_N_INSNS (1), /* divsi */ + COSTS_N_INSNS (1), /* divdi */ + COSTS_N_INSNS (1), /* fp */ + COSTS_N_INSNS (1), /* dmul */ + COSTS_N_INSNS (1), /* sdiv */ + COSTS_N_INSNS (1), /* ddiv */ + 32, /* cache line size */ + 0, /* l1 cache */ + 0, /* l2 cache */ + 0, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction size costs on 64bit processors. */ + static const + struct processor_costs size64_cost = { + COSTS_N_INSNS (1), /* mulsi */ + COSTS_N_INSNS (1), /* mulsi_const */ + COSTS_N_INSNS (1), /* mulsi_const9 */ + COSTS_N_INSNS (1), /* muldi */ + COSTS_N_INSNS (1), /* divsi */ + COSTS_N_INSNS (1), /* divdi */ + COSTS_N_INSNS (1), /* fp */ + COSTS_N_INSNS (1), /* dmul */ + COSTS_N_INSNS (1), /* sdiv */ + COSTS_N_INSNS (1), /* ddiv */ + 128, /* cache line size */ + 0, /* l1 cache */ + 0, /* l2 cache */ + 0, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on RS64A processors. */ + static const + struct processor_costs rs64a_cost = { + COSTS_N_INSNS (20), /* mulsi */ + COSTS_N_INSNS (12), /* mulsi_const */ + COSTS_N_INSNS (8), /* mulsi_const9 */ + COSTS_N_INSNS (34), /* muldi */ + COSTS_N_INSNS (65), /* divsi */ + COSTS_N_INSNS (67), /* divdi */ + COSTS_N_INSNS (4), /* fp */ + COSTS_N_INSNS (4), /* dmul */ + COSTS_N_INSNS (31), /* sdiv */ + COSTS_N_INSNS (31), /* ddiv */ + 128, /* cache line size */ + 128, /* l1 cache */ + 2048, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on MPCCORE processors. */ + static const + struct processor_costs mpccore_cost = { + COSTS_N_INSNS (2), /* mulsi */ + COSTS_N_INSNS (2), /* mulsi_const */ + COSTS_N_INSNS (2), /* mulsi_const9 */ + COSTS_N_INSNS (2), /* muldi */ + COSTS_N_INSNS (6), /* divsi */ + COSTS_N_INSNS (6), /* divdi */ + COSTS_N_INSNS (4), /* fp */ + COSTS_N_INSNS (5), /* dmul */ + COSTS_N_INSNS (10), /* sdiv */ + COSTS_N_INSNS (17), /* ddiv */ + 32, /* cache line size */ + 4, /* l1 cache */ + 16, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPC403 processors. */ + static const + struct processor_costs ppc403_cost = { + COSTS_N_INSNS (4), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (4), /* mulsi_const9 */ + COSTS_N_INSNS (4), /* muldi */ + COSTS_N_INSNS (33), /* divsi */ + COSTS_N_INSNS (33), /* divdi */ + COSTS_N_INSNS (11), /* fp */ + COSTS_N_INSNS (11), /* dmul */ + COSTS_N_INSNS (11), /* sdiv */ + COSTS_N_INSNS (11), /* ddiv */ + 32, /* cache line size */ + 4, /* l1 cache */ + 16, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPC405 processors. */ + static const + struct processor_costs ppc405_cost = { + COSTS_N_INSNS (5), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (3), /* mulsi_const9 */ + COSTS_N_INSNS (5), /* muldi */ + COSTS_N_INSNS (35), /* divsi */ + COSTS_N_INSNS (35), /* divdi */ + COSTS_N_INSNS (11), /* fp */ + COSTS_N_INSNS (11), /* dmul */ + COSTS_N_INSNS (11), /* sdiv */ + COSTS_N_INSNS (11), /* ddiv */ + 32, /* cache line size */ + 16, /* l1 cache */ + 128, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPC440 processors. 
*/ + static const + struct processor_costs ppc440_cost = { + COSTS_N_INSNS (3), /* mulsi */ + COSTS_N_INSNS (2), /* mulsi_const */ + COSTS_N_INSNS (2), /* mulsi_const9 */ + COSTS_N_INSNS (3), /* muldi */ + COSTS_N_INSNS (34), /* divsi */ + COSTS_N_INSNS (34), /* divdi */ + COSTS_N_INSNS (5), /* fp */ + COSTS_N_INSNS (5), /* dmul */ + COSTS_N_INSNS (19), /* sdiv */ + COSTS_N_INSNS (33), /* ddiv */ + 32, /* cache line size */ + 32, /* l1 cache */ + 256, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPC476 processors. */ + static const + struct processor_costs ppc476_cost = { + COSTS_N_INSNS (4), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (4), /* mulsi_const9 */ + COSTS_N_INSNS (4), /* muldi */ + COSTS_N_INSNS (11), /* divsi */ + COSTS_N_INSNS (11), /* divdi */ + COSTS_N_INSNS (6), /* fp */ + COSTS_N_INSNS (6), /* dmul */ + COSTS_N_INSNS (19), /* sdiv */ + COSTS_N_INSNS (33), /* ddiv */ + 32, /* l1 cache line size */ + 32, /* l1 cache */ + 512, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPC601 processors. */ + static const + struct processor_costs ppc601_cost = { + COSTS_N_INSNS (5), /* mulsi */ + COSTS_N_INSNS (5), /* mulsi_const */ + COSTS_N_INSNS (5), /* mulsi_const9 */ + COSTS_N_INSNS (5), /* muldi */ + COSTS_N_INSNS (36), /* divsi */ + COSTS_N_INSNS (36), /* divdi */ + COSTS_N_INSNS (4), /* fp */ + COSTS_N_INSNS (5), /* dmul */ + COSTS_N_INSNS (17), /* sdiv */ + COSTS_N_INSNS (31), /* ddiv */ + 32, /* cache line size */ + 32, /* l1 cache */ + 256, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPC603 processors. */ + static const + struct processor_costs ppc603_cost = { + COSTS_N_INSNS (5), /* mulsi */ + COSTS_N_INSNS (3), /* mulsi_const */ + COSTS_N_INSNS (2), /* mulsi_const9 */ + COSTS_N_INSNS (5), /* muldi */ + COSTS_N_INSNS (37), /* divsi */ + COSTS_N_INSNS (37), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (4), /* dmul */ + COSTS_N_INSNS (18), /* sdiv */ + COSTS_N_INSNS (33), /* ddiv */ + 32, /* cache line size */ + 8, /* l1 cache */ + 64, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPC604 processors. */ + static const + struct processor_costs ppc604_cost = { + COSTS_N_INSNS (4), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (4), /* mulsi_const9 */ + COSTS_N_INSNS (4), /* muldi */ + COSTS_N_INSNS (20), /* divsi */ + COSTS_N_INSNS (20), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (18), /* sdiv */ + COSTS_N_INSNS (32), /* ddiv */ + 32, /* cache line size */ + 16, /* l1 cache */ + 512, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPC604e processors. */ + static const + struct processor_costs ppc604e_cost = { + COSTS_N_INSNS (2), /* mulsi */ + COSTS_N_INSNS (2), /* mulsi_const */ + COSTS_N_INSNS (2), /* mulsi_const9 */ + COSTS_N_INSNS (2), /* muldi */ + COSTS_N_INSNS (20), /* divsi */ + COSTS_N_INSNS (20), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (18), /* sdiv */ + COSTS_N_INSNS (32), /* ddiv */ + 32, /* cache line size */ + 32, /* l1 cache */ + 1024, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPC620 processors. 
*/ + static const + struct processor_costs ppc620_cost = { + COSTS_N_INSNS (5), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (3), /* mulsi_const9 */ + COSTS_N_INSNS (7), /* muldi */ + COSTS_N_INSNS (21), /* divsi */ + COSTS_N_INSNS (37), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (18), /* sdiv */ + COSTS_N_INSNS (32), /* ddiv */ + 128, /* cache line size */ + 32, /* l1 cache */ + 1024, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPC630 processors. */ + static const + struct processor_costs ppc630_cost = { + COSTS_N_INSNS (5), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (3), /* mulsi_const9 */ + COSTS_N_INSNS (7), /* muldi */ + COSTS_N_INSNS (21), /* divsi */ + COSTS_N_INSNS (37), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (17), /* sdiv */ + COSTS_N_INSNS (21), /* ddiv */ + 128, /* cache line size */ + 64, /* l1 cache */ + 1024, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on Cell processor. */ + /* COSTS_N_INSNS (1) ~ one add. */ + static const + struct processor_costs ppccell_cost = { + COSTS_N_INSNS (9/2)+2, /* mulsi */ + COSTS_N_INSNS (6/2), /* mulsi_const */ + COSTS_N_INSNS (6/2), /* mulsi_const9 */ + COSTS_N_INSNS (15/2)+2, /* muldi */ + COSTS_N_INSNS (38/2), /* divsi */ + COSTS_N_INSNS (70/2), /* divdi */ + COSTS_N_INSNS (10/2), /* fp */ + COSTS_N_INSNS (10/2), /* dmul */ + COSTS_N_INSNS (74/2), /* sdiv */ + COSTS_N_INSNS (74/2), /* ddiv */ + 128, /* cache line size */ + 32, /* l1 cache */ + 512, /* l2 cache */ + 6, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPC750 and PPC7400 processors. */ + static const + struct processor_costs ppc750_cost = { + COSTS_N_INSNS (5), /* mulsi */ + COSTS_N_INSNS (3), /* mulsi_const */ + COSTS_N_INSNS (2), /* mulsi_const9 */ + COSTS_N_INSNS (5), /* muldi */ + COSTS_N_INSNS (17), /* divsi */ + COSTS_N_INSNS (17), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (17), /* sdiv */ + COSTS_N_INSNS (31), /* ddiv */ + 32, /* cache line size */ + 32, /* l1 cache */ + 512, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPC7450 processors. */ + static const + struct processor_costs ppc7450_cost = { + COSTS_N_INSNS (4), /* mulsi */ + COSTS_N_INSNS (3), /* mulsi_const */ + COSTS_N_INSNS (3), /* mulsi_const9 */ + COSTS_N_INSNS (4), /* muldi */ + COSTS_N_INSNS (23), /* divsi */ + COSTS_N_INSNS (23), /* divdi */ + COSTS_N_INSNS (5), /* fp */ + COSTS_N_INSNS (5), /* dmul */ + COSTS_N_INSNS (21), /* sdiv */ + COSTS_N_INSNS (35), /* ddiv */ + 32, /* cache line size */ + 32, /* l1 cache */ + 1024, /* l2 cache */ + 1, /* streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPC8540 processors. */ + static const + struct processor_costs ppc8540_cost = { + COSTS_N_INSNS (4), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (4), /* mulsi_const9 */ + COSTS_N_INSNS (4), /* muldi */ + COSTS_N_INSNS (19), /* divsi */ + COSTS_N_INSNS (19), /* divdi */ + COSTS_N_INSNS (4), /* fp */ + COSTS_N_INSNS (4), /* dmul */ + COSTS_N_INSNS (29), /* sdiv */ + COSTS_N_INSNS (29), /* ddiv */ + 32, /* cache line size */ + 32, /* l1 cache */ + 256, /* l2 cache */ + 1, /* prefetch streams /*/ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on E300C2 and E300C3 cores. 
*/ + static const + struct processor_costs ppce300c2c3_cost = { + COSTS_N_INSNS (4), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (4), /* mulsi_const9 */ + COSTS_N_INSNS (4), /* muldi */ + COSTS_N_INSNS (19), /* divsi */ + COSTS_N_INSNS (19), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (4), /* dmul */ + COSTS_N_INSNS (18), /* sdiv */ + COSTS_N_INSNS (33), /* ddiv */ + 32, + 16, /* l1 cache */ + 16, /* l2 cache */ + 1, /* prefetch streams /*/ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPCE500MC processors. */ + static const + struct processor_costs ppce500mc_cost = { + COSTS_N_INSNS (4), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (4), /* mulsi_const9 */ + COSTS_N_INSNS (4), /* muldi */ + COSTS_N_INSNS (14), /* divsi */ + COSTS_N_INSNS (14), /* divdi */ + COSTS_N_INSNS (8), /* fp */ + COSTS_N_INSNS (10), /* dmul */ + COSTS_N_INSNS (36), /* sdiv */ + COSTS_N_INSNS (66), /* ddiv */ + 64, /* cache line size */ + 32, /* l1 cache */ + 128, /* l2 cache */ + 1, /* prefetch streams /*/ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPCE500MC64 processors. */ + static const + struct processor_costs ppce500mc64_cost = { + COSTS_N_INSNS (4), /* mulsi */ + COSTS_N_INSNS (4), /* mulsi_const */ + COSTS_N_INSNS (4), /* mulsi_const9 */ + COSTS_N_INSNS (4), /* muldi */ + COSTS_N_INSNS (14), /* divsi */ + COSTS_N_INSNS (14), /* divdi */ + COSTS_N_INSNS (4), /* fp */ + COSTS_N_INSNS (10), /* dmul */ + COSTS_N_INSNS (36), /* sdiv */ + COSTS_N_INSNS (66), /* ddiv */ + 64, /* cache line size */ + 32, /* l1 cache */ + 128, /* l2 cache */ + 1, /* prefetch streams /*/ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPCE5500 processors. */ + static const + struct processor_costs ppce5500_cost = { + COSTS_N_INSNS (5), /* mulsi */ + COSTS_N_INSNS (5), /* mulsi_const */ + COSTS_N_INSNS (4), /* mulsi_const9 */ + COSTS_N_INSNS (5), /* muldi */ + COSTS_N_INSNS (14), /* divsi */ + COSTS_N_INSNS (14), /* divdi */ + COSTS_N_INSNS (7), /* fp */ + COSTS_N_INSNS (10), /* dmul */ + COSTS_N_INSNS (36), /* sdiv */ + COSTS_N_INSNS (66), /* ddiv */ + 64, /* cache line size */ + 32, /* l1 cache */ + 128, /* l2 cache */ + 1, /* prefetch streams /*/ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on PPCE6500 processors. */ + static const + struct processor_costs ppce6500_cost = { + COSTS_N_INSNS (5), /* mulsi */ + COSTS_N_INSNS (5), /* mulsi_const */ + COSTS_N_INSNS (4), /* mulsi_const9 */ + COSTS_N_INSNS (5), /* muldi */ + COSTS_N_INSNS (14), /* divsi */ + COSTS_N_INSNS (14), /* divdi */ + COSTS_N_INSNS (7), /* fp */ + COSTS_N_INSNS (10), /* dmul */ + COSTS_N_INSNS (36), /* sdiv */ + COSTS_N_INSNS (66), /* ddiv */ + 64, /* cache line size */ + 32, /* l1 cache */ + 128, /* l2 cache */ + 1, /* prefetch streams /*/ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on AppliedMicro Titan processors. */ + static const + struct processor_costs titan_cost = { + COSTS_N_INSNS (5), /* mulsi */ + COSTS_N_INSNS (5), /* mulsi_const */ + COSTS_N_INSNS (5), /* mulsi_const9 */ + COSTS_N_INSNS (5), /* muldi */ + COSTS_N_INSNS (18), /* divsi */ + COSTS_N_INSNS (18), /* divdi */ + COSTS_N_INSNS (10), /* fp */ + COSTS_N_INSNS (10), /* dmul */ + COSTS_N_INSNS (46), /* sdiv */ + COSTS_N_INSNS (72), /* ddiv */ + 32, /* cache line size */ + 32, /* l1 cache */ + 512, /* l2 cache */ + 1, /* prefetch streams /*/ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on POWER4 and POWER5 processors. 
*/ + static const + struct processor_costs power4_cost = { + COSTS_N_INSNS (3), /* mulsi */ + COSTS_N_INSNS (2), /* mulsi_const */ + COSTS_N_INSNS (2), /* mulsi_const9 */ + COSTS_N_INSNS (4), /* muldi */ + COSTS_N_INSNS (18), /* divsi */ + COSTS_N_INSNS (34), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (17), /* sdiv */ + COSTS_N_INSNS (17), /* ddiv */ + 128, /* cache line size */ + 32, /* l1 cache */ + 1024, /* l2 cache */ + 8, /* prefetch streams /*/ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on POWER6 processors. */ + static const + struct processor_costs power6_cost = { + COSTS_N_INSNS (8), /* mulsi */ + COSTS_N_INSNS (8), /* mulsi_const */ + COSTS_N_INSNS (8), /* mulsi_const9 */ + COSTS_N_INSNS (8), /* muldi */ + COSTS_N_INSNS (22), /* divsi */ + COSTS_N_INSNS (28), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (13), /* sdiv */ + COSTS_N_INSNS (16), /* ddiv */ + 128, /* cache line size */ + 64, /* l1 cache */ + 2048, /* l2 cache */ + 16, /* prefetch streams */ + 0, /* SF->DF convert */ + }; + + /* Instruction costs on POWER7 processors. */ + static const + struct processor_costs power7_cost = { + COSTS_N_INSNS (2), /* mulsi */ + COSTS_N_INSNS (2), /* mulsi_const */ + COSTS_N_INSNS (2), /* mulsi_const9 */ + COSTS_N_INSNS (2), /* muldi */ + COSTS_N_INSNS (18), /* divsi */ + COSTS_N_INSNS (34), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (13), /* sdiv */ + COSTS_N_INSNS (16), /* ddiv */ + 128, /* cache line size */ + 32, /* l1 cache */ + 256, /* l2 cache */ + 12, /* prefetch streams */ + COSTS_N_INSNS (3), /* SF->DF convert */ + }; + + /* Instruction costs on POWER8 processors. */ + static const + struct processor_costs power8_cost = { + COSTS_N_INSNS (3), /* mulsi */ + COSTS_N_INSNS (3), /* mulsi_const */ + COSTS_N_INSNS (3), /* mulsi_const9 */ + COSTS_N_INSNS (3), /* muldi */ + COSTS_N_INSNS (19), /* divsi */ + COSTS_N_INSNS (35), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (14), /* sdiv */ + COSTS_N_INSNS (17), /* ddiv */ + 128, /* cache line size */ + 32, /* l1 cache */ + 512, /* l2 cache */ + 12, /* prefetch streams */ + COSTS_N_INSNS (3), /* SF->DF convert */ + }; + + /* Instruction costs on POWER9 processors. */ + static const + struct processor_costs power9_cost = { + COSTS_N_INSNS (3), /* mulsi */ + COSTS_N_INSNS (3), /* mulsi_const */ + COSTS_N_INSNS (3), /* mulsi_const9 */ + COSTS_N_INSNS (3), /* muldi */ + COSTS_N_INSNS (8), /* divsi */ + COSTS_N_INSNS (12), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (13), /* sdiv */ + COSTS_N_INSNS (18), /* ddiv */ + 128, /* cache line size */ + 32, /* l1 cache */ + 512, /* l2 cache */ + 8, /* prefetch streams */ + COSTS_N_INSNS (3), /* SF->DF convert */ + }; + + /* Instruction costs on POWER10 processors. */ + static const + struct processor_costs power10_cost = { + COSTS_N_INSNS (2), /* mulsi */ + COSTS_N_INSNS (2), /* mulsi_const */ + COSTS_N_INSNS (2), /* mulsi_const9 */ + COSTS_N_INSNS (2), /* muldi */ + COSTS_N_INSNS (6), /* divsi */ + COSTS_N_INSNS (6), /* divdi */ + COSTS_N_INSNS (2), /* fp */ + COSTS_N_INSNS (2), /* dmul */ + COSTS_N_INSNS (11), /* sdiv */ + COSTS_N_INSNS (13), /* ddiv */ + 128, /* cache line size */ + 32, /* l1 cache */ + 512, /* l2 cache */ + 16, /* prefetch streams */ + COSTS_N_INSNS (2), /* SF->DF convert */ + }; + + /* Instruction costs on POWER A2 processors. 
*/ + static const + struct processor_costs ppca2_cost = { + COSTS_N_INSNS (16), /* mulsi */ + COSTS_N_INSNS (16), /* mulsi_const */ + COSTS_N_INSNS (16), /* mulsi_const9 */ + COSTS_N_INSNS (16), /* muldi */ + COST[...] [diff truncated at 524288 bytes]
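
The visible portion of this diff is dominated by the per-CPU processor_costs tables in rs6000.cc, each field expressed relative to a single integer add via COSTS_N_INSNS and selected at option-override time through the rs6000_cost pointer declared earlier in the diff (the rtx-costs hook that reads it lies beyond the truncation point). The sketch below is not part of the commit and is not GCC's own code: it is a minimal, self-contained mirror of that pattern, with hypothetical names (example_costs, example_power10, example_cost) and an assumed COSTS_N_INSNS scaling of 4 cost units per instruction, matching the convention in GCC's rtl.h.

/* Illustrative sketch only, not from rs6000.cc.  Mirrors the
   processor_costs pattern above: a per-CPU latency table scaled
   relative to one integer add, chosen once and then consulted
   through a pointer, as the file does with rs6000_cost.  */

#include <cstdio>

/* Assumed scaling: one instruction == 4 cost units.  */
#define COSTS_N_INSNS(n) ((n) * 4)

struct example_costs
{
  int mulsi;   /* 32-bit multiply.  */
  int divsi;   /* 32-bit divide.    */
  int sdiv;    /* float divide.     */
};

/* Values taken from the power10_cost entry shown in the diff.  */
static const example_costs example_power10 = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (11),   /* sdiv  */
};

/* Plays the role of the rs6000_cost pointer: set once for the
   selected CPU, then read by every cost query.  */
static const example_costs *example_cost = &example_power10;

int
main ()
{
  std::printf ("mulsi is %d times the cost of an add\n",
               example_cost->mulsi / COSTS_N_INSNS (1));
  return 0;
}

Writing every entry as COSTS_N_INSNS (n) rather than a raw latency is what keeps the unit "one add" regardless of which table the -mcpu= option selects, so a cost query only has to return the field matching the operation and mode.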