commit 9344656f2863b72fbaf16126338ed73741722192 Author: Kyrylo Tkachov Date: Fri Apr 10 16:39:27 2015 +0100 [AArch64] Implement -m{tune,cpu,arch}=native on AArch64 GNU/Linux diff --git a/gcc/config.host b/gcc/config.host index b0f5940..a8896d1 100644 --- a/gcc/config.host +++ b/gcc/config.host @@ -99,6 +99,14 @@ case ${host} in esac case ${host} in + aarch64*-*-linux*) + case ${target} in + aarch64*-*-*) + host_extra_gcc_objs="driver-aarch64.o" + host_xmake_file="${host_xmake_file} aarch64/x-aarch64" + ;; + esac + ;; arm*-*-freebsd* | arm*-*-linux*) case ${target} in arm*-*-*) diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index e46d91b..f26640e 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -21,7 +21,7 @@ Before using #include to read this file, define a macro: - AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHEDULER_IDENT, ARCH, FLAGS, COSTS) + AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHEDULER_IDENT, ARCH, FLAGS, COSTS, IMP, PART) The CORE_NAME is the name of the core, represented as a string constant. The CORE_IDENT is the name of the core, represented as an identifier. @@ -30,18 +30,23 @@ ARCH is the architecture revision implemented by the chip. FLAGS are the bitwise-or of the traits that apply to that core. This need not include flags implied by the architecture. - COSTS is the name of the rtx_costs routine to use. */ + COSTS is the name of the rtx_costs routine to use. + IMP is the implementer ID of the CPU vendor. On a GNU/Linux system it can + be found in /proc/cpuinfo. + PART is the part number of the CPU. On a GNU/Linux system it can be found + in /proc/cpuinfo. For big.LITTLE systems this should have the format + "BIG_PART.LITTLE_PART" (for example "0xd07.0xd03"). */ /* V8 Architecture Processors. 
*/ -AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53) -AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57) -AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57) -AARCH64_CORE("exynos-m1", exynosm1, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57) -AARCH64_CORE("thunderx", thunderx, thunderx, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx) -AARCH64_CORE("xgene1", xgene1, xgene1, 8, AARCH64_FL_FOR_ARCH8, xgene1) +AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, "0x41", "0xd03") +AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07") +AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd08") +AARCH64_CORE("exynos-m1", exynosm1, cortexa57, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, "samsung", "exynosm1") +AARCH64_CORE("thunderx", thunderx, thunderx, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, "cavium", "thunderx") +AARCH64_CORE("xgene1", xgene1, xgene1, 8, AARCH64_FL_FOR_ARCH8, xgene1, "0x50", "0x000") /* V8 big.LITTLE implementations. 
*/ -AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57) -AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57) +AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07.0xd03") +AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd08.0xd03") diff --git a/gcc/config/aarch64/aarch64-elf.h b/gcc/config/aarch64/aarch64-elf.h index a5ec8cb..1ce6343 100644 --- a/gcc/config/aarch64/aarch64-elf.h +++ b/gcc/config/aarch64/aarch64-elf.h @@ -132,7 +132,8 @@ #undef DRIVER_SELF_SPECS #define DRIVER_SELF_SPECS \ " %{!mbig-endian:%{!mlittle-endian:" ENDIAN_SPEC "}}" \ - " %{!mabi=*:" ABI_SPEC "}" + " %{!mabi=*:" ABI_SPEC "}" \ + MCPU_MTUNE_NATIVE_SPECS #ifdef HAVE_AS_MABI_OPTION #define ASM_MABI_SPEC "%{mabi=*:-mabi=%*}" diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index 6ec3ed6..f296296 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -21,18 +21,25 @@ Before using #include to read this file, define a macro: - AARCH64_OPT_EXTENSION(EXT_NAME, FLAGS_ON, FLAGS_OFF) + AARCH64_OPT_EXTENSION(EXT_NAME, FLAGS_ON, FLAGS_OFF, FEATURE_STRING) EXT_NAME is the name of the extension, represented as a string constant. FLAGS_ON are the bitwise-or of the features that the extension adds. - FLAGS_OFF are the bitwise-or of the features that the extension removes. */ + FLAGS_OFF are the bitwise-or of the features that the extension removes. + FEATURE_STRING is a string containing the entries in the 'Features' field of + /proc/cpuinfo on a GNU/Linux system that correspond to this architecture + extension being available. 
Sometimes multiple entries are needed to enable + the extension (for example, the 'crypto' extension depends on four + entries: aes, pmull, sha1, sha2 being present). In that case this field + should contain a whitespace-separated list of the strings in 'Features' + that are required. Their order is not important. */ /* V8 Architecture Extensions. This list currently contains example extensions for CPUs that implement AArch64, and therefore serves as a template for adding more CPUs in the future. */ -AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, AARCH64_FL_FPSIMD | AARCH64_FL_CRYPTO) -AARCH64_OPT_EXTENSION("simd", AARCH64_FL_FPSIMD, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO) -AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO | AARCH64_FL_FPSIMD, AARCH64_FL_CRYPTO) -AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, AARCH64_FL_CRC) +AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, AARCH64_FL_FPSIMD | AARCH64_FL_CRYPTO, "fp") +AARCH64_OPT_EXTENSION("simd", AARCH64_FL_FPSIMD, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO, "asimd") +AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO | AARCH64_FL_FPSIMD, AARCH64_FL_CRYPTO, "aes pmull sha1 sha2") +AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, AARCH64_FL_CRC, "crc32") diff --git a/gcc/config/aarch64/aarch64-opts.h b/gcc/config/aarch64/aarch64-opts.h index f88ae5b..ea64cf4 100644 --- a/gcc/config/aarch64/aarch64-opts.h +++ b/gcc/config/aarch64/aarch64-opts.h @@ -25,7 +25,7 @@ /* The various cores that implement AArch64. */ enum aarch64_processor { -#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS) \ +#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \ INTERNAL_IDENT, #include "aarch64-cores.def" #undef AARCH64_CORE diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 954e110..ea6020f 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -441,7 +441,7 @@ struct processor /* Processor cores implementing AArch64. 
*/ static const struct processor all_cores[] = { -#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS) \ +#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \ {NAME, SCHED, #ARCH, ARCH, FLAGS, &COSTS##_tunings}, #include "aarch64-cores.def" #undef AARCH64_CORE @@ -478,7 +478,7 @@ struct aarch64_option_extension /* ISA extensions in AArch64. */ static const struct aarch64_option_extension all_extensions[] = { -#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \ +#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF, FEATURE_STRING) \ {NAME, FLAGS_ON, FLAGS_OFF}, #include "aarch64-option-extensions.def" #undef AARCH64_OPT_EXTENSION diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index bf59e40..1f7187b 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -506,7 +506,7 @@ enum reg_class enum target_cpus { -#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS) \ +#define AARCH64_CORE(NAME, INTERNAL_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \ TARGET_CPU_##INTERNAL_IDENT, #include "aarch64-cores.def" #undef AARCH64_CORE @@ -929,11 +929,24 @@ extern const char *aarch64_rewrite_mcpu (int argc, const char **argv); #define BIG_LITTLE_CPU_SPEC_FUNCTIONS \ { "rewrite_mcpu", aarch64_rewrite_mcpu }, +#if defined(__aarch64__) +extern const char *host_detect_local_cpu (int argc, const char **argv); +# define EXTRA_SPEC_FUNCTIONS \ + { "local_cpu_detect", host_detect_local_cpu }, \ + BIG_LITTLE_CPU_SPEC_FUNCTIONS + +# define MCPU_MTUNE_NATIVE_SPECS \ + " %{march=native:%. 
*/ + +#include "config.h" +#include "system.h" +/* Pairs an architecture extension name (EXT) with the whitespace-separated + /proc/cpuinfo 'Features' entries (FEAT_STRING) taken from the + FEATURE_STRING field of each AARCH64_OPT_EXTENSION entry. */ +struct arch_extension +{ + const char *ext; + const char *feat_string; +}; + +#define AARCH64_OPT_EXTENSION(EXT_NAME, FLAGS_ON, FLAGS_OFF, FEATURE_STRING) \ + { EXT_NAME, FEATURE_STRING }, +static struct arch_extension ext_to_feat_string[] = +{ +#include "aarch64-option-extensions.def" +}; +#undef AARCH64_OPT_EXTENSION + +/* One record per AARCH64_CORE entry: the core name, the stringified + architecture revision, the implementer ID and the part number. */ +struct aarch64_core_data +{ + const char* name; + const char* arch; + const char* implementer_id; + const char* part_no; +}; + +#define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \ + { CORE_NAME, #ARCH, IMP, PART }, + +/* Table of known cores, terminated by an all-NULL sentinel entry. */ static struct aarch64_core_data cpu_data [] = +{ +#include "aarch64-cores.def" + { NULL, NULL, NULL, NULL } +}; + +#undef AARCH64_CORE +/* Maps a stringified architecture revision (ID) to the architecture + NAME given in the matching AARCH64_ARCH entry. */ +struct aarch64_arch +{ + const char* id; + const char* name; +}; + +#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \ + { #ARCH, NAME }, + +/* Table of known architectures, terminated by a {NULL, NULL} sentinel. */ static struct aarch64_arch aarch64_arches [] = +{ +#include "aarch64-arches.def" + {NULL, NULL} +}; + +#undef AARCH64_ARCH + +/* Return the full architecture name string corresponding to the + identifier ID, or NULL if no entry of aarch64_arches has that ID. + ID is passed to strcmp and so must not be NULL. */ + +static const char* +get_arch_name_from_id (const char* id) +{ + unsigned int i = 0; + + for (i = 0; aarch64_arches[i].id != NULL; i++) + { + if (strcmp (id, aarch64_arches[i].id) == 0) + return aarch64_arches[i].name; + } + + return NULL; +} + + +/* Check whether both CPU part numbers held in the two-element array CORE + occur in BL_STRING, in either order. For example CORE={"0xd03", "0xd07"} + and BL_STRING="0xd07.0xd03" should return true. CORE[0] and CORE[1] are + both handed to strstr, so neither may be NULL. */ + +static bool +valid_bL_string_p (const char** core, const char* bL_string) +{ + return strstr (bL_string, core[0]) != NULL + && strstr (bL_string, core[1]) != NULL; +} + +/* Return true iff ARR contains STR in one of its two elements. 
*/
+
+static bool
+contains_string_p (const char** arr, const char* str)
+{
+  bool res = false;
+
+  if (arr[0] != NULL)
+    {
+      res = strstr (arr[0], str) != NULL;
+      if (res)
+        return res;
+
+      if (arr[1] != NULL)
+        return strstr (arr[1], str) != NULL;
+    }
+
+  return false;
+}
+
+/* Read one complete line from F, however long it is, and return it as a
+   newly allocated string that the caller must free.  Return NULL once
+   nothing more can be read.  The line is assembled from fixed-size pieces
+   so that a long line (such as 'Features' in /proc/cpuinfo) is never cut
+   short by the size of a local buffer.  */
+
+static char *
+read_whole_line (FILE *f)
+{
+  char chunk[128];
+  char *line = NULL;
+
+  while (fgets (chunk, sizeof (chunk), f) != NULL)
+    {
+      char *joined = concat (line ? line : "", chunk, NULL);
+
+      free (line);
+      line = joined;
+
+      /* fgets stops after a newline, so seeing one means the line is done.  */
+      if (strchr (chunk, '\n') != NULL)
+        break;
+    }
+
+  return line;
+}
+
+/* Return true iff WORD appears in LINE as a complete token, i.e. bounded
+   on the left by the start of LINE, whitespace or ':' and on the right by
+   whitespace or the end of LINE.  A plain substring search would also
+   accept a longer feature name that merely contains WORD.  */
+
+static bool
+line_has_word_p (const char *line, const char *word)
+{
+  size_t len = strlen (word);
+  const char *p = line;
+
+  if (len == 0)
+    return false;
+
+  while ((p = strstr (p, word)) != NULL)
+    {
+      bool starts_token = (p == line || p[-1] == ' ' || p[-1] == '\t'
+                           || p[-1] == ':');
+      bool ends_token = (p[len] == '\0' || p[len] == ' ' || p[len] == '\t'
+                         || p[len] == '\n' || p[len] == '\r');
+
+      if (starts_token && ends_token)
+        return true;
+
+      p++;
+    }
+
+  return false;
+}
+
+/* This will be called by the spec parser in gcc.c when it sees
+   a %:local_cpu_detect(args) construct.  It is called with "arch",
+   "cpu" or "tune" as ARGV[0], depending on whether -march=native,
+   -mcpu=native or -mtune=native is to be substituted.
+
+   It returns a string containing the command line option to put in
+   place of the native one, e.g. "-march=armv8-a" followed by the
+   detected "+ext"/"+noext" modifiers for -march=native.  For "cpu" and
+   "tune" it names the detected CPU or big.LITTLE pairing; the feature
+   modifiers are appended for "arch" and "cpu" but not for "tune".
+   If no known processor can be detected the empty string is returned,
+   so the native option is simply discarded.
+
+   ARGC and ARGV are set depending on the actual arguments given
+   in the spec.  */
+
+const char *
+host_detect_local_cpu (int argc, const char **argv)
+{
+  /* Every local is declared before the first goto so that no jump to
+     not_found crosses an initialization.  */
+  const char *arch_id = NULL;
+  char *res = NULL;
+  const unsigned int num_exts = ARRAY_SIZE (ext_to_feat_string);
+  char *line = NULL;
+  FILE *f = NULL;
+  bool arch = false;
+  bool tune = false;
+  bool cpu = false;
+  unsigned int i = 0;
+  unsigned int core_idx = 0;
+  const char* imps[2] = { NULL, NULL };
+  const char* cores[2] = { NULL, NULL };
+  unsigned int n_cores = 0;
+  unsigned int n_imps = 0;
+  bool processed_exts = false;
+  char *ext_string = NULL;
+
+  gcc_assert (argc);
+
+  if (!argv[0])
+    goto not_found;
+
+  /* Are we processing -march, -mtune or -mcpu?  */
+  arch = strcmp (argv[0], "arch") == 0;
+  if (!arch)
+    tune = strcmp (argv[0], "tune") == 0;
+
+  if (!arch && !tune)
+    cpu = strcmp (argv[0], "cpu") == 0;
+
+  if (!arch && !tune && !cpu)
+    goto not_found;
+
+  f = fopen ("/proc/cpuinfo", "r");
+
+  if (f == NULL)
+    goto not_found;
+
+  /* Look through /proc/cpuinfo to determine the implementer
+     and then the part number that identifies a particular core.  */
+  while ((line = read_whole_line (f)) != NULL)
+    {
+      if (strstr (line, "implementer") != NULL)
+        {
+          for (i = 0; cpu_data[i].name != NULL; i++)
+            if (strstr (line, cpu_data[i].implementer_id) != NULL
+                && !contains_string_p (imps, cpu_data[i].implementer_id))
+              {
+                if (n_imps == 2)
+                  goto not_found;
+
+                imps[n_imps++] = cpu_data[i].implementer_id;
+
+                break;
+              }
+        }
+      else if (strstr (line, "part") != NULL)
+        {
+          for (i = 0; cpu_data[i].name != NULL; i++)
+            if (strstr (line, cpu_data[i].part_no) != NULL
+                && !contains_string_p (cores, cpu_data[i].part_no))
+              {
+                if (n_cores == 2)
+                  goto not_found;
+
+                cores[n_cores++] = cpu_data[i].part_no;
+                core_idx = i;
+                arch_id = cpu_data[i].arch;
+                break;
+              }
+        }
+      else if (!tune && !processed_exts && strstr (line, "Features") != NULL)
+        {
+          for (i = 0; i < num_exts; i++)
+            {
+              bool enabled = true;
+              /* strtok writes into its argument, so work on a copy.  */
+              char *feat_string
+                = concat (ext_to_feat_string[i].feat_string, NULL);
+              char *p = strtok (feat_string, " ");
+              char *grown = NULL;
+
+              /* The extension is enabled only if every one of its
+                 feature entries is listed.  */
+              while (p != NULL)
+                {
+                  if (!line_has_word_p (line, p))
+                    {
+                      enabled = false;
+                      break;
+                    }
+                  p = strtok (NULL, " ");
+                }
+
+              grown = concat (ext_string ? ext_string : "", "+",
+                              enabled ? "" : "no",
+                              ext_to_feat_string[i].ext, NULL);
+              free (ext_string);
+              ext_string = grown;
+              free (feat_string);
+            }
+          processed_exts = true;
+        }
+
+      free (line);
+      line = NULL;
+    }
+
+  fclose (f);
+  f = NULL;
+
+  /* Weird cpuinfo format that we don't know how to handle.  */
+  if (n_cores == 0 || n_cores > 2 || n_imps != 1)
+    goto not_found;
+
+  if (arch && !arch_id)
+    goto not_found;
+
+  if (arch)
+    {
+      const char* arch_name = get_arch_name_from_id (arch_id);
+
+      /* We got some arch identifier that's not in aarch64-arches.def?  */
+      if (!arch_name)
+        goto not_found;
+
+      res = concat ("-march=", arch_name, NULL);
+    }
+  /* We have big.LITTLE.  */
+  else if (n_cores == 2)
+    {
+      for (i = 0; cpu_data[i].name != NULL; i++)
+        {
+          /* Compare the whole implementer ID; comparing all but its last
+             character would let e.g. "0x41" match "0x43".  */
+          if (strchr (cpu_data[i].part_no, '.') != NULL
+              && strcmp (cpu_data[i].implementer_id, imps[0]) == 0
+              && valid_bL_string_p (cores, cpu_data[i].part_no))
+            {
+              res = concat ("-m", cpu ? "cpu" : "tune", "=",
+                            cpu_data[i].name, NULL);
+              break;
+            }
+        }
+      if (!res)
+        goto not_found;
+    }
+  /* The simple, non-big.LITTLE case.  */
+  else
+    {
+      if (strcmp (cpu_data[core_idx].implementer_id, imps[0]) != 0)
+        goto not_found;
+
+      res = concat ("-m", cpu ? "cpu" : "tune", "=",
+                    cpu_data[core_idx].name, NULL);
+    }
+
+  /* Feature modifiers are never collected for "tune" (see the loop above),
+     so EXT_STRING is only non-NULL for "arch" and "cpu".  */
+  if (!tune && ext_string != NULL)
+    {
+      char *with_exts = concat (res, ext_string, NULL);
+
+      free (res);
+      res = with_exts;
+    }
+
+  free (ext_string);
+
+  return res;
+
+not_found:
+  {
+    /* If detection fails we ignore the option.
+       Clean up and return empty string.  */
+
+    if (f)
+      fclose (f);
+
+    free (line);
+    free (ext_string);
+
+    return "";
+  }
+}
+
diff --git a/gcc/config/aarch64/x-aarch64 b/gcc/config/aarch64/x-aarch64 new file mode 100644 index 0000000..8c09e04 --- /dev/null +++ b/gcc/config/aarch64/x-aarch64 @@ -0,0 +1,3 @@ +driver-aarch64.o: $(srcdir)/config/aarch64/driver-aarch64.c \ + $(CONFIG_H) $(SYSTEM_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index e2918cb..5787524 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -12318,8 +12318,12 @@ This involves inserting a NOP instruction between memory instructions and Specify the name of the target architecture, optionally suffixed by one or more feature modifiers. 
This option has the form @option{-march=@var{arch}@r{@{}+@r{[}no@r{]}@var{feature}@r{@}*}}, where the -only permissible value for @var{arch} is @samp{armv8-a}. The permissible -values for @var{feature} are documented in the sub-section below. +only permissible value for @var{arch} is @samp{armv8-a}. +The permissible values for @var{feature} are documented in the sub-section +below. Additionally on native AArch64 GNU/Linux systems the value +@samp{native} is available. This option causes the compiler to pick the +architecture of the host system. If the compiler is unable to recognize the +architecture of the host system this option has no effect. Where conflicting feature modifiers are specified, the right-most feature is used. @@ -12343,6 +12347,13 @@ Additionally, this option can specify that GCC should tune the performance of the code for a big.LITTLE system. Permissible values for this option are: @samp{cortex-a57.cortex-a53}, @samp{cortex-a72.cortex-a53}. +Additionally on native AArch64 GNU/Linux systems the value @samp{native} +is available. +This option causes the compiler to tune the performance of the code for the +processor of the host system. +If the compiler is unable to recognize the processor of the host system +this option has no effect. + Where none of @option{-mtune=}, @option{-mcpu=} or @option{-march=} are specified, the code is tuned to perform well across a range of target processors. @@ -12355,7 +12366,11 @@ Specify the name of the target processor, optionally suffixed by one or more feature modifiers. This option has the form @option{-mcpu=@var{cpu}@r{@{}+@r{[}no@r{]}@var{feature}@r{@}*}}, where the permissible values for @var{cpu} are the same as those available for -@option{-mtune}. +@option{-mtune}. Additionally on native AArch64 GNU/Linux systems the +value @samp{native} is available. +This option causes the compiler to pick the architecture of and tune the +performance of the code for the processor of the host system. 
If the compiler is unable to recognize the +processor of the host system this option has no effect. The permissible values for @var{feature} are documented in the sub-section below.