From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 19014 invoked by alias); 23 Jul 2019 08:30:32 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 18831 invoked by uid 89); 23 Jul 2019 08:30:17 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-17.3 required=5.0 tests=AWL,BAYES_00,GIT_PATCH_0,GIT_PATCH_1,GIT_PATCH_2,GIT_PATCH_3,SPF_PASS autolearn=ham version=3.3.1 spammy=online X-HELO: mx1.suse.de Received: from mx2.suse.de (HELO mx1.suse.de) (195.135.220.15) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Tue, 23 Jul 2019 08:30:14 +0000 Received: from relay2.suse.de (unknown [195.135.220.254]) by mx1.suse.de (Postfix) with ESMTP id 444EFAF7C; Tue, 23 Jul 2019 08:30:08 +0000 (UTC) From: =?UTF-8?Q?Martin_Li=c5=a1ka?= Subject: [PATCH] Come up with -flto=auto option. To: gcc-patches@gcc.gnu.org Cc: Jan Hubicka , Michael Matz , Richard Biener Message-ID: <95f20c0a-81a3-2319-326b-3c5baf71e2d1@suse.cz> Date: Tue, 23 Jul 2019 08:55:00 -0000 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Thunderbird/60.8.0 MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="------------CD4D252274F2DD186D6B6F97" X-IsSubscribed: yes X-SW-Source: 2019-07/txt/msg01489.txt.bz2 This is a multi-part message in MIME format. --------------CD4D252274F2DD186D6B6F97 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 7bit Content-length: 616 Hi. Since we at openSUSE have started using -flto, I would find it very handy to have an option value that automatically detects the number of cores that can be used for the parallel LTRANS phase. Thoughts? gcc/ChangeLog: 2019-07-23 Martin Liska * doc/invoke.texi: Document the new option value. * lto-wrapper.c (cpuset_popcount): New function, copied from libgomp/config/linux/proc.c. (init_num_threads): Likewise. 
(run_gcc): Support -flto=auto. --- gcc/doc/invoke.texi | 3 ++ gcc/lto-wrapper.c | 124 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 126 insertions(+), 1 deletion(-) --------------CD4D252274F2DD186D6B6F97 Content-Type: text/x-patch; name="0001-Come-up-with-flto-auto-option.patch" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="0001-Come-up-with-flto-auto-option.patch" Content-length: 5275 diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 77a2d561e38..58656fbe1e1 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -10398,6 +10398,9 @@ parallel jobs by utilizing an installed @command{make} program. The environment variable @env{MAKE} may be used to override the program used. The default value for @var{n} is 1. +You can specify @var{auto} to automatically detect number of +cores that will determine the number of parallel jobs. + You can also specify @option{-flto=jobserver} to use GNU make's job server mode to determine the number of parallel jobs. This is useful when the Makefile calling GCC is already executing in parallel. diff --git a/gcc/lto-wrapper.c b/gcc/lto-wrapper.c index 946897726d0..5451285f896 100644 --- a/gcc/lto-wrapper.c +++ b/gcc/lto-wrapper.c @@ -1110,6 +1110,110 @@ cmp_priority (const void *a, const void *b) return *((const int *)b)-*((const int *)a); } +/* Number of CPUs that can be used for parallel LTRANS phase. */ + +static unsigned long nthreads_var = 0; + +#ifdef HAVE_PTHREAD_AFFINITY_NP +unsigned long cpuset_size; +static unsigned long get_cpuset_size; +cpu_set_t *cpusetp; + +unsigned long +static cpuset_popcount (unsigned long cpusetsize, cpu_set_t *cpusetp) +{ +#ifdef CPU_COUNT_S + /* glibc 2.7 and above provide a macro for this. */ + return CPU_COUNT_S (cpusetsize, cpusetp); +#else +#ifdef CPU_COUNT + if (cpusetsize == sizeof (cpu_set_t)) + /* glibc 2.6 and above provide a macro for this. 
*/ + return CPU_COUNT (cpusetp); +#endif + size_t i; + unsigned long ret = 0; + STATIC_ASSERT (sizeof (cpusetp->__bits[0]) == sizeof (unsigned long int)); + for (i = 0; i < cpusetsize / sizeof (cpusetp->__bits[0]); i++) + { + unsigned long int mask = cpusetp->__bits[i]; + if (mask == 0) + continue; + ret += __builtin_popcountl (mask); + } + return ret; +#endif +} +#endif + +/* At startup, determine the default number of threads. It would seem + this should be related to the number of cpus online. */ + +static void +init_num_threads (void) +{ +#ifdef HAVE_PTHREAD_AFFINITY_NP +#if defined (_SC_NPROCESSORS_CONF) && defined (CPU_ALLOC_SIZE) + cpuset_size = sysconf (_SC_NPROCESSORS_CONF); + cpuset_size = CPU_ALLOC_SIZE (cpuset_size); +#else + cpuset_size = sizeof (cpu_set_t); +#endif + + cpusetp = (cpu_set_t *) xmalloc (gomp_cpuset_size); + do + { + int ret = pthread_getaffinity_np (pthread_self (), gomp_cpuset_size, + cpusetp); + if (ret == 0) + { + /* Count only the CPUs this process can use. */ + nthreads_var = cpuset_popcount (cpuset_size, cpusetp); + if (nthreads_var == 0) + break; + get_cpuset_size = cpuset_size; +#ifdef CPU_ALLOC_SIZE + unsigned long i; + for (i = cpuset_size * 8; i; i--) + if (CPU_ISSET_S (i - 1, cpuset_size, cpusetp)) + break; + cpuset_size = CPU_ALLOC_SIZE (i); +#endif + return; + } + if (ret != EINVAL) + break; +#ifdef CPU_ALLOC_SIZE + if (cpuset_size < sizeof (cpu_set_t)) + cpuset_size = sizeof (cpu_set_t); + else + cpuset_size = cpuset_size * 2; + if (cpuset_size < 8 * sizeof (cpu_set_t)) + cpusetp + = (cpu_set_t *) realloc (cpusetp, cpuset_size); + else + { + /* Avoid fatal if too large memory allocation would be + requested, e.g. kernel returning EINVAL all the time. 
*/ + void *p = realloc (cpusetp, cpuset_size); + if (p == NULL) + break; + cpusetp = (cpu_set_t *) p; + } +#else + break; +#endif + } + while (1); + cpuset_size = 0; + nthreads_var = 1; + free (cpusetp); + cpusetp = NULL; +#endif +#ifdef _SC_NPROCESSORS_ONLN + nthreads_var = sysconf (_SC_NPROCESSORS_ONLN); +#endif +} /* Execute gcc. ARGC is the number of arguments. ARGV contains the arguments. */ @@ -1124,6 +1228,7 @@ run_gcc (unsigned argc, char *argv[]) const char *collect_gcc, *collect_gcc_options; int parallel = 0; int jobserver = 0; + int auto_parallel = 0; bool no_partition = false; struct cl_decoded_option *fdecoded_options = NULL; struct cl_decoded_option *offload_fdecoded_options = NULL; @@ -1251,6 +1356,11 @@ run_gcc (unsigned argc, char *argv[]) jobserver = 1; parallel = 1; } + else if (strcmp (option->arg, "auto") == 0) + { + auto_parallel = 1; + parallel = 1; + } else { parallel = atoi (option->arg); @@ -1291,6 +1401,7 @@ run_gcc (unsigned argc, char *argv[]) { lto_mode = LTO_MODE_LTO; jobserver = 0; + auto_parallel = 0; parallel = 0; } @@ -1485,6 +1596,16 @@ cont1: if (jobserver) obstack_ptr_grow (&argv_obstack, xstrdup ("-fwpa=jobserver")); + else if (auto_parallel) + { + char buf[256]; + init_num_threads (); + if (verbose) + fprintf (stderr, "LTO parallelism level set to %ld\n", + nthreads_var); + sprintf (buf, "-fwpa=%ld", nthreads_var); + obstack_ptr_grow (&argv_obstack, xstrdup (buf)); + } else if (parallel > 1) { char buf[256]; @@ -1692,7 +1813,8 @@ cont: i = 3; if (!jobserver) { - snprintf (jobs, 31, "-j%d", parallel); + snprintf (jobs, 31, "-j%ld", + auto_parallel ? nthreads_var : parallel); new_argv[i++] = jobs; } new_argv[i++] = "all"; --------------CD4D252274F2DD186D6B6F97--