* [patch] Detect cache size with -mtune=native
@ 2007-02-26 16:01 Zdenek Dvorak
2007-02-27 16:14 ` Jan Hubicka
0 siblings, 1 reply; 5+ messages in thread
From: Zdenek Dvorak @ 2007-02-26 16:01 UTC (permalink / raw)
To: gcc-patches
Hello,
this patch extends -mtune=native to detect the size of the l1 cache and
of the cache line, and to pass it to the compiler by params. I have
tested this on several machines to which I have access, and the results
seem to be correct (but especially the Intel way of describing the cache
sizes is quite ugly).
Bootstrapped & regtested on i686.
Zdenek
* config/i386/driver-i386.c (describe_cache, detect_caches_amd,
decode_caches_intel, detect_caches_intel): New functions.
(host_detect_local_cpu): Use detect_caches_amd and
detect_caches_intel.
Index: config/i386/driver-i386.c
===================================================================
*** config/i386/driver-i386.c (revision 122257)
--- config/i386/driver-i386.c (working copy)
*************** const char *host_detect_local_cpu (int a
*** 47,52 ****
--- 47,181 ----
#define bit_3DNOWP (1 << 30)
#define bit_LM (1 << 29)
+ /* Returns the "--param" options describing an L1 cache of L1_SIZEKB
+    kilobytes with lines of L1_LINE bytes and associativity L1_ASSOC.
+    Each option is followed by a space so that the caller can append
+    further options directly.  Returns an empty string when L1_SIZEKB or
+    L1_LINE is zero, i.e. when there is nothing usable to describe.  */
+
+ static char *
+ describe_cache (unsigned l1_sizekb, unsigned l1_line, unsigned l1_assoc)
+ {
+   char size[1000], line[1000];
+   unsigned reduced_size, loss, size_in_lines, i;
+
+   /* A zero line size would divide by zero below, and a zero cache size
+      carries no information; emit no parameters in either case.  */
+   if (l1_sizekb == 0 || l1_line == 0)
+     return (char *) "";
+
+   /* We approximate the effects of associativity by considering the cache
+      to be scaled down by the factor (1 - 1/2^assoc).  */
+   reduced_size = l1_sizekb * 1024;
+   loss = reduced_size;
+   /* Halve LOSS once per unit of L1_ASSOC; stop as soon as it reaches zero
+      so that a large L1_ASSOC does not keep looping for nothing.  */
+   for (i = 0; i < l1_assoc && loss > 0; i++)
+     loss /= 2;
+   reduced_size -= loss;
+
+   /* The cache size is passed on as a number of cache lines.  */
+   size_in_lines = reduced_size / l1_line;
+
+   sprintf (size, "--param l1-cache-size=%u", size_in_lines);
+   sprintf (line, "--param l1-cache-line-size=%u", l1_line);
+
+   return concat (size, " ", line, " ", NULL);
+ }
+
+ /* Returns the description of caches for an AMD processor whose highest
+    supported extended CPUID level is MAX_EXT_LEVEL.  Returns an empty
+    string when CPUID level 0x80000005 is not available.  */
+
+ static char *
+ detect_caches_amd (unsigned max_ext_level)
+ {
+   unsigned eax, ebx, ecx, edx;
+   unsigned l1_sizekb, l1_line, l1_assoc;
+
+   /* Return "" rather than NULL: host_detect_local_cpu passes the result
+      as the first argument of concat, where a NULL would end the argument
+      list and drop the "-m<arg>=<cpu>" option that follows it.  */
+   if (max_ext_level < 0x80000005)
+     return (char *) "";
+
+   cpuid (0x80000005, eax, ebx, ecx, edx);
+
+   /* ECX packs the L1 parameters: line size in bits 0-7, associativity in
+      bits 16-23 and size in kilobytes in bits 24-31.  */
+   l1_line = ecx & 0xff;
+   l1_sizekb = (ecx >> 24) & 0xff;
+   l1_assoc = (ecx >> 16) & 0xff;
+
+   return describe_cache (l1_sizekb, l1_line, l1_assoc);
+ }
+
+ /* Examines the four bytes packed in REG, lowest byte first.  Whenever a
+    byte equals one of the L1 cache descriptors listed below, the matching
+    size (in kilobytes), line size and associativity are stored to
+    *L1_SIZEKB, *L1_LINE and *L1_ASSOC; a later match overwrites an earlier
+    one.  Nothing is stored when bit 31 of REG is set or when no byte
+    matches.  */
+
+ static void
+ decode_caches_intel (unsigned reg, unsigned *l1_sizekb, unsigned *l1_line,
+                      unsigned *l1_assoc)
+ {
+   /* Recognized descriptor bytes and the cache each one denotes.  */
+   static const struct
+   {
+     unsigned char descriptor;
+     unsigned char sizekb;
+     unsigned char line;
+     unsigned char assoc;
+   } known[] =
+     {
+       { 0x0a,  8, 32, 2 },
+       { 0x0c, 16, 32, 4 },
+       { 0x2c, 32, 64, 8 },
+       { 0x60, 16, 64, 8 },
+       { 0x66,  8, 64, 4 },
+       { 0x67, 16, 64, 4 },
+       { 0x68, 32, 64, 4 }
+     };
+   unsigned shift, k;
+
+   /* A register with bit 31 set is ignored as a whole.  */
+   if ((reg >> 31) & 1)
+     return;
+
+   for (shift = 0; shift < 32; shift += 8)
+     {
+       unsigned byte = (reg >> shift) & 0xff;
+
+       for (k = 0; k < sizeof known / sizeof known[0]; k++)
+         if (known[k].descriptor == byte)
+           {
+             *l1_sizekb = known[k].sizekb;
+             *l1_line = known[k].line;
+             *l1_assoc = known[k].assoc;
+             break;
+           }
+     }
+ }
+
+ /* Returns the description of caches for an Intel processor whose highest
+    supported CPUID level is MAX_LEVEL.  Returns an empty string when CPUID
+    level 2 is not available or when none of the registers it returns
+    contains a recognized L1 cache descriptor.  */
+
+ static char *
+ detect_caches_intel (unsigned max_level)
+ {
+   unsigned eax, ebx, ecx, edx;
+   unsigned l1_sizekb = 0, l1_line = 0, l1_assoc = 0;
+
+   /* Return "" rather than NULL: host_detect_local_cpu passes the result
+      as the first argument of concat, where a NULL would end the argument
+      list and drop the "-m<arg>=<cpu>" option that follows it.  */
+   if (max_level < 2)
+     return (char *) "";
+
+   cpuid (2, eax, ebx, ecx, edx);
+
+   /* Any of the four registers may carry the L1 descriptor.  */
+   decode_caches_intel (eax, &l1_sizekb, &l1_line, &l1_assoc);
+   decode_caches_intel (ebx, &l1_sizekb, &l1_line, &l1_assoc);
+   decode_caches_intel (ecx, &l1_sizekb, &l1_line, &l1_assoc);
+   decode_caches_intel (edx, &l1_sizekb, &l1_line, &l1_assoc);
+
+   /* The size is still zero if no descriptor was recognized.  */
+   if (!l1_sizekb)
+     return (char *) "";
+
+   return describe_cache (l1_sizekb, l1_line, l1_assoc);
+ }
+
/* This will be called by the spec parser in gcc.c when it sees
a %:local_cpu_detect(args) construct. Currently it will be called
with either "arch" or "tune" as argument depending on if -march=native
*************** const char *host_detect_local_cpu (int a
*** 62,67 ****
--- 191,197 ----
const char *host_detect_local_cpu (int argc, const char **argv)
{
const char *cpu = NULL;
+ char *cache = (char *) "";
enum processor_type processor = PROCESSOR_I386;
unsigned int eax, ebx, ecx, edx;
unsigned int max_level;
*************** const char *host_detect_local_cpu (int a
*** 126,131 ****
--- 256,269 ----
is_amd = vendor == *(unsigned int*)"Auth";
+ if (!arch)
+ {
+ if (is_amd)
+ cache = detect_caches_amd (ext_level);
+ else if (vendor == *(unsigned int*)"Genu")
+ cache = detect_caches_intel (max_level);
+ }
+
if (is_amd)
{
if (has_mmx)
*************** const char *host_detect_local_cpu (int a
*** 283,289 ****
}
done:
! return concat ("-m", argv[0], "=", cpu, NULL);
}
#else
/* If we aren't compiling with GCC we just provide a minimal
--- 421,427 ----
}
done:
! return concat (cache, "-m", argv[0], "=", cpu, NULL);
}
#else
/* If we aren't compiling with GCC we just provide a minimal
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [patch] Detect cache size with -mtune=native
2007-02-26 16:01 [patch] Detect cache size with -mtune=native Zdenek Dvorak
@ 2007-02-27 16:14 ` Jan Hubicka
2007-03-01 10:06 ` Zdenek Dvorak
0 siblings, 1 reply; 5+ messages in thread
From: Jan Hubicka @ 2007-02-27 16:14 UTC (permalink / raw)
To: Zdenek Dvorak; +Cc: gcc-patches
> Hello,
>
> this patch extends -mtune=native to detect the size of the l1 cache and
> of the cache line, and to pass it to the compiler by params. I have
> tested this on several machines to that I have access, and the results
> seem to be correct (but especially the intel way of describing the cache
> sizes is quite ugly.
>
> Bootstrapped & regtested on i686.
>
> Zdenek
>
> * config/i386/driver-i386.c (describe_cache, detect_caches_amd,
> decode_caches_intel, detect_caches_intel): New functions.
> (host_detect_local_cpu): Use detect_caches_amd and
> detect_caches_intel.
OK, thanks, just two minor questions.
> + /* We approximate the effects of associativity by considering the cache to be
> + (1 - 1/2^assoc) times smaller. */
Hmm, is this realistic? I know that we don't have page coloring, but
still, for the contiguous arrays we are interested in, the associativity of
caches should not be a major limiting factor.
> + char *cache = (char *) "";
Is there a reason why cache can't be const?
BTW Uros, I think your i386 maintainership also subsumes x86-64, since it
is just a subtarget of the i386 backend.
Honza
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [patch] Detect cache size with -mtune=native
2007-02-27 16:14 ` Jan Hubicka
@ 2007-03-01 10:06 ` Zdenek Dvorak
0 siblings, 0 replies; 5+ messages in thread
From: Zdenek Dvorak @ 2007-03-01 10:06 UTC (permalink / raw)
To: Jan Hubicka; +Cc: gcc-patches
Hello,
> > this patch extends -mtune=native to detect the size of the l1 cache and
> > of the cache line, and to pass it to the compiler by params. I have
> > tested this on several machines to that I have access, and the results
> > seem to be correct (but especially the intel way of describing the cache
> > sizes is quite ugly.
> >
> > Bootstrapped & regtested on i686.
> >
> > Zdenek
> >
> > * config/i386/driver-i386.c (describe_cache, detect_caches_amd,
> > decode_caches_intel, detect_caches_intel): New functions.
> > (host_detect_local_cpu): Use detect_caches_amd and
> > detect_caches_intel.
>
> OK, thanks, just two minor questions.
> > + /* We approximate the effects of associativity by considering the cache to be
> > + (1 - 1/2^assoc) times smaller. */
>
> Hmm, is this realistic? I know that we don't have page coloring but
> still for continuous arrays we are interested in, the associativity of
> caches should not be major limiting factor..
You are right, it certainly depends on the context; I guess that in the
loop
for (x)
for (i)
use (a[c][i]);
the cache will behave the same way regardless of the associativity, while in
for (x)
for (i)
use (a[i][c]);
the 1-1/2^assoc factor might be realistic. I will pass the full size of
the cache to gcc for now, if someone implements cache modeling, he may
decide how to deal with it then.
Zdenek
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [patch] Detect cache size with -mtune=native
2007-02-26 18:05 Uros Bizjak
@ 2007-02-27 13:49 ` Zdenek Dvorak
0 siblings, 0 replies; 5+ messages in thread
From: Zdenek Dvorak @ 2007-02-27 13:49 UTC (permalink / raw)
To: Uros Bizjak; +Cc: GCC Patches, jh
Hello,
> >this patch extends -mtune=native to detect the size of the l1 cache and
> >of the cache line, and to pass it to the compiler by params. I have
> >tested this on several machines to that I have access, and the results
> >seem to be correct (but especially the intel way of describing the cache
> >sizes is quite ugly.
>
> >Bootstrapped & regtested on i686.
>
> Did you bootstrap gcc with BOOT_CFLAGS="... -mtune/-march=native ..."
> ?
Not originally; I will try it now (although I do not expect any changes,
for the reasons below).
> There were some interesting bugs that blocked this for certain
> targets, but they should all be fixed in recent SVN.
>
> Is there any measurable gain on bootstrap time using this patch?
No; at the moment, the information about cache size is unused. It is
used in the reuse analysis pass that I am going to submit soon. Later, it
should also be used in the heuristics that determine the usefulness of
high-level loop transformations, although I think nobody is working on
that just now.
Zdenek
> Otherwise, I think this is a nice addition to have for i386 targets.
> But let's give Jan the last word, as it also affects x86_64.
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [patch] Detect cache size with -mtune=native
@ 2007-02-26 18:05 Uros Bizjak
2007-02-27 13:49 ` Zdenek Dvorak
0 siblings, 1 reply; 5+ messages in thread
From: Uros Bizjak @ 2007-02-26 18:05 UTC (permalink / raw)
To: GCC Patches; +Cc: Zdenek Dvorak
Hello!
> this patch extends -mtune=native to detect the size of the l1 cache and
> of the cache line, and to pass it to the compiler by params. I have
> tested this on several machines to that I have access, and the results
> seem to be correct (but especially the intel way of describing the cache
> sizes is quite ugly.
> Bootstrapped & regtested on i686.
Did you bootstrap gcc with BOOT_CFLAGS="... -mtune/-march=native ..."
? There were some interesting bugs that blocked this for certain
targets, but they should all be fixed in recent SVN.
Is there any measurable gain on bootstrap time using this patch?
Otherwise, I think this is a nice addition to have for i386 targets.
But let's give Jan the last word, as it also affects x86_64.
Thanks,
Uros.
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2007-03-01 10:06 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-02-26 16:01 [patch] Detect cache size with -mtune=native Zdenek Dvorak
2007-02-27 16:14 ` Jan Hubicka
2007-03-01 10:06 ` Zdenek Dvorak
2007-02-26 18:05 Uros Bizjak
2007-02-27 13:49 ` Zdenek Dvorak
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).