public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-5769] x86: Speed up target attribute handling by using a cache
@ 2021-12-03 10:09 Jakub Jelinek
  0 siblings, 0 replies; only message in thread
From: Jakub Jelinek @ 2021-12-03 10:09 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:7e71909af2cf3aeec9bed4f6a3cc42c1d17cd661

commit r12-5769-g7e71909af2cf3aeec9bed4f6a3cc42c1d17cd661
Author: Jakub Jelinek <jakub@redhat.com>
Date:   Fri Dec 3 11:07:44 2021 +0100

    x86: Speed up target attribute handling by using a cache
    
    The target attribute handling is very expensive and for the common case
    from x86intrin.h where many functions get implicitly the same target
    attribute, we can speed up compilation a lot by caching it.
    
    The following patches both create a single-entry cache: for a particular
    target attribute argument list, they cache the resulting
    DECL_FUNCTION_SPECIFIC_TARGET and DECL_FUNCTION_SPECIFIC_OPTIMIZATION
    values from ix86_valid_target_attribute_p, and use the cache if the
    args are the same as last time and we start either from NULL values
    of those, or from the values recorded for them last time.
    
    Compiling a simple:
     #include <x86intrin.h>
    
     int i;
    testcase with ./cc1 -quiet -O2 -isystem include/ test.c
    takes ~0.392s on my workstation without the patches and ~0.182s with
    either of the patches, i.e. roughly half the time it took before.
    For ./cc1plus -quiet -O2 -isystem include/ test.c
    it is slightly worse: the speed-up is from ~0.613s to ~0.403s.
    
    The difference between the 2 patches is that the first one uses copy_list
    while the second one uses a vec, so I think the second one has the advantage
    of creating less GC garbage.
    I've verified that, on x86intrin.h, both patches produce the same contents
    of the DECL_FUNCTION_SPECIFIC_TARGET and DECL_FUNCTION_SPECIFIC_OPTIMIZATION
    nodes as before, by calling debug_tree on those nodes and comparing the
    stderr output without these patches against the output with them.
    
    2021-12-03  Jakub Jelinek  <jakub@redhat.com>
    
            * attribs.h (simple_cst_list_equal): Declare.
            * attribs.c (simple_cst_list_equal): No longer static.
            * config/i386/i386-options.c (target_attribute_cache): New variable.
            (ix86_valid_target_attribute_p): Cache DECL_FUNCTION_SPECIFIC_TARGET
            and DECL_FUNCTION_SPECIFIC_OPTIMIZATION based on args.

Diff:
---
 gcc/attribs.c                  |  2 +-
 gcc/attribs.h                  |  1 +
 gcc/config/i386/i386-options.c | 26 +++++++++++++++++++++++++-
 3 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/gcc/attribs.c b/gcc/attribs.c
index c252f5af07b..32c941506f4 100644
--- a/gcc/attribs.c
+++ b/gcc/attribs.c
@@ -1285,7 +1285,7 @@ cmp_attrib_identifiers (const_tree attr1, const_tree attr2)
 /* Compare two constructor-element-type constants.  Return 1 if the lists
    are known to be equal; otherwise return 0.  */
 
-static bool
+bool
 simple_cst_list_equal (const_tree l1, const_tree l2)
 {
   while (l1 != NULL_TREE && l2 != NULL_TREE)
diff --git a/gcc/attribs.h b/gcc/attribs.h
index 73f0ccc24b8..f5899d83c0b 100644
--- a/gcc/attribs.h
+++ b/gcc/attribs.h
@@ -60,6 +60,7 @@ extern tree build_type_attribute_variant (tree, tree);
 extern tree build_decl_attribute_variant (tree, tree);
 extern tree build_type_attribute_qual_variant (tree, tree, int);
 
+extern bool simple_cst_list_equal (const_tree, const_tree);
 extern bool attribute_value_equal (const_tree, const_tree);
 
 /* Return 0 if the attributes for two types are incompatible, 1 if they
diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c
index a4da8331b8b..f971e03abad 100644
--- a/gcc/config/i386/i386-options.c
+++ b/gcc/config/i386/i386-options.c
@@ -1403,6 +1403,8 @@ ix86_valid_target_attribute_tree (tree fndecl, tree args,
   return t;
 }
 
+static GTY(()) tree target_attribute_cache[3];
+
 /* Hook to validate attribute((target("string"))).  */
 
 bool
@@ -1423,6 +1425,19 @@ ix86_valid_target_attribute_p (tree fndecl,
       && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
     return true;
 
+  if ((DECL_FUNCTION_SPECIFIC_TARGET (fndecl) == target_attribute_cache[1]
+       || DECL_FUNCTION_SPECIFIC_TARGET (fndecl) == NULL_TREE)
+      && (DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl)
+	  == target_attribute_cache[2]
+	  || DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) == NULL_TREE)
+      && simple_cst_list_equal (args, target_attribute_cache[0]))
+    {
+      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = target_attribute_cache[1];
+      DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl)
+	= target_attribute_cache[2];
+      return true;
+    }
+
   tree old_optimize = build_optimization_node (&global_options,
 					       &global_options_set);
 
@@ -1459,8 +1474,17 @@ ix86_valid_target_attribute_p (tree fndecl,
   if (new_target == error_mark_node)
     ret = false;
 
-  else if (fndecl && new_target)
+  else if (new_target)
     {
+      if (DECL_FUNCTION_SPECIFIC_TARGET (fndecl) == NULL_TREE
+	  && DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) == NULL_TREE)
+	{
+	  target_attribute_cache[0] = copy_list (args);
+	  target_attribute_cache[1] = new_target;
+	  target_attribute_cache[2]
+	    = old_optimize != new_optimize ? new_optimize : NULL_TREE;
+	}
+
       DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
 
       if (old_optimize != new_optimize)


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-12-03 10:09 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-03 10:09 [gcc r12-5769] x86: Speed up target attribute handling by using a cache Jakub Jelinek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).