Ping [PATCH V2] libcpp: Optimize #pragma once with a hash table [PR58770]

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

From: Paul Hollinsky <paulhollinsky@gmail.com>
To: gcc-patches@gcc.gnu.org
Cc: Jason Merrill <jason@redhat.com>,
	David Malcom <dmalcolm@redhat.com>, Per Bothner <per@bothner.com>,
	Nathan Sidwell <nathan@acm.org>, Tom Tromey <tom@tromey.com>
Subject: Ping [PATCH V2] libcpp: Optimize #pragma once with a hash table [PR58770]
Date: Fri, 19 Aug 2022 13:27:34 -0700	[thread overview]
Message-ID: <20220819202734.wvourvrje3x5vfla@perseverance.local> (raw)
In-Reply-To: <20220801051839.1454-1-paulhollinsky@gmail.com>

Hi all,

Would love some feedback on this patch!

Thanks,
Paul

On Mon, Aug 01, 2022 at 05:18:40AM +0000, Paul Hollinsky wrote:
> Rather than traversing the all_files linked list for every include,
> this factors out the quick idempotency checks (modification time
> and size) to be the keys in a hash table so we can find matching
> files quickly.
> 
> The hash table value type is a linked list, in case more than one
> file matches the quick check.
> 
> The table is only built if a once-only file is seen, so include
> guard performance is not affected.
> 
> My laptop would previously complete Ricardo's benchmark from the
> PR in ~1.1s using #pragma once, and ~0.35s using include guards.
> 
> After this change, both benchmarks now complete in ~0.35s. I did
> have to randomize the modification dates on the benchmark headers
> so the files did not all end up in the same hash table list, but
> that would likely not come up outside of the contrived benchmark.
> 
> I bootstrapped and ran the testsuite on x86_64 Darwin, as well as
> ppc64le and aarch64 Linux.
> 
> libcpp/ChangeLog:
> 
> 	PR preprocessor/58770
> 	* internal.h: Add hash table for #pragma once
> 	* files.cc: Optimize #pragma once with the hash table
> 
> Signed-off-by: Paul Hollinsky <paulhollinsky@gmail.com>
> ---
>  libcpp/files.cc   | 116 +++++++++++++++++++++++++++++++++++++++++++---
>  libcpp/internal.h |   3 ++
>  2 files changed, 112 insertions(+), 7 deletions(-)
> 
> diff --git a/libcpp/files.cc b/libcpp/files.cc
> index 24208f7b0f8..d4ffd77578e 100644
> --- a/libcpp/files.cc
> +++ b/libcpp/files.cc
> @@ -167,6 +167,33 @@ struct file_hash_entry_pool
>    struct cpp_file_hash_entry pool[FILE_HASH_POOL_SIZE];
>  };
>  
> +/* A set of attributes designed to quickly identify obviously different files
> +   in a hashtable.  Just in case there are collisions, we still maintain a
> +   list.  These sub-lists can then be checked for #pragma once rather than
> +   interating through all_files.  */
> +struct file_quick_idempotency_attrs
> +{
> +  file_quick_idempotency_attrs(const _cpp_file *f)
> +    : mtime(f->st.st_mtime), size(f->st.st_size) {}
> +
> +  time_t mtime;
> +  off_t size;
> +
> +  static hashval_t hash (/* _cpp_file* */ const void *p);
> +};
> +
> +/* Sub-list of very similar files kept in a hashtable to check for #pragma
> +   once.  */
> +struct file_sublist
> +{
> +  _cpp_file *f;
> +  file_sublist *next;
> +
> +  static int eq (/* _cpp_file* */ const void *p,
> +		 /* file_sublist* */ const void *q);
> +  static void del (/* file_sublist* */ void *p);
> +};
> +
>  static bool open_file (_cpp_file *file);
>  static bool pch_open_file (cpp_reader *pfile, _cpp_file *file,
>  			   bool *invalid_pch);
> @@ -849,17 +876,17 @@ has_unique_contents (cpp_reader *pfile, _cpp_file *file, bool import,
>    if (!pfile->seen_once_only)
>      return true;
>  
> -  /* We may have read the file under a different name.  Look
> -     for likely candidates and compare file contents to be sure.  */
> -  for (_cpp_file *f = pfile->all_files; f; f = f->next_file)
> +  /* We may have read the file under a different name.  We've kept
> +     similar looking files in this lists under this hash table, so
> +     check those more thoroughly.  */
> +  void* ent = htab_find(pfile->pragma_once_files, file);
> +  for (file_sublist *e = static_cast<file_sublist *> (ent); e; e = e->next)
>      {
> +      _cpp_file *f = e->f;
>        if (f == file)
>  	continue; /* It'sa me!  */
>  
> -      if ((import || f->once_only)
> -	  && f->err_no == 0
> -	  && f->st.st_mtime == file->st.st_mtime
> -	  && f->st.st_size == file->st.st_size)
> +      if ((import || f->once_only) && f->err_no == 0)
>  	{
>  	  _cpp_file *ref_file;
>  
> @@ -895,6 +922,38 @@ has_unique_contents (cpp_reader *pfile, _cpp_file *file, bool import,
>    return true;
>  }
>  
> +/* Add the given file to the #pragma once table so it can be
> +   quickly identified and excluded the next time it's seen.  */
> +static void
> +update_pragma_once_table (cpp_reader *pfile, _cpp_file *file)
> +{
> +  void **slot = htab_find_slot (pfile->pragma_once_files, file, INSERT);
> +  if (slot)
> +    {
> +      if (!*slot)
> +	*slot = xcalloc (1, sizeof (file_sublist));
> +
> +      file_sublist *e = static_cast<file_sublist *> (*slot);
> +      while (e->f)
> +	{
> +	  if (!e->next)
> +	    {
> +	      void *new_sublist = xcalloc(1, sizeof (file_sublist));
> +	      e->next = static_cast<file_sublist *> (new_sublist);
> +	    }
> +	  e = e->next;
> +	}
> +
> +      e->f = file;
> +    }
> +  else
> +    {
> +      cpp_error (pfile, CPP_DL_ERROR,
> +		 "Unable to create #pragma once table space for %s",
> +		 _cpp_get_file_name (file));
> +    }
> +}
> +
>  /* Place the file referenced by FILE into a new buffer on the buffer
>     stack if possible.  Returns true if a buffer is stacked.  Use LOC
>     for any diagnostics.  */
> @@ -950,6 +1009,9 @@ _cpp_stack_file (cpp_reader *pfile, _cpp_file *file, include_type type,
>        if (!has_unique_contents (pfile, file, type == IT_IMPORT, loc))
>  	return false;
>  
> +      if (pfile->seen_once_only && file->once_only)
> +	update_pragma_once_table (pfile, file);
> +
>        if (pfile->buffer && file->dir)
>  	sysp = MAX (pfile->buffer->sysp, file->dir->sysp);
>  
> @@ -1434,6 +1496,41 @@ nonexistent_file_hash_eq (const void *p, const void *q)
>    return filename_cmp ((const char *) p, (const char *) q) == 0;
>  }
>  
> +/* Hasher for the #pragma once hash table.  */
> +hashval_t
> +file_quick_idempotency_attrs::hash (const void *p)
> +{
> +  const _cpp_file *f = static_cast<const _cpp_file *> (p);
> +  file_quick_idempotency_attrs kh (f);
> +  return iterative_hash_object (kh, 0);
> +}
> +
> +/* Equality checker for the #pragma once hash table.  */
> +int
> +file_sublist::eq (const void *p, const void *q)
> +{
> +  /* Just check if the file q would be in the list p. Every
> +     file in the list should have these attributes the same,
> +     so we don't need to traverse.  */
> +  const file_sublist *e = static_cast<const file_sublist *> (p);
> +  const _cpp_file *f = static_cast<const _cpp_file *> (q);
> +  return f->st.st_mtime == e->f->st.st_mtime
> +	 && f->st.st_size == e->f->st.st_size;
> +}
> +
> +/* Cleanup for a file sub-list. Does not free the _cpp_file
> +   structures within.  */
> +void
> +file_sublist::del (void *p)
> +{
> +  file_sublist *e = static_cast<file_sublist *> (p);
> +  if (e->next)
> +    {
> +      file_sublist::del (e->next);
> +      free (e->next);
> +    }
> +}
> +
>  /* Initialize everything in this source file.  */
>  void
>  _cpp_init_files (cpp_reader *pfile)
> @@ -1442,6 +1539,10 @@ _cpp_init_files (cpp_reader *pfile)
>  					NULL, xcalloc, free);
>    pfile->dir_hash = htab_create_alloc (127, file_hash_hash, file_hash_eq,
>  					NULL, xcalloc, free);
> +  pfile->pragma_once_files = htab_create_alloc (127,
> +					file_quick_idempotency_attrs::hash,
> +					file_sublist::eq, file_sublist::del,
> +					xcalloc, free);
>    allocate_file_hash_entries (pfile);
>    pfile->nonexistent_file_hash = htab_create_alloc (127, htab_hash_string,
>  						    nonexistent_file_hash_eq,
> @@ -1456,6 +1557,7 @@ _cpp_cleanup_files (cpp_reader *pfile)
>  {
>    htab_delete (pfile->file_hash);
>    htab_delete (pfile->dir_hash);
> +  htab_delete (pfile->pragma_once_files);
>    htab_delete (pfile->nonexistent_file_hash);
>    obstack_free (&pfile->nonexistent_file_ob, 0);
>    free_file_hash_entries (pfile);
> diff --git a/libcpp/internal.h b/libcpp/internal.h
> index badfd1b40da..9c3c46df335 100644
> --- a/libcpp/internal.h
> +++ b/libcpp/internal.h
> @@ -485,6 +485,9 @@ struct cpp_reader
>       been used.  */
>    bool seen_once_only;
>  
> +  /* Optimization for #pragma once.  */
> +  struct htab *pragma_once_files;
> +
>    /* Multiple include optimization.  */
>    const cpp_hashnode *mi_cmacro;
>    const cpp_hashnode *mi_ind_cmacro;
> -- 
> 2.34.1
>

next prev parent reply	other threads:[~2022-08-19 20:28 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-06 23:03 [PATCH] " Paul Hollinsky
2022-08-01  5:18 ` [PATCH V2] " Paul Hollinsky
2022-08-19 20:27   ` Paul Hollinsky [this message]
2022-08-21 19:40     ` Ping " David Malcolm
2022-08-22 17:43       ` Paul Hollinsky
2022-08-22 13:19     ` Nathan Sidwell
2022-08-22 17:39       ` Paul Hollinsky
2022-08-23 12:15         ` Nathan Sidwell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220819202734.wvourvrje3x5vfla@perseverance.local \
    --to=paulhollinsky@gmail.com \
    --cc=dmalcolm@redhat.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=jason@redhat.com \
    --cc=nathan@acm.org \
    --cc=per@bothner.com \
    --cc=tom@tromey.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).