public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
* PR30724, cygwin ld performance regression since 014a602b86
@ 2023-08-08 23:21 Alan Modra
  2023-08-09 14:42 ` Clément Chigot
  0 siblings, 1 reply; 3+ messages in thread
From: Alan Modra @ 2023-08-08 23:21 UTC (permalink / raw)
  To: binutils; +Cc: Clément Chigot

According to the reporter of this bug the newlib fseek implementation
is likely slowed down by locking and fflush, only attempting to
optimise seeks when the file is opened read-only.  Thus when writing
the output we get a dramatic slowdown due to commit 014a602b86.

Clément would you please check that this doesn't regress anything on
mingw.

	PR 30724
	* bfd.c (enum bfd_last_io): New.
	(struct bfd): Add last_io field.
	* bfd-in2.h: Regenerate.
	* bfd-io.c (bfd_bread, bfd_bwrite): Force seek if last_io is
	opposite direction.
	(bfd_seek): Reinstate optimisation for seek to same position.

diff --git a/bfd/bfd-in2.h b/bfd/bfd-in2.h
index 5f49807d7f2..cc62ab19617 100644
--- a/bfd/bfd-in2.h
+++ b/bfd/bfd-in2.h
@@ -1916,6 +1916,14 @@ enum bfd_direction
     both_direction = 3
   };
 
+enum bfd_last_io
+  {
+    bfd_io_seek = 0,
+    bfd_io_read = 1,
+    bfd_io_write = 2,
+    bfd_io_force = 3
+  };
+
 enum bfd_plugin_format
   {
     bfd_plugin_unknown = 0,
@@ -2068,6 +2076,20 @@ struct bfd
   /* The direction with which the BFD was opened.  */
   ENUM_BITFIELD (bfd_direction) direction : 2;
 
+  /* POSIX.1-2017 (IEEE Std 1003.1) says of fopen : "When a file is
+     opened with update mode ('+' as the second or third character in
+     the mode argument), both input and output may be performed on
+     the associated stream.  However, the application shall ensure
+     that output is not directly followed by input without an
+     intervening call to fflush() or to a file positioning function
+     (fseek(), fsetpos(), or rewind()), and input is not directly
+     followed by output without an intervening call to a file
+     positioning function, unless the input operation encounters
+     end-of-file."
+     This field tracks the last IO operation, so that bfd can insert
+     a seek when IO direction changes.  */
+  ENUM_BITFIELD (bfd_last_io) last_io : 2;
+
   /* Is the file descriptor being cached?  That is, can it be closed as
      needed, and re-opened when accessed later?  */
   unsigned int cacheable : 1;
diff --git a/bfd/bfd.c b/bfd/bfd.c
index e43a388ac72..88943a042d6 100644
--- a/bfd/bfd.c
+++ b/bfd/bfd.c
@@ -53,6 +53,14 @@ EXTERNAL
 .    both_direction = 3
 .  };
 .
+.enum bfd_last_io
+.  {
+.    bfd_io_seek = 0,
+.    bfd_io_read = 1,
+.    bfd_io_write = 2,
+.    bfd_io_force = 3
+.  };
+.
 .enum bfd_plugin_format
 .  {
 .    bfd_plugin_unknown = 0,
@@ -208,6 +216,20 @@ CODE_FRAGMENT
 .  {* The direction with which the BFD was opened.  *}
 .  ENUM_BITFIELD (bfd_direction) direction : 2;
 .
+.  {* POSIX.1-2017 (IEEE Std 1003.1) says of fopen : "When a file is
+.     opened with update mode ('+' as the second or third character in
+.     the mode argument), both input and output may be performed on
+.     the associated stream.  However, the application shall ensure
+.     that output is not directly followed by input without an
+.     intervening call to fflush() or to a file positioning function
+.     (fseek(), fsetpos(), or rewind()), and input is not directly
+.     followed by output without an intervening call to a file
+.     positioning function, unless the input operation encounters
+.     end-of-file."
+.     This field tracks the last IO operation, so that bfd can insert
+.     a seek when IO direction changes.  *}
+.  ENUM_BITFIELD (bfd_last_io) last_io : 2;
+.
 .  {* Is the file descriptor being cached?  That is, can it be closed as
 .     needed, and re-opened when accessed later?  *}
 .  unsigned int cacheable : 1;
diff --git a/bfd/bfdio.c b/bfd/bfdio.c
index 22c39a7b0cc..e0d47b3ee1c 100644
--- a/bfd/bfdio.c
+++ b/bfd/bfdio.c
@@ -279,6 +279,14 @@ bfd_bread (void *ptr, bfd_size_type size, bfd *abfd)
       return -1;
     }
 
+  if (abfd->last_io == bfd_io_write)
+    {
+      abfd->last_io = bfd_io_force;
+      if (bfd_seek (abfd, 0, SEEK_CUR) != 0)
+	return -1;
+    }
+  abfd->last_io = bfd_io_read;
+
   nread = abfd->iovec->bread (abfd, ptr, size);
   if (nread != -1)
     abfd->where += nread;
@@ -313,6 +321,14 @@ bfd_bwrite (const void *ptr, bfd_size_type size, bfd *abfd)
       return -1;
     }
 
+  if (abfd->last_io == bfd_io_read)
+    {
+      abfd->last_io = bfd_io_force;
+      if (bfd_seek (abfd, 0, SEEK_CUR) != 0)
+	return -1;
+    }
+  abfd->last_io = bfd_io_write;
+
   nwrote = abfd->iovec->bwrite (abfd, ptr, size);
   if (nwrote != -1)
     abfd->where += nwrote;
@@ -456,6 +472,13 @@ bfd_seek (bfd *abfd, file_ptr position, int direction)
   if (direction != SEEK_CUR)
     position += offset;
 
+  if (((direction == SEEK_CUR && position == 0)
+       || (direction == SEEK_SET && (ufile_ptr) position == abfd->where))
+      && abfd->last_io != bfd_io_force)
+    return 0;
+
+  abfd->last_io = bfd_io_seek;
+
   result = abfd->iovec->bseek (abfd, position, direction);
   if (result != 0)
     {

-- 
Alan Modra
Australia Development Lab, IBM

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: PR30724, cygwin ld performance regression since 014a602b86
  2023-08-08 23:21 PR30724, cygwin ld performance regression since 014a602b86 Alan Modra
@ 2023-08-09 14:42 ` Clément Chigot
  2023-08-09 22:21   ` Alan Modra
  0 siblings, 1 reply; 3+ messages in thread
From: Clément Chigot @ 2023-08-09 14:42 UTC (permalink / raw)
  To: Alan Modra; +Cc: binutils

On Wed, Aug 9, 2023 at 1:21 AM Alan Modra <amodra@gmail.com> wrote:
>
> According to the reporter of this bug the newlib fseek implementation
> is likely slowed down by locking and fflush, only attempting to
> optimise seeks when the file is opened read-only.  Thus when writing
> the output we get a dramatic slowdown due to commit 014a602b86.
>
> Clément would you please check that this doesn't regress anything on
> mingw.

I'm not seeing any and the issue fixed by 014a602b86 is still fixed so
it looks good.
However, I have a question regarding the purpose of bfd_io_force.
Unless bfd_seek fails, it would be overridden right away. So are there
cases where bfd_seek fails and bfd doesn't report an error, continuing
to perform inputs/outputs as if nothing happens  ?


>         PR 30724
>         * bfd.c (enum bfd_last_io): New.
>         (struct bfd): Add last_io field.
>         * bfd-in2.h: Regenerate.
>         * bfd-io.c (bfd_bread, bfd_bwrite): Force seek if last_io is
>         opposite direction.
>         (bfd_seek): Reinstate optimisation for seek to same position.
>
> diff --git a/bfd/bfd-in2.h b/bfd/bfd-in2.h
> index 5f49807d7f2..cc62ab19617 100644
> --- a/bfd/bfd-in2.h
> +++ b/bfd/bfd-in2.h
> @@ -1916,6 +1916,14 @@ enum bfd_direction
>      both_direction = 3
>    };
>
> +enum bfd_last_io
> +  {
> +    bfd_io_seek = 0,
> +    bfd_io_read = 1,
> +    bfd_io_write = 2,
> +    bfd_io_force = 3
> +  };
> +
>  enum bfd_plugin_format
>    {
>      bfd_plugin_unknown = 0,
> @@ -2068,6 +2076,20 @@ struct bfd
>    /* The direction with which the BFD was opened.  */
>    ENUM_BITFIELD (bfd_direction) direction : 2;
>
> +  /* POSIX.1-2017 (IEEE Std 1003.1) says of fopen : "When a file is
> +     opened with update mode ('+' as the second or third character in
> +     the mode argument), both input and output may be performed on
> +     the associated stream.  However, the application shall ensure
> +     that output is not directly followed by input without an
> +     intervening call to fflush() or to a file positioning function
> +     (fseek(), fsetpos(), or rewind()), and input is not directly
> +     followed by output without an intervening call to a file
> +     positioning function, unless the input operation encounters
> +     end-of-file."
> +     This field tracks the last IO operation, so that bfd can insert
> +     a seek when IO direction changes.  */
> +  ENUM_BITFIELD (bfd_last_io) last_io : 2;
> +
>    /* Is the file descriptor being cached?  That is, can it be closed as
>       needed, and re-opened when accessed later?  */
>    unsigned int cacheable : 1;
> diff --git a/bfd/bfd.c b/bfd/bfd.c
> index e43a388ac72..88943a042d6 100644
> --- a/bfd/bfd.c
> +++ b/bfd/bfd.c
> @@ -53,6 +53,14 @@ EXTERNAL
>  .    both_direction = 3
>  .  };
>  .
> +.enum bfd_last_io
> +.  {
> +.    bfd_io_seek = 0,
> +.    bfd_io_read = 1,
> +.    bfd_io_write = 2,
> +.    bfd_io_force = 3
> +.  };
> +.
>  .enum bfd_plugin_format
>  .  {
>  .    bfd_plugin_unknown = 0,
> @@ -208,6 +216,20 @@ CODE_FRAGMENT
>  .  {* The direction with which the BFD was opened.  *}
>  .  ENUM_BITFIELD (bfd_direction) direction : 2;
>  .
> +.  {* POSIX.1-2017 (IEEE Std 1003.1) says of fopen : "When a file is
> +.     opened with update mode ('+' as the second or third character in
> +.     the mode argument), both input and output may be performed on
> +.     the associated stream.  However, the application shall ensure
> +.     that output is not directly followed by input without an
> +.     intervening call to fflush() or to a file positioning function
> +.     (fseek(), fsetpos(), or rewind()), and input is not directly
> +.     followed by output without an intervening call to a file
> +.     positioning function, unless the input operation encounters
> +.     end-of-file."
> +.     This field tracks the last IO operation, so that bfd can insert
> +.     a seek when IO direction changes.  *}
> +.  ENUM_BITFIELD (bfd_last_io) last_io : 2;
> +.
>  .  {* Is the file descriptor being cached?  That is, can it be closed as
>  .     needed, and re-opened when accessed later?  *}
>  .  unsigned int cacheable : 1;
> diff --git a/bfd/bfdio.c b/bfd/bfdio.c
> index 22c39a7b0cc..e0d47b3ee1c 100644
> --- a/bfd/bfdio.c
> +++ b/bfd/bfdio.c
> @@ -279,6 +279,14 @@ bfd_bread (void *ptr, bfd_size_type size, bfd *abfd)
>        return -1;
>      }
>
> +  if (abfd->last_io == bfd_io_write)
> +    {
> +      abfd->last_io = bfd_io_force;
> +      if (bfd_seek (abfd, 0, SEEK_CUR) != 0)
> +       return -1;
> +    }
> +  abfd->last_io = bfd_io_read;
> +
>    nread = abfd->iovec->bread (abfd, ptr, size);
>    if (nread != -1)
>      abfd->where += nread;
> @@ -313,6 +321,14 @@ bfd_bwrite (const void *ptr, bfd_size_type size, bfd *abfd)
>        return -1;
>      }
>
> +  if (abfd->last_io == bfd_io_read)
> +    {
> +      abfd->last_io = bfd_io_force;
> +      if (bfd_seek (abfd, 0, SEEK_CUR) != 0)
> +       return -1;
> +    }
> +  abfd->last_io = bfd_io_write;
> +
>    nwrote = abfd->iovec->bwrite (abfd, ptr, size);
>    if (nwrote != -1)
>      abfd->where += nwrote;
> @@ -456,6 +472,13 @@ bfd_seek (bfd *abfd, file_ptr position, int direction)
>    if (direction != SEEK_CUR)
>      position += offset;
>
> +  if (((direction == SEEK_CUR && position == 0)
> +       || (direction == SEEK_SET && (ufile_ptr) position == abfd->where))
> +      && abfd->last_io != bfd_io_force)
> +    return 0;
> +
> +  abfd->last_io = bfd_io_seek;
> +
>    result = abfd->iovec->bseek (abfd, position, direction);
>    if (result != 0)
>      {
>
> --
> Alan Modra
> Australia Development Lab, IBM

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: PR30724, cygwin ld performance regression since 014a602b86
  2023-08-09 14:42 ` Clément Chigot
@ 2023-08-09 22:21   ` Alan Modra
  0 siblings, 0 replies; 3+ messages in thread
From: Alan Modra @ 2023-08-09 22:21 UTC (permalink / raw)
  To: Clément Chigot; +Cc: binutils

On Wed, Aug 09, 2023 at 04:42:48PM +0200, Clément Chigot wrote:
> On Wed, Aug 9, 2023 at 1:21 AM Alan Modra <amodra@gmail.com> wrote:
> >
> > According to the reporter of this bug the newlib fseek implementation
> > is likely slowed down by locking and fflush, only attempting to
> > optimise seeks when the file is opened read-only.  Thus when writing
> > the output we get a dramatic slowdown due to commit 014a602b86.
> >
> > Clément would you please check that this doesn't regress anything on
> > mingw.
> 
> I'm not seeing any and the issue fixed by 014a602b86 is still fixed so
> it looks good.
> However, I have a question regarding the purpose of bfd_io_force.

It's a flag used to modify the operation of bfd_seek rather than being
the last IO operation performed.  Perhaps I should have done without
that enum value, as all uses of bfd_io_force could be replaced with
bfd_io_seek and given the same result, unless we have code that is
doing multiple bfd_seeks without intervening bfd_reads or bfd_writes.

> Unless bfd_seek fails, it would be overridden right away. So are there
> cases where bfd_seek fails and bfd doesn't report an error, continuing
> to perform inputs/outputs as if nothing happens  ?

Yes, there are cases where code in binutils and gdb does this.  They
are all bugs.  I have a patch, just didn't post yet.

-- 
Alan Modra
Australia Development Lab, IBM

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-08-09 22:21 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-08 23:21 PR30724, cygwin ld performance regression since 014a602b86 Alan Modra
2023-08-09 14:42 ` Clément Chigot
2023-08-09 22:21   ` Alan Modra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).