public inbox for newlib@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] newlib: fix fseek optimization with SEEK_CUR
@ 2019-11-09 16:28 Bastien Bouclet
  2019-11-13 10:15 ` Corinna Vinschen
  2019-11-18 10:11 ` Corinna Vinschen
  0 siblings, 2 replies; 5+ messages in thread
From: Bastien Bouclet @ 2019-11-09 16:28 UTC (permalink / raw)
  To: newlib; +Cc: Bastien Bouclet

The call to fflush was invalidating the read buffer, preventing relative
seeks to positions that would have been inside the read buffer from
being optimized. The call to srefill would then re-read mostly the same
data that was initially in the read buffer.
---
 newlib/libc/stdio/fseeko.c     | 31 ++++++-------------------------
 newlib/libc/stdio64/fseeko64.c | 31 ++++++-------------------------
 2 files changed, 12 insertions(+), 50 deletions(-)

diff --git a/newlib/libc/stdio/fseeko.c b/newlib/libc/stdio/fseeko.c
index 3e0f9e90b..bbf1af43e 100644
--- a/newlib/libc/stdio/fseeko.c
+++ b/newlib/libc/stdio/fseeko.c
@@ -141,31 +141,12 @@ _fseeko_r (struct _reent *ptr,
   switch (whence)
     {
     case SEEK_CUR:
-      /*
-       * In order to seek relative to the current stream offset,
-       * we have to first find the current stream offset a la
-       * ftell (see ftell for details).
-       */
-      _fflush_r (ptr, fp);   /* may adjust seek offset on append stream */
-      if (fp->_flags & __SOFF)
-	curoff = fp->_offset;
-      else
-	{
-	  curoff = seekfn (ptr, fp->_cookie, (_fpos_t) 0, SEEK_CUR);
-	  if (curoff == -1L)
-	    {
-	      _newlib_flockfile_exit (fp);
-	      return EOF;
-	    }
-	}
-      if (fp->_flags & __SRD)
-	{
-	  curoff -= fp->_r;
-	  if (HASUB (fp))
-	    curoff -= fp->_ur;
-	}
-      else if (fp->_flags & __SWR && fp->_p != NULL)
-	curoff += fp->_p - fp->_bf._base;
+      curoff = _ftello_r(ptr, fp);
+      if (curoff == -1L)
+        {
+          _newlib_flockfile_exit (fp);
+          return EOF;
+        }
 
       offset += curoff;
       whence = SEEK_SET;
diff --git a/newlib/libc/stdio64/fseeko64.c b/newlib/libc/stdio64/fseeko64.c
index 0672086a3..f38005570 100644
--- a/newlib/libc/stdio64/fseeko64.c
+++ b/newlib/libc/stdio64/fseeko64.c
@@ -142,31 +142,12 @@ _fseeko64_r (struct _reent *ptr,
   switch (whence)
     {
     case SEEK_CUR:
-      /*
-       * In order to seek relative to the current stream offset,
-       * we have to first find the current stream offset a la
-       * ftell (see ftell for details).
-       */
-      _fflush_r (ptr, fp);   /* may adjust seek offset on append stream */
-      if (fp->_flags & __SOFF)
-	curoff = fp->_offset;
-      else
-	{
-	  curoff = seekfn (ptr, fp->_cookie, (_fpos64_t) 0, SEEK_CUR);
-	  if (curoff == -1L)
-	    {
-	      _newlib_flockfile_exit(fp);
-	      return EOF;
-	    }
-	}
-      if (fp->_flags & __SRD)
-	{
-	  curoff -= fp->_r;
-	  if (HASUB (fp))
-	    curoff -= fp->_ur;
-	}
-      else if (fp->_flags & __SWR && fp->_p != NULL)
-	curoff += fp->_p - fp->_bf._base;
+      curoff = _ftello64_r(ptr, fp);
+      if (curoff == -1L)
+        {
+          _newlib_flockfile_exit (fp);
+          return EOF;
+        }
 
       offset += curoff;
       whence = SEEK_SET;
-- 
2.24.0

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] newlib: fix fseek optimization with SEEK_CUR
  2019-11-09 16:28 [PATCH] newlib: fix fseek optimization with SEEK_CUR Bastien Bouclet
@ 2019-11-13 10:15 ` Corinna Vinschen
  2019-11-18 10:11 ` Corinna Vinschen
  1 sibling, 0 replies; 5+ messages in thread
From: Corinna Vinschen @ 2019-11-13 10:15 UTC (permalink / raw)
  To: newlib

[-- Attachment #1: Type: text/plain, Size: 911 bytes --]

Hi Bastien,

On Nov  9 17:28, Bastien Bouclet wrote:
> The call to fflush was invalidating the read buffer, preventing relative
> seeks to positions that would have been inside the read buffer from
> being optimized. The call to srefill would then re-read mostly the same
> data that was initially in the read buffer.

I checked this against upstream BSD versions.  OpenBSD and NetBSD
operate like our code, including the flush, while FreeBSD uses its
internal ftello and never flushed since the repository import back in
1994.

I'm pretty unsure if we can do this.  Apparently the flush op is only
necessary for streams in append mode.  If at all.

Can we be sure this works as desired on append streams as well?

Also, given that this is changing very basic code, nobody is unaffected.
Any input from other folks?


Thanks,
Corinna

-- 
Corinna Vinschen
Cygwin Maintainer
Red Hat

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] newlib: fix fseek optimization with SEEK_CUR
  2019-11-09 16:28 [PATCH] newlib: fix fseek optimization with SEEK_CUR Bastien Bouclet
  2019-11-13 10:15 ` Corinna Vinschen
@ 2019-11-18 10:11 ` Corinna Vinschen
  2020-01-29 18:02   ` Corinna Vinschen
  1 sibling, 1 reply; 5+ messages in thread
From: Corinna Vinschen @ 2019-11-18 10:11 UTC (permalink / raw)
  To: Bastien Bouclet; +Cc: newlib

[-- Attachment #1: Type: text/plain, Size: 602 bytes --]

On Nov  9 17:28, Bastien Bouclet wrote:
> The call to fflush was invalidating the read buffer, preventing relative
> seeks to positions that would have been inside the read buffer from
> being optimized. The call to srefill would then re-read mostly the same
> data that was initially in the read buffer.
> ---
>  newlib/libc/stdio/fseeko.c     | 31 ++++++-------------------------
>  newlib/libc/stdio64/fseeko64.c | 31 ++++++-------------------------
>  2 files changed, 12 insertions(+), 50 deletions(-)

Pushed.


Thanks,
Corinna

-- 
Corinna Vinschen
Cygwin Maintainer
Red Hat

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] newlib: fix fseek optimization with SEEK_CUR
  2019-11-18 10:11 ` Corinna Vinschen
@ 2020-01-29 18:02   ` Corinna Vinschen
  0 siblings, 0 replies; 5+ messages in thread
From: Corinna Vinschen @ 2020-01-29 18:02 UTC (permalink / raw)
  To: Bastien Bouclet; +Cc: newlib, Takashi Yano, Marco Atzeri


[-- Attachment #1.1: Type: text/plain, Size: 1263 bytes --]

On Nov 18 11:11, Corinna Vinschen wrote:
> On Nov  9 17:28, Bastien Bouclet wrote:
> > The call to fflush was invalidating the read buffer, preventing relative
> > seeks to positions that would have been inside the read buffer from
> > being optimized. The call to srefill would then re-read mostly the same
> > data that was initially in the read buffer.
> > ---
> >  newlib/libc/stdio/fseeko.c     | 31 ++++++-------------------------
> >  newlib/libc/stdio64/fseeko64.c | 31 ++++++-------------------------
> >  2 files changed, 12 insertions(+), 50 deletions(-)
> 
> Pushed.

Sorry, but I had to revert this patch.  It breaks gnulib's autoconf
test.  The attached conftest.c returns 5, rather than 0 as before
because lseek and ftello return different results.

While this is expected on BSD systems, it's not expected on at least
Linux and Cygwin.  This breaks backward compatibility and
leads to gnulib wrongly providing its own fflush, fseek and fseeko
implementations when building for newlib/Cygwin.

I attached the gnulib testcase for completeness.

Many thanks to Takashi Yano for figuring this out after the Cygwin
octave build was broken.


Thanks,
Corinna

-- 
Corinna Vinschen
Cygwin Maintainer
Red Hat



[-- Attachment #1.2: conftest.c --]
[-- Type: text/plain, Size: 1462 bytes --]

#include <stdio.h>
# include <unistd.h>

/* Configure-time probe: opens conftest.txt for reading and checks how
   fflush and fseek interact with the underlying file descriptor offset
   and with ungetc.

   Exit status:
     0  every check passed
     1  fopen failed
     2  fileno returned a negative fd, or fread did not return 5 items
     3  the fd offset is already 5 after reading 5 bytes
     4  fflush or fseek (f, 0, SEEK_CUR) reported failure
     5  the fd offset is not 5 after fflush followed by fseek
     6  fgetc after ungetc (c) + fflush did not return c
     7  fgetc after ungetc ('@') + fflush did not return the character
        that was originally read  */
int
main ()
{
FILE *f = fopen ("conftest.txt", "r");
         char buffer[10];
         int fd;
         int c;
         if (f == NULL)
           return 1;
         fd = fileno (f);
         if (fd < 0 || fread (buffer, 1, 5, f) != 5)
           { fclose (f); return 2; }
         /* For deterministic results, ensure f read a bigger buffer.  */
         /* That is: the descriptor offset must differ from the 5 bytes
            consumed through the stream, otherwise the check below proves
            nothing.  */
         if (lseek (fd, 0, SEEK_CUR) == 5)
           { fclose (f); return 3; }
         /* POSIX requires fflush-fseek to set file offset of fd.  This fails
            on BSD systems and on mingw.  */
         if (fflush (f) != 0 || fseek (f, 0, SEEK_CUR) != 0)
           { fclose (f); return 4; }
         /* The descriptor offset must now equal the stream position (5).  */
         if (lseek (fd, 0, SEEK_CUR) != 5)
           { fclose (f); return 5; }
         /* Verify behaviour of fflush after ungetc. See
            <http://www.opengroup.org/austin/aardvark/latest/xshbug3.txt>  */
         /* Verify behaviour of fflush after a backup ungetc.  This fails on
            mingw.  */
         /* "Backup" ungetc: the character pushed back is the one just read.  */
         c = fgetc (f);
         ungetc (c, f);
         fflush (f);
         if (fgetc (f) != c)
           { fclose (f); return 6; }
         /* Verify behaviour of fflush after a non-backup ungetc.  This fails
            on glibc 2.8 and on BSD systems.  */
         /* "Non-backup" ungetc: a different character ('@') is pushed back;
            after fflush the next read must still yield the file's own byte.  */
         c = fgetc (f);
         ungetc ('@', f);
         fflush (f);
         if (fgetc (f) != c)
           { fclose (f); return 7; }
         fclose (f);
         return 0;

  /* Unreachable: every path above has already returned.  Presumably
     boilerplate appended by the configure test wrapper -- TODO confirm.  */
  ;
  return 0;
}

[-- Attachment #1.3: conftest.txt --]
[-- Type: text/plain, Size: 13 bytes --]

hello world


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] newlib: fix fseek optimization with SEEK_CUR
@ 2019-11-13 20:36 Bastien Bouclet
  0 siblings, 0 replies; 5+ messages in thread
From: Bastien Bouclet @ 2019-11-13 20:36 UTC (permalink / raw)
  To: newlib

[-- Attachment #1: Type: text/plain, Size: 1218 bytes --]

Hi Corinna,

Thank you for your answer.

> I checked this against upstream BSD versions.  OpenBSD and NetBSD
> operate like our code, including the flush, while FreeBSD uses its
> internal ftello and never flushed since the repository import back in
> 1994.

One difference I've noticed is that fflush does not invalidate the
stream read buffer in the BSD versions of libc.  In newlib this was
introduced in commit a8ef755c2776b8da4ea386360c1df74ce268c165.  Which
is probably why OpenBSD and NetBSD can call fflush in fseek with
SEEK_CUR.

> Can we be sure this works as desired on append streams as well?

Regarding the append streams, it's worth noting there is another call
to fflush at the beginning of fseek in that case.  I've written a small
test program to verify they did not regress in simple cases.

> Also, given that this is changing very basic code, nobody is unaffected.

I would like to see the performance issue fixed one way or another.
The systems I target do not have a page cache, the extra reads have a
noticeable impact on user experience.  Another option could be
having a compile time option for disabling the code in fflush that
forces a disk access on the next read.

Regards,
Bastien

[-- Attachment #2: stdio-append.c --]
[-- Type: text/x-csrc, Size: 1559 bytes --]

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Print MSG on its own line and terminate the test with status 1. */
static void fail(const char *msg)
{
	puts(msg);
	exit(1);
}

/* Write the 10-byte string DIGITS to FP as a single item, or abort. */
static void write_digits(FILE *fp, const char *digits)
{
	if (fwrite(digits, 10, 1, fp) != 1)
		fail("failed to write");
}

/* Move FP by DELTA bytes relative to the current position, or abort. */
static void seek_relative(FILE *fp, long delta)
{
	if (fseek(fp, delta, SEEK_CUR) == -1)
		fail("failed to seek");
}

/*
 * Read one item of LEN bytes from FP into BUF and compare BUF, as a C
 * string, against EXPECTED.  BUF must be zero-filled beyond LEN so that
 * the comparison sees a terminated string.  Aborts on a short read or a
 * mismatch.
 */
static void read_expect(FILE *fp, char *buf, size_t len, const char *expected)
{
	if (fread(buf, len, 1, fp) != 1) {
		printf("failed to read %d\n", errno);
		exit(1);
	}

	if (strcmp(buf, expected) != 0) {
		printf("unexpected read value %s\n", buf);
		exit(1);
	}
}

/*
 * Exercise SEEK_CUR seeks on a stream opened with "a+":
 *   1. write ten digits, seek back 8, expect to read "34";
 *   2. push back one character with ungetc, seek forward 3, expect "78"
 *      (the pushed-back character counts toward the current position);
 *   3. write ten more digits, seek back 20, expect both writes in order.
 * Prints "success" and returns 0 when every step behaves as expected;
 * otherwise prints a diagnostic and exits with status 1.
 */
int main() {
	char scratch[100];
	FILE *stream = fopen("test.txt", "a+");

	if (stream == NULL)
		fail("failed to open test file in append mode");

	write_digits(stream, "1234567890");
	seek_relative(stream, -8);

	/* Zero-fill once; later reads only overwrite a prefix of the buffer. */
	memset(scratch, 0, sizeof(scratch));
	read_expect(stream, scratch, 2, "34");

	if (ungetc('a', stream) != 'a')
		fail("failed to ungetc");

	seek_relative(stream, 3);
	read_expect(stream, scratch, 2, "78");

	write_digits(stream, "0987654321");
	seek_relative(stream, -20);
	read_expect(stream, scratch, 20, "12345678900987654321");

	fclose(stream);

	puts("success");

	return 0;
}

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-01-29 18:02 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-09 16:28 [PATCH] newlib: fix fseek optimization with SEEK_CUR Bastien Bouclet
2019-11-13 10:15 ` Corinna Vinschen
2019-11-18 10:11 ` Corinna Vinschen
2020-01-29 18:02   ` Corinna Vinschen
2019-11-13 20:36 Bastien Bouclet

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).