public inbox for cygwin-cvs@sourceware.org
help / color / mirror / Atom feed
* [newlib-cygwin/main] Cygwin: fallocate(2): handle FALLOC_FL_PUNCH_HOLE and FALLOC_FL_ZERO_RANGE
@ 2023-11-28 10:03 Corinna Vinschen
  0 siblings, 0 replies; only message in thread
From: Corinna Vinschen @ 2023-11-28 10:03 UTC (permalink / raw)
  To: cygwin-cvs

https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=65831f88d6c4cd943969b5ee531bc6162c7b0f60

commit 65831f88d6c4cd943969b5ee531bc6162c7b0f60
Author:     Corinna Vinschen <corinna@vinschen.de>
AuthorDate: Tue Nov 28 10:42:52 2023 +0100
Commit:     Corinna Vinschen <corinna@vinschen.de>
CommitDate: Tue Nov 28 10:55:38 2023 +0100

    Cygwin: fallocate(2): handle FALLOC_FL_PUNCH_HOLE and FALLOC_FL_ZERO_RANGE
    
    Split fhandler_disk_file::fallocate into multiple methods, each
    implementing a different aspect of fallocate(2), thus adding
    FALLOC_FL_PUNCH_HOLE and FALLOC_FL_ZERO_RANGE handling.
    
    For more correctly implementing posix_fallocate(3) semantics, make
    sure to re-allocate holes in the given range if the file is sparse.
    
    While at it, change the way checking when to make a file sparse.
    The rule is now, make file sparse if the hole created by the action
    spans at least one sparse block, taking the allocation granularity
    of sparse files into account.
    
    Signed-off-by: Corinna Vinschen <corinna@vinschen.de>

Diff:
---
 winsup/cygwin/fhandler/base.cc          |  10 +-
 winsup/cygwin/fhandler/disk_file.cc     | 337 ++++++++++++++++++++++++++------
 winsup/cygwin/include/fcntl.h           |   1 +
 winsup/cygwin/local_includes/fhandler.h |  14 ++
 winsup/cygwin/local_includes/ntdll.h    |   8 +
 5 files changed, 302 insertions(+), 68 deletions(-)

diff --git a/winsup/cygwin/fhandler/base.cc b/winsup/cygwin/fhandler/base.cc
index b9336625419a..f1ad375228f8 100644
--- a/winsup/cygwin/fhandler/base.cc
+++ b/winsup/cygwin/fhandler/base.cc
@@ -896,6 +896,9 @@ fhandler_base::write (const void *ptr, size_t len)
 
       did_lseek (false); /* don't do it again */
 
+      /* If the file system supports sparse files and the application is
+         writing after a long seek beyond EOF spanning at least one
+	 sparsifiable chunk, convert the file to a sparse file. */
       if (!(get_flags () & O_APPEND)
 	  && !has_attribute (FILE_ATTRIBUTE_SPARSE_FILE)
 	  && NT_SUCCESS (NtQueryInformationFile (get_output_handle (),
@@ -904,12 +907,9 @@ fhandler_base::write (const void *ptr, size_t len)
 	  && NT_SUCCESS (NtQueryInformationFile (get_output_handle (),
 						 &io, &fpi, sizeof fpi,
 						 FilePositionInformation))
-	  && fpi.CurrentByteOffset.QuadPart
-	     >= fsi.EndOfFile.QuadPart + (128 * 1024))
+	  && span_sparse_chunk (fpi.CurrentByteOffset.QuadPart,
+				fsi.EndOfFile.QuadPart))
 	{
-	  /* If the file system supports sparse files and the application
-	     is writing after a long seek beyond EOF, convert the file to
-	     a sparse file. */
 	  NTSTATUS status;
 	  status = NtFsControlFile (get_output_handle (), NULL, NULL, NULL,
 				    &io, FSCTL_SET_SPARSE, NULL, 0, NULL, 0);
diff --git a/winsup/cygwin/fhandler/disk_file.cc b/winsup/cygwin/fhandler/disk_file.cc
index c70afed49f1e..ce15e41a4e77 100644
--- a/winsup/cygwin/fhandler/disk_file.cc
+++ b/winsup/cygwin/fhandler/disk_file.cc
@@ -1131,89 +1131,300 @@ fhandler_disk_file::fadvise (off_t offset, off_t length, int advice)
 }
 
 int
-fhandler_disk_file::fallocate (int mode, off_t offset, off_t length)
+fhandler_disk_file::falloc_allocate (int mode, off_t offset, off_t length)
 {
-  int res = 0;
-
-  if (length < 0 || !get_handle ())
-    res = EINVAL;
-  else if (pc.isdir ())
-    res = EISDIR;
-  else if (!(get_access () & GENERIC_WRITE))
-    res = EBADF;
-  else
-    {
-      NTSTATUS status;
-      IO_STATUS_BLOCK io;
-      FILE_STANDARD_INFORMATION fsi;
-      FILE_END_OF_FILE_INFORMATION feofi;
-
-      status = NtQueryInformationFile (get_handle (), &io, &fsi, sizeof fsi,
-				       FileStandardInformation);
-      if (!NT_SUCCESS (status))
-	return geterrno_from_nt_status (status);
-
-      /* Never change file size if FALLOC_FL_KEEP_SIZE is specified. */
-      if ((mode & FALLOC_FL_KEEP_SIZE)
-	  && offset + length > fsi.EndOfFile.QuadPart)
-	{
-	  if (offset > fsi.EndOfFile.QuadPart) /* no-op */
-	    return 0;
-	  length = fsi.EndOfFile.QuadPart - offset;
-	}
-      mode &= ~FALLOC_FL_KEEP_SIZE;
-
-      switch (mode)
-	{
-	case 0:
-	case __FALLOC_FL_TRUNCATE:
-	  break;
-	case FALLOC_FL_PUNCH_HOLE: /* TODO */
-	  return EOPNOTSUPP;
-	  break;
-	case FALLOC_FL_ZERO_RANGE: /* TODO */
-	  return EOPNOTSUPP;
-	  break;
-	default:
-	  return EINVAL;
-	}
+  NTSTATUS status;
+  IO_STATUS_BLOCK io;
+  FILE_STANDARD_INFORMATION fsi;
+  FILE_END_OF_FILE_INFORMATION feofi;
+  FILE_ALLOCATION_INFORMATION fai = { 0 };
 
-      if (mode == 0)
+  /* Fetch EOF */
+  status = NtQueryInformationFile (get_handle (), &io, &fsi, sizeof fsi,
+				   FileStandardInformation);
+  if (!NT_SUCCESS (status))
+    return geterrno_from_nt_status (status);
+
+  switch (mode)
+    {
+    case 0:
+      /* For posix_fallocate(3), truncating the file is a no-op.  However,
+         for sparse files we still have to allocate the blocks within
+	 offset and offset + length which are currently in holes, due to
+	 the following POSIX requirement:
+	 "If posix_fallocate() returns successfully, subsequent writes to
+	  the specified file data shall not fail due to the lack of free
+	  space on the file system  storage  media." */
+      if (offset + length <= fsi.EndOfFile.QuadPart)
 	{
-	  /* If called through posix_fallocate, silently succeed if
-	     offset + length is less than the file's actual length. */
-
-	  /* TODO: If the file is sparse, POSIX requires to allocate
-		   the holes within offset and offset + length. */
-	  if (offset + length < fsi.EndOfFile.QuadPart)
+	  if (!has_attribute (FILE_ATTRIBUTE_SPARSE_FILE))
 	    return 0;
+	  feofi.EndOfFile.QuadPart = fsi.EndOfFile.QuadPart;
 	}
+      else
+	feofi.EndOfFile.QuadPart = offset + length;
+      break;
+    case __FALLOC_FL_TRUNCATE:
+      /* For ftruncate(2), offset is 0. Just use length as is. */
+      feofi.EndOfFile.QuadPart = length;
 
-      feofi.EndOfFile.QuadPart = offset + length;
-      /* Create sparse files only when called through ftruncate, not when
-	 called through posix_fallocate. */
-      if (mode == __FALLOC_FL_TRUNCATE
+      /* Make file sparse only when called through ftruncate and the mount
+	 mode supports sparse files.  Also, make sure that the new region
+	 actually spans over at least one sparsifiable chunk. */
+      if (pc.support_sparse ()
 	  && !has_attribute (FILE_ATTRIBUTE_SPARSE_FILE)
-	  && pc.support_sparse ()
-	  && offset + length >= fsi.EndOfFile.QuadPart + (128 * 1024))
+	  && span_sparse_chunk (feofi.EndOfFile.QuadPart,
+				fsi.EndOfFile.QuadPart))
 	{
 	  status = NtFsControlFile (get_handle (), NULL, NULL, NULL, &io,
 				    FSCTL_SET_SPARSE, NULL, 0, NULL, 0);
 	  if (NT_SUCCESS (status))
 	    pc.file_attributes (pc.file_attributes ()
-			        | FILE_ATTRIBUTE_SPARSE_FILE);
-	  syscall_printf ("%y = NtFsControlFile(%S, FSCTL_SET_SPARSE)",
-			  status, pc.get_nt_native_path ());
+				| FILE_ATTRIBUTE_SPARSE_FILE);
+	  debug_printf ("%y = NtFsControlFile(%S, FSCTL_SET_SPARSE)",
+			status, pc.get_nt_native_path ());
 	}
+      break;
+    case FALLOC_FL_KEEP_SIZE:
+      /* Keep track of the allocation size for overallocation below.
+	 Note that overallocation in Windows is only temporary!
+	 As soon as the last open handle to the file is closed, the
+	 overallocation gets removed by the system.  Also, overallocation
+	 for sparse files fails silently, so just don't bother. */
+      if (offset + length > fsi.EndOfFile.QuadPart
+	  && !has_attribute (FILE_ATTRIBUTE_SPARSE_FILE))
+	fai.AllocationSize.QuadPart = offset + length;
+
+      feofi.EndOfFile.QuadPart = fsi.EndOfFile.QuadPart;
+      break;
+    }
+
+  /* Now set the new EOF */
+  if (feofi.EndOfFile.QuadPart != fsi.EndOfFile.QuadPart)
+    {
       status = NtSetInformationFile (get_handle (), &io,
 				     &feofi, sizeof feofi,
 				     FileEndOfFileInformation);
       if (!NT_SUCCESS (status))
-	res = geterrno_from_nt_status (status);
+	return geterrno_from_nt_status (status);
+    }
+
+  /* If called via fallocate(2) or posix_fallocate(3), allocate blocks in
+     sparse file holes. */
+  if (mode != __FALLOC_FL_TRUNCATE
+      && length
+      && has_attribute (FILE_ATTRIBUTE_SPARSE_FILE))
+    {
+      int res = falloc_zero_range (mode | __FALLOC_FL_ZERO_HOLES,
+				   offset, length);
+      if (res)
+	return res;
+    }
+
+  /* Last but not least, set the new allocation size, if any */
+  if (fai.AllocationSize.QuadPart)
+    {
+      /* This is not fatal. Just note a failure in the debug output. */
+      status = NtSetInformationFile (get_handle (), &io,
+				     &fai, sizeof fai,
+				     FileAllocationInformation);
+      if (!NT_SUCCESS (status))
+	debug_printf ("%y = NtSetInformationFile(%S, "
+		      "FileAllocationInformation)",
+		      status, pc.get_nt_native_path ());
+    }
+
+  return 0;
+}
+
+int
+fhandler_disk_file::falloc_punch_hole (off_t offset, off_t length)
+{
+  NTSTATUS status;
+  IO_STATUS_BLOCK io;
+  FILE_STANDARD_INFORMATION fsi;
+  FILE_ZERO_DATA_INFORMATION fzi;
+
+  /* Fetch EOF */
+  status = NtQueryInformationFile (get_handle (), &io, &fsi, sizeof fsi,
+				   FileStandardInformation);
+  if (!NT_SUCCESS (status))
+    return geterrno_from_nt_status (status);
+
+  if (offset > fsi.EndOfFile.QuadPart) /* no-op */
+    return 0;
+
+  if (offset + length > fsi.EndOfFile.QuadPart)
+    length = fsi.EndOfFile.QuadPart - offset;
+
+  /* If the file isn't sparse yet, make it so. */
+  if (!has_attribute (FILE_ATTRIBUTE_SPARSE_FILE))
+    {
+      status = NtFsControlFile (get_handle (), NULL, NULL, NULL, &io,
+				FSCTL_SET_SPARSE, NULL, 0, NULL, 0);
+	debug_printf ("%y = NtFsControlFile(%S, FSCTL_SET_SPARSE)",
+		      status, pc.get_nt_native_path ());
+      if (!NT_SUCCESS (status))
+	return geterrno_from_nt_status (status);
+      pc.file_attributes (pc.file_attributes () | FILE_ATTRIBUTE_SPARSE_FILE);
+    }
+
+  /* Now punch a hole. For once, FSCTL_SET_ZERO_DATA does it exactly as per
+     fallocate(FALLOC_FL_PUNCH_HOLE) specs. */
+  fzi.FileOffset.QuadPart = offset;
+  fzi.BeyondFinalZero.QuadPart = offset + length;
+  status = NtFsControlFile (get_handle (), NULL, NULL, NULL, &io,
+			    FSCTL_SET_ZERO_DATA, &fzi, sizeof fzi, NULL, 0);
+  if (!NT_SUCCESS (status))
+    return geterrno_from_nt_status (status);
+
+  return 0;
+}
+
+int
+fhandler_disk_file::falloc_zero_range (int mode, off_t offset, off_t length)
+{
+  NTSTATUS status;
+  IO_STATUS_BLOCK io;
+  FILE_STANDARD_INFORMATION fsi;
+  FILE_ALLOCATED_RANGE_BUFFER inp, *out = NULL;
+  OBJECT_ATTRIBUTES attr;
+  HANDLE zo_handle;
+  tmp_pathbuf tp;
+  size_t data_chunk_count = 0;
+
+  /* Fetch EOF */
+  status = NtQueryInformationFile (get_handle (), &io, &fsi, sizeof fsi,
+				   FileStandardInformation);
+  if (!NT_SUCCESS (status))
+    return geterrno_from_nt_status (status);
+
+  /* offset and length must not exceed EOF with FALLOC_FL_KEEP_SIZE */
+  if (mode & FALLOC_FL_KEEP_SIZE)
+    {
+      if (offset > fsi.EndOfFile.QuadPart) /* no-op */
+	return 0;
+
+      if (offset + length > fsi.EndOfFile.QuadPart)
+	length = fsi.EndOfFile.QuadPart - offset;
+    }
+
+  /* If the file is sparse, fetch the data ranges within the file
+     to be able to recognize holes. */
+  if (has_attribute (FILE_ATTRIBUTE_SPARSE_FILE))
+    {
+      inp.FileOffset.QuadPart = offset;
+      inp.Length.QuadPart = length;
+      out = (FILE_ALLOCATED_RANGE_BUFFER *) tp.t_get ();
+      status = NtFsControlFile (get_handle (), NULL, NULL, NULL,
+				&io, FSCTL_QUERY_ALLOCATED_RANGES,
+				&inp, sizeof inp, out, 2 * NT_MAX_PATH);
+      if (NT_ERROR (status))
+	out = NULL;	/* No range list; chunks are treated as data below. */
+      else
+	data_chunk_count = io.Information / sizeof *out;
+    }
+
+  /* Re-open the file and use this handle ever after, so as not to
+     move the file pointer of the original file object.  */
+  status = NtOpenFile (&zo_handle, SYNCHRONIZE | GENERIC_WRITE,
+		       pc.init_reopen_attr (attr, get_handle ()), &io,
+		       FILE_SHARE_VALID_FLAGS, get_options ());
+  if (!NT_SUCCESS (status))
+    return geterrno_from_nt_status (status);
+
+  /* FILE_SPARSE_GRANULARITY == 2 * NT_MAX_PATH ==> fits exactly */
+  char *nullbuf = tp.t_get ();
+  memset (nullbuf, 0, FILE_SPARSE_GRANULARITY);
+  int res = 0;
+
+  /* Split range into chunks of size FILE_SPARSE_GRANULARITY and handle
+     them according to being data or hole */
+  LARGE_INTEGER off = { QuadPart:offset };
+  size_t start_idx = 0;
+  while (length > 0)
+    {
+      off_t chunk_len;
+      bool in_data = true;
+
+      if (off.QuadPart % FILE_SPARSE_GRANULARITY)	/* First block */
+	chunk_len = roundup2 (off.QuadPart, FILE_SPARSE_GRANULARITY) - off.QuadPart;
+      else
+	chunk_len = FILE_SPARSE_GRANULARITY;
+      if (chunk_len > length)			/* First or last block */
+	chunk_len = length;
+
+      /* Check if the current chunk is within data or hole.  Without a
+	 range list (not sparse or query failed) treat it as data. */
+      if (out && off.QuadPart < fsi.EndOfFile.QuadPart)
+	{
+	  in_data = false;
+	  for (size_t idx = start_idx; idx < data_chunk_count; ++idx)
+	    if (off.QuadPart >= out[idx].FileOffset.QuadPart)
+	      {
+		/* Skip entries with lower start address next time. */
+		start_idx = idx;
+		if (off.QuadPart < out[idx].FileOffset.QuadPart
+				   + out[idx].Length.QuadPart)
+		  {
+		    in_data = true;
+		    break;
+		  }
+	      }
+	}
+
+      /* Finally, write zeros into the block.  Completely zero out data
+	 blocks, just write a single zero to former holes in sparse files.
+	 If __FALLOC_FL_ZERO_HOLES has been specified, only write to holes. */
+      if (!(mode & __FALLOC_FL_ZERO_HOLES) || !in_data)
+	{
+	  status = NtWriteFile (zo_handle, NULL, NULL, NULL, &io, nullbuf,
+				in_data ? chunk_len : 1, &off, NULL);
+	  if (!NT_SUCCESS (status))
+	    {
+	      res = geterrno_from_nt_status (status);
+	      break;
+	    }
+	}
+
+      off.QuadPart += chunk_len;
+      length -= chunk_len;
     }
+
+  NtClose (zo_handle);
   return res;
 }
 
+int
+fhandler_disk_file::fallocate (int mode, off_t offset, off_t length)
+{
+  if (length < 0 || !get_handle ())
+    return EINVAL;
+  if (pc.isdir ())
+    return EISDIR;
+  if (!(get_access () & GENERIC_WRITE))
+    return EBADF;
+
+  switch (mode)
+    {
+    case 0:
+    case __FALLOC_FL_TRUNCATE:
+    case FALLOC_FL_KEEP_SIZE:
+      return falloc_allocate (mode, offset, length);
+    case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
+      /* Only if the filesystem supports it... */
+      if (!(pc.fs_flags () & FILE_SUPPORTS_SPARSE_FILES))
+	return EOPNOTSUPP;
+      return falloc_punch_hole (offset, length);
+    case FALLOC_FL_ZERO_RANGE:
+    case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
+      return falloc_zero_range (mode, offset, length);
+    default:
+      break;
+    }
+  return EINVAL;
+}
+
 int
 fhandler_disk_file::link (const char *newpath)
 {
@@ -1741,7 +1952,7 @@ fhandler_disk_file::pwrite (void *buf, size_t count, off_t offset, void *aio)
 	  && NT_SUCCESS (NtQueryInformationFile (get_handle (),
 						 &io, &fsi, sizeof fsi,
 						 FileStandardInformation))
-	  && offset >= fsi.EndOfFile.QuadPart + (128 * 1024))
+	  && span_sparse_chunk (offset, fsi.EndOfFile.QuadPart))
 	{
 	  NTSTATUS status;
 	  status = NtFsControlFile (get_handle (), NULL, NULL, NULL,
diff --git a/winsup/cygwin/include/fcntl.h b/winsup/cygwin/include/fcntl.h
index b38dfa50bcc1..340aed4402a7 100644
--- a/winsup/cygwin/include/fcntl.h
+++ b/winsup/cygwin/include/fcntl.h
@@ -51,6 +51,7 @@ details. */
 #define FALLOC_FL_KEEP_SIZE		0x1000
 /* Internal flags */
 #define __FALLOC_FL_TRUNCATE		0x2000
+#define __FALLOC_FL_ZERO_HOLES		0x4000
 #endif
 
 __BEGIN_DECLS
diff --git a/winsup/cygwin/local_includes/fhandler.h b/winsup/cygwin/local_includes/fhandler.h
index 54e0c6e804c8..1dc02608ba8a 100644
--- a/winsup/cygwin/local_includes/fhandler.h
+++ b/winsup/cygwin/local_includes/fhandler.h
@@ -14,6 +14,7 @@ details. */
 #include <cygwin/_socketflags.h>
 #include <cygwin/_ucred.h>
 #include <sys/un.h>
+#include <sys/param.h>
 
 /* It appears that 64K is the block size used for buffered I/O on NT.
    Using this blocksize in read/write calls in the application results
@@ -37,6 +38,15 @@ details. */
    ERROR_NOT_ENOUGH_MEMORY occurs in win7 if this value is used. */
 #define INREC_SIZE 2048
 
+/* Helper function to allow checking if some offset in a file is so far
+   beyond EOF, that at least one sparse chunk fits into the span. */
+inline bool
+span_sparse_chunk (off_t new_pos, off_t old_eof)
+{
+  return roundup2 (old_eof, FILE_SPARSE_GRANULARITY) + FILE_SPARSE_GRANULARITY
+	 <= rounddown (new_pos, FILE_SPARSE_GRANULARITY);
+}
+
 extern const char *windows_device_names[];
 extern struct __cygwin_perfile *perfile_table;
 #define __fmode (*(user_data->fmode_ptr))
@@ -1708,6 +1718,10 @@ class fhandler_disk_file: public fhandler_base
   uint64_t fs_ioc_getflags ();
   int fs_ioc_setflags (uint64_t);
 
+  int falloc_allocate (int, off_t, off_t);	/* modes 0/truncate/KEEP_SIZE */
+  int falloc_punch_hole (off_t, off_t);		/* FALLOC_FL_PUNCH_HOLE */
+  int falloc_zero_range (int, off_t, off_t);	/* FALLOC_FL_ZERO_RANGE */
+
  public:
   fhandler_disk_file ();
   fhandler_disk_file (path_conv &pc);
diff --git a/winsup/cygwin/local_includes/ntdll.h b/winsup/cygwin/local_includes/ntdll.h
index a1a9f7f8d15c..a9ccd16a92fd 100644
--- a/winsup/cygwin/local_includes/ntdll.h
+++ b/winsup/cygwin/local_includes/ntdll.h
@@ -45,6 +45,9 @@ extern GUID __cygwin_socket_guid;
 #define FILE_WRITE_TO_END_OF_FILE      (-1LL)
 #define FILE_USE_FILE_POINTER_POSITION (-2LL)
 
+/* Sparsification granularity on NTFS. */
+#define FILE_SPARSE_GRANULARITY	       (64 * 1024)
+
 /* Device Characteristics. */
 #define FILE_REMOVABLE_MEDIA           0x00000001
 #define FILE_READ_ONLY_DEVICE          0x00000002
@@ -390,6 +393,11 @@ typedef struct _FILE_ALL_INFORMATION {		// 18
   FILE_NAME_INFORMATION      NameInformation;
 } FILE_ALL_INFORMATION, *PFILE_ALL_INFORMATION;
 
+typedef struct _FILE_ALLOCATION_INFORMATION	// 19
+{
+  LARGE_INTEGER AllocationSize;
+} FILE_ALLOCATION_INFORMATION, *PFILE_ALLOCATION_INFORMATION;
+
 typedef struct _FILE_END_OF_FILE_INFORMATION	// 20
 {
   LARGE_INTEGER EndOfFile;

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-11-28 10:03 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-28 10:03 [newlib-cygwin/main] Cygwin: fallocate(2): handle FALLOC_FL_PUNCH_HOLE and FALLOC_FL_ZERO_RANGE Corinna Vinschen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).