public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
* RFC: Supporting SOURCE_DATE_EPOCH in ar
@ 2023-08-29  9:46 Nick Clifton
  2023-08-29 16:26 ` Fangrui Song
       [not found] ` <DS7PR12MB576518157026CC92239F6470CBE7A@DS7PR12MB5765.namprd12.prod.outlook.com>
  0 siblings, 2 replies; 3+ messages in thread
From: Nick Clifton @ 2023-08-29  9:46 UTC (permalink / raw)
  To: binutils

[-- Attachment #1: Type: text/plain, Size: 1320 bytes --]

Hi Guys,

  I have been looking improving our support for reproducible builds, and
  in particular how we might be able to use the SOURCE_DATE_EPOCH
  environment variable when creating and maintaining static archives.  I
  think that it can be done, and the attached patch is my proposal for
  implementing this.

  The patch makes several changes:

    * Adding files to a reproduicble archive, either one created with
      --enable-deterministic-archives or with SOURCE_DATA_EPOCH set is
      allowed.  If the update ('u') modifier is in effect it will be
      ignored and the new file(s) will always replace the old file(s).

    * Running "ar ruD" will no longer trigger an error, but instead give
      a warning message and then proceed as outlined above.

    * It adds a new function - bfd_get_current_time() - which can be
      used instead of calling time().  This will return
      SOURCE_DATE_EPOCH if it is defined.  The function can also be used
      to conditionally update the st_mtime field in a stat structure,
      changing it only if SOURCE_DATE_EPOCH is defined.

    * It adds three new tests to the binutils testsuite to check that
      the new behaviour is supported correctly.

  Any comments or suggestions ?
  
Cheers
  Nick
  
https://reproducible-builds.org/docs/source-date-epoch/


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: ar-SOURCE_DATE_EPOCH.patch --]
[-- Type: text/x-patch, Size: 15225 bytes --]

diff --git a/bfd/archive.c b/bfd/archive.c
index 47b37bb6e37..1c7dc44ae8d 100644
--- a/bfd/archive.c
+++ b/bfd/archive.c
@@ -1893,7 +1893,7 @@ bfd_ar_hdr_from_filesystem (bfd *abfd, const char *filename, bfd *member)
     {
       /* Assume we just "made" the member, and fake it.  */
       struct bfd_in_memory *bim = (struct bfd_in_memory *) member->iostream;
-      time (&status.st_mtime);
+      status.st_mtime = bfd_get_current_time (0);
       status.st_uid = getuid ();
       status.st_gid = getgid ();
       status.st_mode = 0644;
@@ -1904,6 +1904,15 @@ bfd_ar_hdr_from_filesystem (bfd *abfd, const char *filename, bfd *member)
       bfd_set_error (bfd_error_system_call);
       return NULL;
     }
+  else
+    {
+      /* The call to stat() above has filled in the st_mtime field
+	 with the real time that the object was modified.  But if
+	 we are trying to generate deterministic archives based upon
+	 the SOURCE_DATE_EPOCH environment variable then we want to
+	 override that.  */
+      status.st_mtime = bfd_get_current_time (status.st_mtime);
+    }
 
   /* If the caller requested that the BFD generate deterministic output,
      fake values for modification time, UID, GID, and file mode.  */
@@ -2529,8 +2538,13 @@ _bfd_bsd_write_armap (bfd *arch,
       struct stat statbuf;
 
       if (stat (bfd_get_filename (arch), &statbuf) == 0)
-	bfd_ardata (arch)->armap_timestamp = (statbuf.st_mtime
-					      + ARMAP_TIME_OFFSET);
+	{
+	  /* If asked, replace the time with a deterministic value. */
+	  statbuf.st_mtime = bfd_get_current_time (statbuf.st_mtime);
+
+	  bfd_ardata (arch)->armap_timestamp = (statbuf.st_mtime
+						+ ARMAP_TIME_OFFSET);
+	}
       uid = getuid();
       gid = getgid();
     }
@@ -2617,7 +2631,8 @@ _bfd_bsd_write_armap (bfd *arch,
 }
 
 /* At the end of archive file handling, update the timestamp in the
-   file, so the linker will accept it.
+   file, so older linkers will accept it.  (This does not apply to
+   ld.bfd or ld.gold).
 
    Return TRUE if the timestamp was OK, or an unusual problem happened.
    Return FALSE if we updated the timestamp.  */
@@ -2642,10 +2657,17 @@ _bfd_archive_bsd_update_armap_timestamp (bfd *arch)
       /* Can't read mod time for some reason.  */
       return true;
     }
+
   if (((long) archstat.st_mtime) <= bfd_ardata (arch)->armap_timestamp)
     /* OK by the linker's rules.  */
     return true;
 
+  if (getenv ("SOURCE_DATE_EPOCH") != NULL
+      && bfd_ardata (arch)->armap_timestamp == bfd_get_current_time (0) + ARMAP_TIME_OFFSET)
+    /* If the archive's timestamp has been set to SOURCE_DATE_EPOCH
+       then leave it as-is.  */
+    return true;
+  
   /* Update the timestamp.  */
   bfd_ardata (arch)->armap_timestamp = archstat.st_mtime + ARMAP_TIME_OFFSET;
 
diff --git a/bfd/archive64.c b/bfd/archive64.c
index 63d2393ccfd..b80f3e8c4e5 100644
--- a/bfd/archive64.c
+++ b/bfd/archive64.c
@@ -186,8 +186,16 @@ _bfd_archive_64_bit_write_armap (bfd *arch,
   memcpy (hdr.ar_name, "/SYM64/", strlen ("/SYM64/"));
   if (!_bfd_ar_sizepad (hdr.ar_size, sizeof (hdr.ar_size), mapsize))
     return false;
-  _bfd_ar_spacepad (hdr.ar_date, sizeof (hdr.ar_date), "%ld",
-		    time (NULL));
+
+  time_t date;
+
+  if (arch->flags & BFD_DETERMINISTIC_OUTPUT)
+    date = 0;
+  else
+    date = bfd_get_current_time (0);
+
+  _bfd_ar_spacepad (hdr.ar_date, sizeof (hdr.ar_date), "%ld", (long) date);
+  
   /* This, at least, is what Intel coff sets the values to.: */
   _bfd_ar_spacepad (hdr.ar_uid, sizeof (hdr.ar_uid), "%ld", 0);
   _bfd_ar_spacepad (hdr.ar_gid, sizeof (hdr.ar_gid), "%ld", 0);
diff --git a/bfd/bfd-in2.h b/bfd/bfd-in2.h
index 1c4f75ae244..eddb9902f5e 100644
--- a/bfd/bfd-in2.h
+++ b/bfd/bfd-in2.h
@@ -2758,6 +2758,8 @@ void *bfd_mmap (bfd *abfd, void *addr, bfd_size_type len,
     void **map_addr, bfd_size_type *map_len)
 ATTRIBUTE_WARN_UNUSED_RESULT;
 
+time_t bfd_get_current_time (time_t now);
+
 /* Extracted from bfdwin.c.  */
 struct _bfd_window_internal;
 
diff --git a/bfd/bfdio.c b/bfd/bfdio.c
index 457562f8c7c..7b8608b45dc 100644
--- a/bfd/bfdio.c
+++ b/bfd/bfdio.c
@@ -828,3 +828,47 @@ const struct bfd_iovec _bfd_memory_iovec =
   &memory_bread, &memory_bwrite, &memory_btell, &memory_bseek,
   &memory_bclose, &memory_bflush, &memory_bstat, &memory_bmmap
 };
+
+/*
+FUNCTION
+	bfd_get_current_time
+
+SYNOPSIS
+	time_t bfd_get_current_time (time_t now);
+
+DESCRIPTION
+	Returns the current time.
+
+	If the environment variable SOURCE_DATE_EPOCH is defined
+	then this is parsed and its value is returned.  Otherwise
+	if the paramter NOW is non-zero, then that is returned.
+	Otherwise the result of the system call "time(NULL)" is
+	returned.
+*/
+
+time_t
+bfd_get_current_time (time_t now)
+{
+  char *source_date_epoch;
+  unsigned long long epoch;
+
+  /* FIXME: We could probably cache this lookup,
+     and the parsing of its value below.  */
+  source_date_epoch = getenv ("SOURCE_DATE_EPOCH");
+
+  if (source_date_epoch == NULL)
+    {
+      if (now)
+	return now;
+      return time (NULL);
+    }
+
+  epoch = strtoull (source_date_epoch, NULL, 0);
+
+  /* If epoch == 0 then something is wrong with SOURCE_DATE_EPOCH,
+     but we do not have an easy way to report it.  Since the presence
+     of the environment variable implies that the user wants
+     deterministic behaviour we just accept the 0 value.  */
+
+  return (time_t) epoch;
+}
diff --git a/binutils/ar.c b/binutils/ar.c
index fe2d971962a..b09a3e04b71 100644
--- a/binutils/ar.c
+++ b/binutils/ar.c
@@ -807,7 +807,7 @@ main (int argc, char **argv)
 	fatal (_("`u' is only meaningful with the `r' option."));
 
       if (newer_only && deterministic > 0)
-        fatal (_("`u' is not meaningful with the `D' option."));
+        non_fatal (_("`u' is not meaningful with the `D' option - replacement will always happen."));
 
       if (newer_only && deterministic < 0 && DEFAULT_AR_DETERMINISTIC)
         non_fatal (_("\
@@ -1482,6 +1482,7 @@ replace_members (bfd *arch, char **files_to_move, bool quick)
 		  && current->arelt_data != NULL)
 		{
 		  bool replaced;
+
 		  if (newer_only)
 		    {
 		      struct stat fsbuf, asbuf;
@@ -1492,12 +1493,33 @@ replace_members (bfd *arch, char **files_to_move, bool quick)
 			    bfd_fatal (*files_to_move);
 			  goto next_file;
 			}
+
 		      if (bfd_stat_arch_elt (current, &asbuf) != 0)
 			/* xgettext:c-format */
 			fatal (_("internal stat error on %s"),
 			       bfd_get_filename (current));
 
 		      if (fsbuf.st_mtime <= asbuf.st_mtime)
+			/* A note about deterministic timestamps:  In an
+			   archive created in a determistic manner the
+			   individual elements will either have a timestamp
+			   of 0 or SOURCE_DATE_EPOCH, depending upon the
+			   method used.  This will be the value retrieved
+			   by bfd_stat_arch_elt().
+
+			   The timestamp in fsbuf.st_mtime however will
+			   definitely be greater than 0, and it is unlikely
+			   to be less than SOURCE_DATE_EPOCH.  (FIXME:
+			   should we test for this case case and issue an
+			   error message ?)
+
+			   So in either case fsbuf.st_mtime > asbuf.st_time
+			   and hence the incoming file will replace the
+			   current file.  Which is what should be expected to
+			   happen.  Deterministic archives have no real sense
+			   of the time/date when their elements were created,
+			   and so any updates to the archive should always
+			   result in replaced files.  */
 			goto next_file;
 		    }
 
diff --git a/binutils/doc/binutils.texi b/binutils/doc/binutils.texi
index 9f80f398c9d..4f9fb1cde9c 100644
--- a/binutils/doc/binutils.texi
+++ b/binutils/doc/binutils.texi
@@ -521,6 +521,10 @@ operation @samp{r} (replace).  In particular, the combination @samp{qu} is
 not allowed, since checking the timestamps would lose any speed
 advantage from the operation @samp{q}.
 
+Note - if an archive has been created in a deterministic manner, eg
+via the use of the @option{D} modifier, then replacement will always
+happen and the @option{u} modifier will be ineffective.
+
 @item U
 @cindex deterministic archives
 @kindex --enable-deterministic-archives
diff --git a/binutils/testsuite/binutils-all/ar.exp b/binutils/testsuite/binutils-all/ar.exp
index b9a325cff4b..a0fa54d866d 100644
--- a/binutils/testsuite/binutils-all/ar.exp
+++ b/binutils/testsuite/binutils-all/ar.exp
@@ -467,6 +467,225 @@ proc deterministic_archive { } {
     pass $testname
 }
 
+# Test replacing a member of a deterministic archive.
+
+proc replacing_deterministic_member { } {
+    global AR
+    global AS
+    global NM
+    global srcdir
+    global subdir
+    global obj
+
+    set testname "replacing deterministic member"
+
+    if [is_remote host] {
+	# The kind of filename trickery that we are about to
+	#  play is hard to do if we have to operate remotely.
+	unsupported $testname
+	return
+    }
+
+    file mkdir tmpdir/ar
+    
+    if ![binutils_assemble $srcdir/$subdir/bintest.s tmpdir/bintest.${obj}] {
+	unsupported $testname
+	return
+    }
+
+    # Wait a second and then build a second object file - with the same name
+    # as the first, but in a different directory.
+    sleep 1
+    if ![binutils_assemble $srcdir/$subdir/copytest.s tmpdir/ar/bintest.${obj}] {
+	unsupported $testname
+	return
+    }
+
+    set archive tmpdir/artest.a
+    set older_objfile tmpdir/bintest.${obj}
+    set newer_objfile tmpdir/ar/bintest.${obj}
+    set older_length [file size $older_objfile]
+    # set newer_length [file size $newer_objfile]
+    
+    remote_file build delete tmpdir/artest.a
+
+    # Build the archive with the *newer* object file.
+    
+    set got [binutils_run $AR "rcD $archive ${newer_objfile}"]
+    if ![string match "" $got] {
+	fail "$testname: (could not build archive)"
+	return
+    }
+
+    # Now replace the newer file with the older one.  On a normal
+    # archive this will not work, but one created to be deterministic
+    # should always replace its members.
+    
+    set got [binutils_run $AR "ruD $archive $older_objfile"]
+    # The archiver will warn that 'u' and 'D' do not work together
+    if ![string match "*not meaningful*" $got] {
+	fail "$testname: (failed to replace file)"
+	return
+    }
+
+    set got [binutils_run $AR "tvO $archive"]
+    if ![string match "rw-r--r-- 0/0 *${older_length} *bintest.${obj} 0x*" $got] {
+	fail "$testname (wrong size, expected: $older_length)"
+	return
+    }
+
+    pass $testname
+}
+
+# Test replacing a member of a non-deterministic archive.
+
+proc replacing_non_deterministic_member { } {
+    global AR
+    global AS
+    global NM
+    global srcdir
+    global subdir
+    global obj
+
+    set testname "replacing non-deterministic member"
+
+    if [is_remote host] {
+	# The kind of filename trickery that we are about to
+	#  play is hard to do if we have to operate remotely.
+	unsupported $testname
+	return
+    }
+
+    file mkdir tmpdir/ar
+    
+    if ![binutils_assemble $srcdir/$subdir/bintest.s tmpdir/bintest.${obj}] {
+	unsupported $testname
+	return
+    }
+
+    # Wait a second and then build a second object file - with the same name
+    # as the first, but in a different directory.
+    sleep 1
+    if ![binutils_assemble $srcdir/$subdir/copytest.s tmpdir/ar/bintest.${obj}] {
+	unsupported $testname
+	return
+    }
+
+    set archive tmpdir/artest.a
+    set older_objfile tmpdir/bintest.${obj}
+    set newer_objfile tmpdir/ar/bintest.${obj}
+    # set older_length [file size $older_objfile]
+    set newer_length [file size $newer_objfile]
+    
+    remote_file build delete tmpdir/artest.a
+
+    # Build the archive with the *newer* object file.
+    
+    set got [binutils_run $AR "rc $archive ${newer_objfile}"]
+    if ![string match "" $got] {
+	fail "$testname: (could not build archive)"
+	return
+    }
+
+    # Now try to replace the newer file with the older one.  This should not work.
+    
+    set got [binutils_run $AR "ru $archive $older_objfile"]
+    if ![string match "" $got] {
+	fail "$testname: (failed to replace file)"
+	return
+    }
+
+    # Since this archive is non-deterministic, we do not know what the
+    # user or group ids will be, so we have to use */* to match them.
+    set got [binutils_run $AR "tvO $archive"]
+    if ![string match "rw-r--r-- */* *${newer_length} *bintest.${obj} 0x*" $got] {
+	fail "$testname (wrong size, expected: $newer_length)"
+	return
+    }
+
+    pass $testname
+}
+
+# Test replacing a member of deterministic archive created by using SOURCE_DATE_EPOCH.
+
+proc replacing_sde_deterministic_member { } {
+    global AR
+    global AS
+    global NM
+    global srcdir
+    global subdir
+    global obj
+
+    set testname "replacing SOURCE_DATE_EPOCH deterministic member"
+
+    if [is_remote host] {
+	# The kind of filename trickery that we are about to
+	#  play is hard to do if we have to operate remotely.
+	unsupported $testname
+	return
+    }
+
+    file mkdir tmpdir/ar
+    
+    if ![binutils_assemble $srcdir/$subdir/bintest.s tmpdir/bintest.${obj}] {
+	unsupported $testname
+	return
+    }
+
+    # Wait a second and then build a second object file - with the same name
+    # as the first, but in a different directory.
+    sleep 1
+    if ![binutils_assemble $srcdir/$subdir/copytest.s tmpdir/ar/bintest.${obj}] {
+	unsupported $testname
+	return
+    }
+
+    set archive tmpdir/artest.a
+    set older_objfile tmpdir/bintest.${obj}
+    set newer_objfile tmpdir/ar/bintest.${obj}
+    set older_length [file size $older_objfile]
+    # set newer_length [file size $newer_objfile]
+    
+    remote_file build delete tmpdir/artest.a
+
+    # Build the archive with the *newer* object file.
+    setenv SOURCE_DATE_EPOCH "1000"
+    
+    set got [binutils_run $AR "rc $archive ${newer_objfile}"]
+    if ![string match "" $got] {
+	fail "$testname: (could not build archive)"
+	unsetenv SOURCE_DATE_EPOCH
+	return
+    }
+
+    # Now replace the newer file with the older one.  On a normal
+    # archive this will not work, but one created to be deterministic
+    # should always replace its members.
+    
+    set got [binutils_run $AR "ru $archive $older_objfile"]
+    if ![string match "" $got] {
+	fail "$testname: (failed to replace file)"
+	unsetenv SOURCE_DATE_EPOCH
+	return
+    }
+
+    # Since this archive has fixed source dates, but non-deterministic
+    # uid and gid values we have to use */* to match them.
+    set got [binutils_run $AR "tvO $archive"]
+    if ![string match "rw-r--r-- */* *${older_length} *bintest.${obj} 0x*" $got] {
+	fail "$testname (wrong size, expected: $older_length)"
+	unsetenv SOURCE_DATE_EPOCH
+	return
+    }
+
+    # FIXME - it would be nice if we could check to see that the time & date
+    # in the archive listing matches SOURCE_DATE_EPOCH.
+
+    unsetenv SOURCE_DATE_EPOCH
+    pass $testname
+}
+
+
 proc unique_symbol { } {
     global AR
     global AS
@@ -797,6 +1016,9 @@ if { [file exists $base_dir/bfdtest1] && [file exists $base_dir/bfdtest2] } {
 symbol_table
 argument_parsing
 deterministic_archive
+replacing_deterministic_member
+replacing_non_deterministic_member
+replacing_sde_deterministic_member
 delete_an_element
 move_an_element
 empty_archive

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-09-01 12:01 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-29  9:46 RFC: Supporting SOURCE_DATE_EPOCH in ar Nick Clifton
2023-08-29 16:26 ` Fangrui Song
     [not found] ` <DS7PR12MB576518157026CC92239F6470CBE7A@DS7PR12MB5765.namprd12.prod.outlook.com>
2023-09-01 12:01   ` Nick Clifton

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).