From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail.svario.it (mail.svario.it [84.22.98.252]) by sourceware.org (Postfix) with ESMTPS id 40C9E3858D32 for ; Mon, 27 Feb 2023 12:13:20 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 40C9E3858D32 Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=svario.it Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=svario.it Received: from [IPV6:2a01:c23:6ca5:7800:c4d:8462:3bee:6ce2] (dynamic-2a01-0c23-6ca5-7800-0c4d-8462-3bee-6ce2.c23.pool.telefonica.de [IPv6:2a01:c23:6ca5:7800:c4d:8462:3bee:6ce2]) by mail.svario.it (Postfix) with ESMTPSA id 31E08D1F1E for ; Mon, 27 Feb 2023 13:13:17 +0100 (CET) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=svario.it; s=201710; t=1677499997; bh=q5SyC2NClOPRK/yLKF+34JRWS9PCcbWDOcW+hwhHG9o=; h=Date:From:Subject:To:From; b=eIWwcfliI6AVUc+Y8swIM0MEo/SFj9/bE0I7MIdoad209mTt/5qaNvu56/HQh5UgV FvijOsriHTRvWxC+eiCUPCcC33m0+V65mFzBQFxWH9IcKMYDYhBUVvwbS1xcMuIZWr L8Quwp3yDHrDtBeIuofmDlIxIo9zo/40OHOUwbO78KC0HzUAR2kBMNnJqA2DY+4uHC kxmCssbW0yhGtj5Es6kYl861NKzIjf7mdspsQnNsnbg4JwqSXdRJwD7AP8yy4ydy33 jEs4wJU08TN5TbQBjVKPaEZqQWpm/IAlM3DmGnn8Dw2ITLgd0ASljipWy2CT6pRjjn dEJ91xC8hVoxQ== Message-ID: <6bfff40c-2c4b-c119-116d-7834310299d7@svario.it> Date: Mon, 27 Feb 2023 13:13:16 +0100 MIME-Version: 1.0 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Thunderbird/102.7.1 Content-Language: en-US From: Gioele Barabucci Subject: [PATCH] dtrace: Use deterministic temp file creation for all temp files To: systemtap@sourceware.org Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 7bit X-Spam-Status: No, score=-14.1 required=5.0 tests=BAYES_00,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,GIT_PATCH_0,SPF_HELO_PASS,SPF_PASS,WEIRD_QUOTING autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: `dtrace -G 
-C` creates temporary files with random filenames. The name of these temporary files gets embedded in the ELF `.symtab` of the final object files, making them always slightly different. This behavior makes all packages that use `dtrace`-produced object files inherently non-reproducible. To reproduce this issue: ``` $ git clone https://salsa.debian.org/sssd-team/sssd.git $ cd sssd $ mkdir -p build && cd build/ $ dtrace -C -G -s ../src/systemtap/sssd_probes.d -o stap_generated_probes.o $ readelf --wide --symbols stap_generated_probes.o > sym1.txt $ dtrace -C -G -s ../src/systemtap/sssd_probes.d -o stap_generated_probes.o $ readelf --wide --symbols stap_generated_probes.o > sym2.txt $ diff -u sym1.txt sym2.txt --- sym1.txt 2023-02-27 08:38:48.955299234 +0100 +++ sym2.txt 2023-02-27 08:38:49.103303352 +0100 @@ -2,7 +2,7 @@ Symbol table '.symtab' contains 59 entries: Num: Value Size Type Bind Vis Ndx Name 0: 0000000000 0 NOTYPE LOCAL DEFAULT UND - 1: 0000000000 0 FILE LOCAL DEFAULT ABS .dtrace-temp.4f0bbdda.c + 1: 0000000000 0 FILE LOCAL DEFAULT ABS .dtrace-temp.d20e76c7.c 2: 0000000000 0 SECTION LOCAL DEFAULT 1 .text 3: 0000000000 7 FUNC LOCAL DEFAULT 1 __dtrace 4: 0000000000 0 SECTION LOCAL DEFAULT 5 .debug_info ``` The root cause of this issue is that, although the name of the temporary file is created in a deterministic way (from the SHA256 of the source and output file names), the name of the source file is overwritten with a random name when the `-C` option (`use_cpp`) is used: ``` if s_filename != "" and use_cpp: (ignore, fname) = mkstemp(suffix=".d") cpp = os.environ.get("CPP", "cpp") retcode = call(split(cpp) + [...] + [s_filename, '-o', fname]) if retcode != 0: print("\"cpp includes s_filename\" failed") usage() return 1 s_filename = fname [...] sha = hashlib.sha256() sha.update(s_filename.encode('utf-8')) sha.update(filename.encode('utf-8')) fname = ".dtrace-temp." 
+ sha.hexdigest()[:8] + ".c" ``` To fix this issue, all temporary files are now created using the same deterministic procedure currently used only for the temporary ".c" files. Fixes: https://bugs.debian.org/1032055 Fixes: https://bugs.debian.org/1032056 Signed-off-by: Gioele Barabucci --- dtrace.in | 50 +++++++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/dtrace.in b/dtrace.in index adad99bdb..22c1a9d03 100644 --- a/dtrace.in +++ b/dtrace.in @@ -27,7 +27,6 @@ import time import atexit from shlex import split from subprocess import call -from tempfile import mkstemp try: from pyparsing import alphas, cStyleComment, delimitedList, Group, \ Keyword, lineno, Literal, nestedExpr, nums, oneOf, OneOrMore, \ @@ -278,6 +277,28 @@ class _ReProvider(_HeaderCreator): hdr.close() +def mktemp_determ(sources, suffix): + # for reproducible-builds purposes, use a predictable tmpfile path + sha = hashlib.sha256() + for source in sources: + sha.update(source.encode('utf-8')) + fname = ".dtrace-temp." 
+ sha.hexdigest()[:8] + suffix + tries = 0 + while True: + tries += 1 + if tries > 100: # if file exists due to previous crash or whatever + raise Exception("cannot create temporary file \""+fname+"\"") + try: + wxmode = 'x' if sys.version_info > (3,0) else 'wx' + fdesc = open(fname, mode=wxmode) + break + except FileExistsError: + time.sleep(0.1) # vague estimate of elapsed time for concurrent identical gcc job + pass # Try again + + return fdesc, fname + + def usage(): print("Usage " + sys.argv[0] + " [--help] [-h | -G] [-C [-I]] -s File.d [-o ]") @@ -360,7 +381,7 @@ def main(): return 1 if s_filename != "" and use_cpp: - (ignore, fname) = mkstemp(suffix=".d") + (ignore, fname) = mktemp_determ(["use_cpp", s_filename], suffix=".d") cpp = os.environ.get("CPP", "cpp") retcode = call(split(cpp) + includes + defines + [s_filename, '-o', fname]) if retcode != 0: @@ -399,7 +420,7 @@ def main(): providers = _PypProvider() else: providers = _ReProvider() - (ignore, fname) = mkstemp(suffix=".h") + (fdesc, fname) = mktemp_determ(["build_source", s_filename], suffix=".h") while True: try: providers.probe_write(s_filename, fname) @@ -413,26 +434,9 @@ def main(): else: print("header: " + fname) - # for reproducible-builds purposes, use a predictable tmpfile path - sha = hashlib.sha256() - sha.update(s_filename.encode('utf-8')) - sha.update(filename.encode('utf-8')) - fname = ".dtrace-temp." 
+ sha.hexdigest()[:8] + ".c" - tries = 0 - while True: - tries += 1 - if tries > 100: # if file exists due to previous crash or whatever - print("cannot create temporary file \""+fname+"\"") - return 1 - try: - wxmode = 'x' if sys.version_info > (3,0) else 'wx' - fdesc = open(fname, mode=wxmode) - if not keep_temps: - atexit.register(os.remove, fname) # delete generated source at exit, even if error - break - except: - time.sleep(0.1) # vague estimate of elapsed time for concurrent identical gcc job - pass # Try again + (fdesc, fname) = mktemp_determ(["build_source", s_filename, filename], suffix=".c") + if not keep_temps: + atexit.register(os.remove, fname) # delete generated source at exit, even if error providers.semaphore_write(fdesc) fdesc.close() cc1 = os.environ.get("CC", "gcc") -- 2.39.2