From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 42375 invoked by alias); 24 Nov 2017 13:52:46 -0000 Mailing-List: contact cygwin-apps-cvs-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Post: List-Help: , Sender: cygwin-apps-cvs-owner@sourceware.org Received: (qmail 42352 invoked by uid 9795); 24 Nov 2017 13:52:46 -0000 Date: Fri, 24 Nov 2017 13:52:00 -0000 Message-ID: <20171124135245.42302.qmail@sourceware.org> From: jturney@sourceware.org To: cygwin-apps-cvs@sourceware.org Subject: [calm - Cygwin server-side packaging maintenance script] branch master, updated. 20171113-15-g353e677 X-Git-Refname: refs/heads/master X-Git-Reftype: branch X-Git-Oldrev: e6231b4cff0eba5f0153b7e8f0e869bb1339e9e3 X-Git-Newrev: 353e677e736a9ee8094c0c1eaa5da6a89eb6b59c X-SW-Source: 2017-q4/txt/msg00036.txt.bz2 https://sourceware.org/git/gitweb.cgi?p=cygwin-apps/calm.git;h=353e677e736a9ee8094c0c1eaa5da6a89eb6b59c commit 353e677e736a9ee8094c0c1eaa5da6a89eb6b59c Author: Jon Turney Date: Tue Nov 21 22:12:18 2017 +0000 Repository paths generated by mkgitolite should start with git/ https://sourceware.org/git/gitweb.cgi?p=cygwin-apps/calm.git;h=7792e5f886ac7143e5f59f345023e822df483523 commit 7792e5f886ac7143e5f59f345023e822df483523 Author: Jon Turney Date: Mon Nov 20 15:52:40 2017 +0000 Consider external-source: in 'empty but not obsolete' check https://sourceware.org/git/gitweb.cgi?p=cygwin-apps/calm.git;h=3268914882eed98fc8849f83e01163728692e379 commit 3268914882eed98fc8849f83e01163728692e379 Author: Jon Turney Date: Mon Nov 20 15:50:21 2017 +0000 Don't warn about directories which just contain .sum files https://sourceware.org/git/gitweb.cgi?p=cygwin-apps/calm.git;h=25ab26cdc4c8b75c370cdd2b2685a222ba72b65a commit 25ab26cdc4c8b75c370cdd2b2685a222ba72b65a Author: Jon Turney Date: Mon Nov 20 15:49:38 2017 +0000 Fix logging of bad sha512.sum line not to include newline https://sourceware.org/git/gitweb.cgi?p=cygwin-apps/calm.git;h=2157c661f9e465c82fe557c46b130c44ec0edac3 commit 2157c661f9e465c82fe557c46b130c44ec0edac3 Author: Jon Turney Date: Mon Nov 20 12:46:30 2017 +0000 Suppress empty depends:, obsoletes:, build-depends: https://sourceware.org/git/gitweb.cgi?p=cygwin-apps/calm.git;h=982ee146a7d396699455fab6b966aec5ae0f0ffe commit 982ee146a7d396699455fab6b966aec5ae0f0ffe Author: Jon Turney Date: Wed Nov 15 18:37:06 2017 +0000 Various fixes and improvements to dedup tool https://sourceware.org/git/gitweb.cgi?p=cygwin-apps/calm.git;h=af4b37a92aa0b6a96f4770fc6b697c2dc5e6f1ba commit af4b37a92aa0b6a96f4770fc6b697c2dc5e6f1ba Author: Jon Turney Date: Tue Nov 14 15:37:12 2017 +0000 Add a tool for finding duplicates https://sourceware.org/git/gitweb.cgi?p=cygwin-apps/calm.git;h=5917b6dcfb55ea65c05e07f8ebe8f3f688bfdbb2 commit 5917b6dcfb55ea65c05e07f8ebe8f3f688bfdbb2 Author: Jon Turney Date: Wed Nov 15 10:48:19 2017 +0000 Add a tool for migrating setup.hint to pvr.hint https://sourceware.org/git/gitweb.cgi?p=cygwin-apps/calm.git;h=9793aa7f8dad0a99841e40166a31dfa9ad9d9768 commit 9793aa7f8dad0a99841e40166a31dfa9ad9d9768 Author: Jon Turney Date: Fri Nov 17 11:47:01 2017 +0000 Rationalize the way we run ad-hoc tools Diff: --- calm-tool.sh | 3 + calm/dedupsrc.py | 76 ++++++++++++++++++--- calm/find-duplicates.py | 174 +++++++++++++++++++++++++++++++++++++++++++++++ calm/hint-migrate.py | 104 ++++++++++++++++++++++++++++ calm/hint.py | 1 + calm/mkgitoliteconf.py | 2 +- calm/package.py | 32 ++++++--- calm/tool.py | 35 ++++++++++ setup.py | 3 +- 9 files changed, 409 insertions(+), 21 deletions(-) diff --git a/calm-tool.sh b/calm-tool.sh new file mode 100755 index 0000000..a9d0bc5 --- /dev/null +++ b/calm-tool.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +export PYTHONPATH=$(dirname "$0") +exec python3 -m calm.$1 "${@:2}" diff --git a/calm/dedupsrc.py b/calm/dedupsrc.py old mode 100755 new mode 100644 index 8462fa7..d8a8e84 --- a/calm/dedupsrc.py +++ b/calm/dedupsrc.py @@ -24,6 +24,7 @@ # # Move a given source archive to src/ (assuming it is indentical in x86/ and # x86_64/) and adjust hints appropriately. +# (XXX: could probably be extended to move to noarch/ if not source, as well) # import argparse @@ -35,6 +36,8 @@ import sys from . import common_constants from . import hint +binary_only_hints = ['requires', 'depends', 'obsoletes', 'external-source'] + # # # @@ -45,6 +48,38 @@ def hint_file_write(fn, hints): for k, v in hints.items(): print("%s: %s" % (k, v), file=f) + +# +# +# + +def invent_sdesc(path, vr): + for (dirpath, subdirs, files) in os.walk(path): + # debuginfo packages never have a good sdesc + if 'debuginfo' in dirpath: + continue + + # but just pick the sdesc from first sub-package which has one ... + for f in files: + if re.match('^.*-' + re.escape(vr) + '.hint$', f): + hints = hint.hint_file_parse(os.path.join(dirpath, f), hint.pvr) + if 'sdesc' in hints: + sdesc = hints['sdesc'] + + # ... which doesn't contain 'Obsoleted' + if 'Obsoleted' in sdesc: + continue + + # remove anything inside parentheses at the end of quoted + # sdesc + sdesc = re.sub(r'"(.*)"', r'\1', sdesc) + sdesc = re.sub(r'(\(.*?\))$', '', sdesc) + sdesc = sdesc.strip() + sdesc = '"' + sdesc + '"' + + return sdesc + + return None # # # @@ -81,21 +116,37 @@ def dedup(archive, relarea): hints[arch] = hint.hint_file_parse(hint_pathname, hint.pvr) + # remove hints which only have meaning for binary packages + # + # (requires: tends to have libgcc1 more often on x86, so otherwise this + # would cause spurious differences between hints to be reported) + for h in binary_only_hints: + if h in hints[arch]: + del hints[arch][h] + if hints['x86'] != hints['x86_64']: print('hints for %s-%s differ between arches' % (p, vr)) return 1 + if ('skip' in hints['x86']) and (len(hints['x86']) == 1): + print('hints for %s-%s is skip: only' % (p, vr)) + hints['x86']['category'] = '' + # if hint only contains skip:, try to come up with a plausible sdesc + sdesc = invent_sdesc(os.path.join(relarea, 'x86', path), vr) + if sdesc: + print('suggested sdesc is %s' % (sdesc)) + hints['x86']['sdesc'] = sdesc + + if 'sdesc' not in hints['x86']: + print('hints for %s-%s has no sdesc:' % (p, vr)) + return 1 + # ensure target directory exists try: os.makedirs(os.path.join(relarea, 'src', path, p + '-src')) except FileExistsError: pass - # move the src files to src/ - for arch in ['x86', 'x86_64']: - print('%s -> %s' % (os.path.join(relarea, arch, path, filename), os.path.join(relarea, 'src', path, p + '-src', to_filename))) - os.rename(os.path.join(relarea, arch, path, filename), os.path.join(relarea, 'src', path, p + '-src', to_filename)) - # write .hint file for new -src package src_hints = copy.copy(hints['x86']) @@ -104,16 +155,21 @@ def dedup(archive, relarea): sdesc += ' (source code)' src_hints['sdesc'] = '"' + sdesc + '"' - if 'requires' in src_hints: - del src_hints['requires'] + if 'Source' not in src_hints['category']: + src_hints['category'] = src_hints['category'] + ' Source' - if 'external-source' in src_hints: - del src_hints['external-source'] + if 'parse-warnings' in src_hints: + del src_hints['parse-warnings'] to_hint_pathname = os.path.join(relarea, 'src', path, p + '-src', to_hint_filename) print('writing %s' % (to_hint_pathname)) hint_file_write(to_hint_pathname, src_hints) + # move the src files to src/ + for arch in ['x86', 'x86_64']: + print('%s -> %s' % (os.path.join(relarea, arch, path, filename), os.path.join(relarea, 'src', path, p + '-src', to_filename))) + os.rename(os.path.join(relarea, arch, path, filename), os.path.join(relarea, 'src', path, p + '-src', to_filename)) + # adjust external-source in .hint for all subpackages for arch in ['x86', 'x86_64']: for (dirpath, subdirs, files) in os.walk(os.path.join(relarea, arch, path)): @@ -122,6 +178,8 @@ def dedup(archive, relarea): if filename in files: hint_pathname = os.path.join(dirpath, filename) hints = hint.hint_file_parse(hint_pathname, hint.pvr) + if 'parse-warnings' in hints: + del hints['parse-warnings'] if ('skip' in hints): # p was source only, so no package remains print('removing %s' % (hint_pathname)) diff --git a/calm/find-duplicates.py b/calm/find-duplicates.py new file mode 100644 index 0000000..ec850a4 --- /dev/null +++ b/calm/find-duplicates.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2017 Jon Turney +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# + +import argparse +import hashlib +import re +import os +import sys +import tarfile + +from . import common_constants + +# +# look for archives which are duplicated between x86 and x86_64 +# (these should probably be moved to noarch or src) +# + +# +# helper function to compute sha512 for a particular file +# (block_size should be some multiple of sha512 block size which can be +# efficiently read) +# + + +def sha512_file(f, block_size=256 * 128): + sha512 = hashlib.sha512() + + for chunk in iter(lambda: f.read(block_size), b''): + sha512.update(chunk) + + return sha512.hexdigest() + +# +# +# + + +class TarMemberInfo: + def __init__(self, info, sha512): + self.info = info + self.sha512 = sha512 + + +def read_tar(f): + result = {} + + try: + with tarfile.open(f) as t: + for m in t: + if m.isfile(): + f = t.extractfile(m) + sha512 = sha512_file(f) + else: + sha512 = None + result[m.name] = TarMemberInfo(m, sha512) + except tarfile.ReadError: + # if we can't read the tar archive, we should never consider it to have + # the same contents as another tar archive... + result[f] = None + + return result + +# +# +# + + +def compare_archives(f1, f2): + # for speed, first check that archives are of the same size + if os.path.getsize(f1) != os.path.getsize(f2): + return 'different archive size' + + # if they are both compressed empty files (rather than compressed empty tar + # archives), they are the same + if os.path.getsize(f1) <= 32: + return None + + t1 = read_tar(f1) + t2 = read_tar(f2) + + if t1.keys() != t2.keys(): + return 'different member lists' + + for m in t1: + # compare size of member + if t1[m].info.size != t2[m].info.size: + return 'different size for member %s' % m + + # compare type of member + if t1[m].info.type != t2[m].info.type: + return 'different type for member %s' % m + + # for files, compare hash of file content + if t1[m].info.isfile(): + if t1[m].sha512 != t2[m].sha512: + return 'different hash for member %s' % m + # for links, compare target + elif t1[m].info.islnk() or t1[m].info.issym(): + if t1[m].info.linkname != t2[m].info.linkname: + return 'different linkname for member %s' % m + + # permitted differences: mtime, mode, owner uid/gid + + return None + +# +# +# + + +def find_duplicates(args): + basedir = os.path.join(args.rel_area, common_constants.ARCHES[0], 'release') + + for (dirpath, subdirs, files) in os.walk(basedir): + relpath = os.path.relpath(dirpath, basedir) + otherdir = os.path.join(args.rel_area, common_constants.ARCHES[1], 'release', relpath) + + for f in files: + # not an archive + if not re.match(r'^.*\.tar\.(bz2|gz|lzma|xz)$', f): + continue + + f1 = os.path.join(dirpath, f) + f2 = os.path.join(otherdir, f) + + if os.path.exists(f2): + difference = compare_archives(f1, f2) + if difference is None: + print(os.path.join('release', relpath, f)) + elif args.verbose: + print('%s: %s' % (os.path.join('release', relpath, f), difference)) + +# +# +# + + +def main(): + relarea_default = common_constants.FTP + + parser = argparse.ArgumentParser(description='Source package deduplicator') + parser.add_argument('--releasearea', action='store', metavar='DIR', help="release directory (default: " + relarea_default + ")", default=relarea_default, dest='rel_area') + parser.add_argument('-v', '--verbose', action='count', dest='verbose', help='verbose output') + (args) = parser.parse_args() + + return find_duplicates(args) + + +# +# +# + +if __name__ == "__main__": + sys.exit(main()) diff --git a/calm/hint-migrate.py b/calm/hint-migrate.py new file mode 100644 index 0000000..4d8156e --- /dev/null +++ b/calm/hint-migrate.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2017 Jon Turney +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# + +import argparse +import re +import os +import shutil +import sys + +from . import common_constants +from . import hint + +# +# migrate setup.hint to pvr.hint +# +# (just copy setup.hint to any missing pvr.hint. we don't need to bother +# cleaning up setup.hint which are no longer needed, as calm can do that) +# + + +def hint_migrate(args): + for arch in common_constants.ARCHES + ['noarch']: + basedir = os.path.join(args.rel_area, arch, 'release') + + for (dirpath, subdirs, files) in os.walk(basedir): + relpath = os.path.relpath(dirpath, basedir) + + if 'setup.hint' not in files: + continue + setup_hint_fn = os.path.join(dirpath, 'setup.hint') + + migrate = set() + for f in files: + match = re.match(r'^(.*?)(-src|)\.tar\.(bz2|gz|lzma|xz)$', f) + + # not an archive? + if not match: + continue + + pvr = match.group(1) + + # pvr.hint already exists? + if os.path.exists(os.path.join(dirpath, pvr + '.hint')): + continue + + migrate.add(pvr) + + # nothing to migrate + if not migrate: + continue + + # does the setup.hint parse as a pvr.hint + # (i.e. does it not contain version keys) + hints = hint.hint_file_parse(setup_hint_fn, hint.pvr) + if 'parse-errors' in hints: + print("can't migrate %s as it contains version keys" % (setup_hint_fn)) + continue + + for pvr in migrate: + pvr_hint_fn = os.path.join(dirpath, pvr + '.hint') + print('copy %s -> %s' % (setup_hint_fn, pvr_hint_fn)) + shutil.copy2(setup_hint_fn, pvr_hint_fn) + + +# +# +# + +def main(): + relarea_default = common_constants.FTP + + parser = argparse.ArgumentParser(description='setup.hint migrator') + parser.add_argument('--releasearea', action='store', metavar='DIR', help="release directory (default: " + relarea_default + ")", default=relarea_default, dest='rel_area') + (args) = parser.parse_args() + + return hint_migrate(args) + + +# +# +# + +if __name__ == "__main__": + sys.exit(main()) diff --git a/calm/hint.py b/calm/hint.py index 0c9fd50..f454fa8 100755 --- a/calm/hint.py +++ b/calm/hint.py @@ -124,6 +124,7 @@ categories = ['accessibility', 'science', 'security', 'shells', + 'source', # added to all source packages created by deduplicator to ensure they have a category 'sugar', 'system', 'tcl', diff --git a/calm/mkgitoliteconf.py b/calm/mkgitoliteconf.py index 374965e..bf243e6 100755 --- a/calm/mkgitoliteconf.py +++ b/calm/mkgitoliteconf.py @@ -77,7 +77,7 @@ def do_main(args): if p.startswith('_'): p = p[1:] - print("repo cygwin-packages/%s" % (p)) + print("repo git/cygwin-packages/%s" % (p)) print("C = %s" % (users)) print("RW = %s" % (users)) print("owner = %s" % (owner)) diff --git a/calm/package.py b/calm/package.py index 2d00299..73ba0ed 100755 --- a/calm/package.py +++ b/calm/package.py @@ -227,7 +227,7 @@ def read_package(packages, basedir, dirpath, files, strict=False, remove=[], upl if match: sha512[match.group(2)] = match.group(1) else: - logging.warning("bad line '%s' in sha512.sum for package '%s'" % (l, p)) + logging.warning("bad line '%s' in sha512.sum for package '%s'" % (l.strip(), p)) # discard obsolete md5.sum if 'md5.sum' in files: @@ -350,9 +350,14 @@ def read_package(packages, basedir, dirpath, files, strict=False, remove=[], upl packages[p].path = relpath packages[p].skip = any(['skip' in version_hints[vr] for vr in version_hints]) - elif (len(files) > 0) and (relpath.count(os.path.sep) > 1): - logging.log(strict_lvl, "no .hint files in %s but has files: %s" % (dirpath, ', '.join(files))) - warnings = True + elif (relpath.count(os.path.sep) > 1): + for s in ['md5.sum', 'sha512.sum']: + if s in files: + files.remove(s) + + if len(files) > 0: + logging.log(strict_lvl, "no .hint files in %s but has files: %s" % (dirpath, ', '.join(files))) + warnings = True if strict: return warnings @@ -623,11 +628,10 @@ def validate_packages(args, packages): # If the install tarball is empty and there is no source tarball, we # should probably be marked obsolete - # (XXX: should consider external-source: ?) if not packages[p].skip: for vr in packages[p].version_hints: if '_obsolete' not in packages[p].version_hints[vr].get('category', ''): - if 'source' not in packages[p].vermap[vr]: + if ('source' not in packages[p].vermap[vr]) and ('external-source' not in packages[p].version_hints[vr]): if 'install' in packages[p].vermap[vr]: if packages[p].tar(vr, 'install').is_empty: if p in past_mistakes.empty_but_not_obsolete: @@ -671,6 +675,16 @@ def validate_packages(args, packages): packages[es_p].is_used_by.add(p) continue + # this is a bodge to follow external-source: which hasn't been + # updated following a source package de-duplication + es_p = es_p + '-src' + if es_p in packages: + if 'source' in packages[es_p].vermap[v]: + logging.warning("package '%s' version '%s' external-source: should be %s" % (p, v, es_p)) + packages[es_p].tar(v, 'source').is_used = True + packages[es_p].is_used_by.add(p) + continue + # unless this package is marked as 'self-source' if p in past_mistakes.self_source: continue @@ -899,13 +913,13 @@ def write_setup_ini(args, packages, arch): else: logging.warning("package '%s' version '%s' has no source in external-source '%s'" % (p, version, s)) - if 'depends' in packages[p].version_hints[version]: + if packages[p].version_hints[version].get('depends', ''): print("depends: %s" % packages[p].version_hints[version]['depends'], file=f) - if 'obsoletes' in packages[p].version_hints[version]: + if packages[p].version_hints[version].get('obsoletes', ''): print("obsoletes: %s" % packages[p].version_hints[version]['obsoletes'], file=f) - if 'build-depends' in packages[p].version_hints[version]: + if packages[p].version_hints[version].get('build-depends', ''): bd = packages[p].version_hints[version]['build-depends'] # Ideally, we'd transform dependency atoms which aren't diff --git a/calm/tool.py b/calm/tool.py new file mode 100644 index 0000000..e7668ff --- /dev/null +++ b/calm/tool.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python3 +# +# Copyright (c) Jon Turney +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# + +import importlib +import sys + + +def main(): + # extract module name from argv + name = sys.argv[1] + sys.argv[1:] = sys.argv[2:] + + # dispatch to main() of tool module + module = importlib.import_module('calm.' + name) + sys.exit(module.main()) diff --git a/setup.py b/setup.py index 303277c..94c78ca 100644 --- a/setup.py +++ b/setup.py @@ -13,8 +13,7 @@ setup( 'console_scripts': [ 'calm = calm.calm:main', 'mksetupini = calm.mksetupini:main', - 'calm-mkgitoliteconf = calm.mkgitoliteconf:main', - 'dedup-source = calm.dedupsrc:main', + 'calm-tool = calm.tool:main', ], }, url='https://cygwin.com/git/?p=cygwin-apps/calm.git',