From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2013) id 783C33858284; Mon, 8 Jan 2024 14:53:41 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 783C33858284 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1704725621; bh=t88J/WdSBrY9ZdXTeR0DiIqMo7GTXWtX3bxIbDT+aA8=; h=From:To:Subject:Date:From; b=ibRtFQs7i2uKG6dUhksBc/p7mO/hHYWJLR5Dxta4Cf60LSdKwck2EKmhEYknqYo9b okqrIzqvXifRwVXdoT83q7iz0UXfgkubYhd0ZKFOd0aYmAtHrDHXcEin8YBOT4h5pv r4rGvSp4hHjKpL8y13vStym7qqhT/uVMYmjx82yc= MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Content-Type: text/plain; charset="utf-8" From: Mike Fabian To: glibc-cvs@sourceware.org Subject: [glibc] localedata: unicode-gen: Remove redundant \s* from regexp, fix comments X-Act-Checkin: glibc X-Git-Author: Mike FABIAN X-Git-Refname: refs/heads/master X-Git-Oldrev: 6f87f46bf4277d1a0d27b2507603e0acc059e6cb X-Git-Newrev: d333a2e0fb3a8045d2667847b8c99ee82a6bbdd2 Message-Id: <20240108145341.783C33858284@sourceware.org> Date: Mon, 8 Jan 2024 14:53:41 +0000 (GMT) List-Id: https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=d333a2e0fb3a8045d2667847b8c99ee82a6bbdd2 commit d333a2e0fb3a8045d2667847b8c99ee82a6bbdd2 Author: Mike FABIAN Date: Mon Jan 8 10:05:13 2024 +0100 localedata: unicode-gen: Remove redundant \s* from regexp, fix comments Diff: --- localedata/charmaps/UTF-8 | 2 +- localedata/unicode-gen/utf8_gen.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/localedata/charmaps/UTF-8 b/localedata/charmaps/UTF-8 index 94f20d5e87..b545cc9b25 100644 --- a/localedata/charmaps/UTF-8 +++ b/localedata/charmaps/UTF-8 @@ -49858,7 +49858,7 @@ END CHARMAP % Character width according to Unicode 15.1.0. % - Default width is 1. % - Double-width characters have width 2; generated from -% "grep '^[^;]*;[WF]' EastAsianWidth.txt" +% "grep '^[^;]*;\s*[WF]' EastAsianWidth.txt" % - Non-spacing characters have width 0; generated from PropList.txt or % "grep '^[^;]*;[^;]*;[^;]*;[^;]*;NSM;' UnicodeData.txt" % - Format control characters have width 0; generated from diff --git a/localedata/unicode-gen/utf8_gen.py b/localedata/unicode-gen/utf8_gen.py index 5e77333bb4..f744e87ffc 100755 --- a/localedata/unicode-gen/utf8_gen.py +++ b/localedata/unicode-gen/utf8_gen.py @@ -204,7 +204,7 @@ def write_header_width(outfile, unicode_version): + '{:s}.\n'.format(unicode_version)) outfile.write('% - Default width is 1.\n') outfile.write('% - Double-width characters have width 2; generated from\n') - outfile.write('% "grep \'^[^;]*;[WF]\' EastAsianWidth.txt"\n') + outfile.write('% "grep \'^[^;]*;\\s*[WF]\' EastAsianWidth.txt"\n') outfile.write('% - Non-spacing characters have width 0; ' + 'generated from PropList.txt or\n') outfile.write('% "grep \'^[^;]*;[^;]*;[^;]*;[^;]*;NSM;\' ' @@ -339,8 +339,8 @@ if __name__ == "__main__": with open(ARGS.east_asian_with_file, mode='r') as EAST_ASIAN_WIDTH_FILE: EAST_ASIAN_WIDTH_LINES = [] for LINE in EAST_ASIAN_WIDTH_FILE: - # If characters from EastAasianWidth.txt which are from - # from reserved ranges (i.e. not yet assigned code points) + # If characters from EastAsianWidth.txt which are from + # reserved ranges (i.e. not yet assigned code points) # are added to the WIDTH section of the UTF-8 file, then # “make check” produces “Unknown Character” errors for # these code points because such unassigned code points @@ -350,7 +350,7 @@ if __name__ == "__main__": # the EastAsianWidth.txt file. if re.match(r'.*\.\..*', LINE): continue - if re.match(r'^[^;]*;\s*[WF]\s*', LINE): + if re.match(r'^[^;]*;\s*[WF]', LINE): EAST_ASIAN_WIDTH_LINES.append(LINE.strip()) with open(ARGS.prop_list_file, mode='r') as PROP_LIST_FILE: PROP_LIST_LINES = []