From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 7905) id 661433857B8B; Tue, 16 Jan 2024 18:07:25 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 661433857B8B DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1705428445; bh=y5POX/pIgitLobyXy0wVg19SgZrpa0KNbqCrn/KJzPc=; h=From:To:Subject:Date:From; b=qQebTWWL7Zys5xbWMSaamjzBF5w1yIsmzwTqww5nNCUxtzd9JKwv6xX1rltHWf5bH kinuI8Fmf8RLjPZzN0ikOOgRSSLgu7RUpNT0Uk9Wjm57V1anP9PVRNL3PV+4catZ2N ruG8FEgTl7YG3Z6nMqjs3ivhbBT+pZzSlDFHyuGc= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Arthur Cohen To: gcc-cvs@gcc.gnu.org Subject: [gcc r14-7920] gccrs: Type annotation for make-unicode-data.py X-Act-Checkin: gcc X-Git-Author: Raiki Tamura X-Git-Refname: refs/heads/trunk X-Git-Oldrev: 18464d32dad7296a370bd8ce6746eac32e00ef52 X-Git-Newrev: 06af44ece16072c7078c1fee283108a12c963ecb Message-Id: <20240116180725.661433857B8B@sourceware.org> Date: Tue, 16 Jan 2024 18:07:25 +0000 (GMT) List-Id: https://gcc.gnu.org/g:06af44ece16072c7078c1fee283108a12c963ecb commit r14-7920-g06af44ece16072c7078c1fee283108a12c963ecb Author: Raiki Tamura Date: Sun Aug 6 18:49:20 2023 +0900 gccrs: Type annotation for make-unicode-data.py gcc/rust/ChangeLog: * util/make-rust-unicode.py: Add type annotation Signed-off-by: Raiki Tamura Diff: --- gcc/rust/util/make-rust-unicode.py | 68 ++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 32 deletions(-) diff --git a/gcc/rust/util/make-rust-unicode.py b/gcc/rust/util/make-rust-unicode.py index eaf2fc8d272..5303440fd25 100644 --- a/gcc/rust/util/make-rust-unicode.py +++ b/gcc/rust/util/make-rust-unicode.py @@ -22,6 +22,10 @@ # > rust-unicode-data.h import sys +from typing import Tuple + +Codepoint = int +Range = Tuple[Codepoint, Codepoint] COPYRIGHT = ( "// Copyright (C) 2020-2023 Free Software Foundation, Inc.\n" @@ -44,25 +48,25 @@ COPYRIGHT = ( ) # Decomposition_Mapping table -decomposition_map = {} +decomposition_map: dict[Codepoint, list[Codepoint]] = {} # Canonical_Combining_Class table -ccc_table = {} +ccc_table: dict[Codepoint, int] = {} # Ranges of codepoints with the Full_Composition_Exclusion property -composition_exclusion_ranges = [] +composition_exclusion_ranges: list[Range] = [] # Ranges of codepoints with the Full_Composition_Exclusion property -alphabetic_ranges = [] +alphabetic_ranges: list[Range] = [] # Ranges of codepoints with NFC_QC=No -nfc_qc_no_ranges = [] +nfc_qc_no_ranges: list[Range] = [] # Ranges of codepoints with NFC_QC=Maybe -nfc_qc_maybe_ranges = [] -numeric_codepoints = [] +nfc_qc_maybe_ranges: list[Range] = [] +numeric_codepoints: list[Codepoint] = [] # Note that an element of range `[m, n]` (a list in python) represents [m, n) -def binary_search_ranges(ranges, target): - low = 0 - high = len(ranges) - 1 +def binary_search_ranges(ranges: list[Range], target: Codepoint) -> int: + low: int = 0 + high: int = len(ranges) - 1 while low <= high: mid = (low + high) // 2 start, end = ranges[mid] @@ -77,8 +81,8 @@ def binary_search_ranges(ranges, target): # Utility function to parse '...' or '' -def parse_codepoint_range(range_str): - codepoint_range = range_str.split("..") +def parse_codepoint_range(range_str: str) -> Range: + codepoint_range: list[str] = range_str.split("..") assert len(codepoint_range) == 1 or len(codepoint_range) == 2, "Invalid format" start_cp, end_cp = 0, 0 if len(codepoint_range) == 1: @@ -89,11 +93,11 @@ def parse_codepoint_range(range_str): # m => [m, m+1) start_cp = int(codepoint_range[0], 16) end_cp = int(codepoint_range[1], 16) + 1 - return [start_cp, end_cp] + return start_cp, end_cp -def read_unicode_data_txt(filepath): - def process_line(line): +def read_unicode_data_txt(filepath: str) -> None: + def process_line(line: str) -> None: rows = line.split(";") if len(rows) != 15: return @@ -124,13 +128,13 @@ def read_unicode_data_txt(filepath): if len(decomp_cps) > 0: decomposition_map[cp] = decomp_cps - with open(sys.argv[1], "r", encoding="UTF-8") as file: + with open(filepath, "r", encoding="UTF-8") as file: while line := file.readline(): process_line(line.rstrip()) -def read_derived_norm_props_txt(filepath): - def process_line(line): +def read_derived_norm_props_txt(filepath: str) -> None: + def process_line(line) -> None: # Ignore comments line = line.split("#")[0] rows = line.split(";") @@ -157,8 +161,8 @@ def read_derived_norm_props_txt(filepath): process_line(line.rstrip()) -def read_derived_core_props_txt(filepath): - def process_line(line): +def read_derived_core_props_txt(filepath: str) -> None: + def process_line(line: str) -> None: # Ignore comments line = line.split("#")[0] rows = line.split(";") @@ -169,7 +173,7 @@ def read_derived_core_props_txt(filepath): rows[1] = rows[1].lstrip().rstrip() if rows[1] != "Alphabetic": return - cp_range = parse_codepoint_range(rows[0]) + cp_range: Range = parse_codepoint_range(rows[0]) alphabetic_ranges.append(cp_range) with open(filepath, "r", encoding="UTF-8") as file: @@ -177,7 +181,7 @@ def read_derived_core_props_txt(filepath): process_line(line.rstrip()) -def write_decomposition(): +def write_decomposition() -> None: print("const std::map> DECOMPOSITION_MAP = {") print(" // clang-format off") for cp in sorted(decomposition_map): @@ -190,7 +194,7 @@ def write_decomposition(): print("};") -def write_recomposition(): +def write_recomposition() -> None: print( "const std::map, uint32_t> RECOMPOSITION_MAP = {{" ) @@ -198,6 +202,8 @@ def write_recomposition(): for cp in decomposition_map: if binary_search_ranges(composition_exclusion_ranges, cp) != -1: continue + d1: Codepoint + d2: Codepoint if len(decomposition_map[cp]) == 1: d1 = decomposition_map[cp][0] d2 = 0 @@ -209,7 +215,7 @@ def write_recomposition(): print("}};") -def write_ccc(): +def write_ccc() -> None: print("const std::map CCC_TABLE = {") print(" // clang-format off") for cp in ccc_table: @@ -218,7 +224,7 @@ def write_ccc(): print("};") -def write_alphabetic(): +def write_alphabetic() -> None: print( "const std::array, NUM_ALPHABETIC_RANGES> ALPHABETIC_RANGES = {{" ) @@ -229,7 +235,7 @@ def write_alphabetic(): print("}};") -def write_numeric(): +def write_numeric() -> None: print("const std::array NUMERIC_CODEPOINTS = {{") print(" // clang-format off") for i, cp in enumerate(numeric_codepoints): @@ -244,13 +250,13 @@ def write_numeric(): print("}};") -def main(): +def main() -> None: if len(sys.argv) != 4: print("too few arguments", file=sys.stderr) exit(-1) - unicode_txt_path = sys.argv[1] - norm_props_txt_path = sys.argv[2] - core_props_txt_path = sys.argv[3] + unicode_txt_path: str = sys.argv[1] + norm_props_txt_path: str = sys.argv[2] + core_props_txt_path: str = sys.argv[3] read_unicode_data_txt(unicode_txt_path) read_derived_norm_props_txt(norm_props_txt_path) @@ -271,8 +277,6 @@ def main(): print() write_recomposition() print() - # write_composition_exclusion() - # print() write_ccc() print() write_alphabetic()