ICU-21980 parse multiple `@missing` lines

author Markus Scherer <markus.icu@gmail.com>
Sun, 29 May 2022 23:52:10 +0000 (16:52 -0700)
committer Markus Scherer <markus.icu@gmail.com>
Thu, 2 Jun 2022 21:29:24 +0000 (21:29 +0000)
diff --git a/tools/unicode/py/preparseucd.py b/tools/unicode/py/preparseucd.py

index ee7e64aecf7b94b7f76a4afe36796d2c79d5a5fc..fec07076b5e2cc9d4101d3e7c7d86842f8211ed4 100755 (executable)
--- a/tools/unicode/py/preparseucd.py
+++ b/tools/unicode/py/preparseucd.py
@@ -353,7 +353,10 @@ def SetPropertyValue(pname, vname, start, end):
  
  _stripped_cp_re = re.compile("([0-9a-fA-F]+)$")
  _stripped_range_re = re.compile("([0-9a-fA-F]+)\.\.([0-9a-fA-F]+)$")
+# Default value for all of Unicode.
  _missing_re = re.compile("# *@missing: *0000\.\.10FFFF *; *(.+)$")
+# Default value for some range.
+_missing2_re = re.compile("# *@missing: *(.+)$")
  
  def ReadUCDLines(in_file, want_ranges=True, want_other=False,
                   want_comments=False, want_missing=False):
@@ -365,6 +368,7 @@ def ReadUCDLines(in_file, want_ranges=True, want_other=False,
      line = line.strip()
      if not line: continue
      if line.startswith("#"):  # whole-line comment
+      parse_data = False
        if want_missing:
          match = _missing_re.match(line)
          if match:
@@ -372,8 +376,15 @@ def ReadUCDLines(in_file, want_ranges=True, want_other=False,
            for i in range(len(fields)): fields[i] = fields[i].strip()
            yield ("missing", line, fields)
            continue
-      if want_comments: yield ("comment", line)
-      continue
+        match = _missing2_re.match(line)
+        if match:
+          # Strip the "missing" comment prefix and fall through to
+          # parse the remainder of the line like regular data.
+          parse_data = True
+          line = match.group(1)
+      if not parse_data:
+        if want_comments: yield ("comment", line)
+        continue
      comment_start = line.find("#")  # inline comment
      if comment_start >= 0:
        line = line[:comment_start].rstrip()
author	Markus Scherer <markus.icu@gmail.com>
	Sun, 29 May 2022 23:52:10 +0000 (16:52 -0700)
committer	Markus Scherer <markus.icu@gmail.com>
	Thu, 2 Jun 2022 21:29:24 +0000 (21:29 +0000)