]> granicus.if.org Git - icu/commitdiff
ICU-21755 commit checker: add support for COMMIT_METADATA.md file
authorSteven R. Loomis <srl295@gmail.com>
Tue, 26 Jul 2022 17:19:55 +0000 (12:19 -0500)
committerSteven R. Loomis <srl295@gmail.com>
Thu, 13 Oct 2022 17:05:17 +0000 (12:05 -0500)
- new option, --commit-metadata= with path to metadata file
- new option, --fix-version=41 (used for SKIP sections)
- scaffolding for 'bad commits' list
- new module CommitMetadata with unit tests
- sample file format TEST_COMMIT_METADATA.md
- such commits are skipped
- refactored the commit skipping part (formerly used for cherry pick skips)
- add a report section for skipped commits
- add a cache for JIRA queries (for dev use): --cache-for-dev "/tmp/cldr-commit-cache" - optional
- add an 'excluded commits' section at the bottom
- make sure commit metadata is used to update ticket IDs and messages.

tools/commit-checker/Pipfile
tools/commit-checker/TEST_COMMIT_METADATA.md [new file with mode: 0644]
tools/commit-checker/check.py
tools/commit-checker/commit_metadata.py [new file with mode: 0644]
tools/commit-checker/test_commit_metadata.py [new file with mode: 0644]

index 2abd5e63bf08497d3e4b9b729f7f2cbd6d68886f..d7c9882fe4e23b392e6275bf53604fca8e4e663e 100644 (file)
@@ -1,5 +1,5 @@
 # Copyright (C) 2018 and later: Unicode, Inc. and others.
-# License & terms of use: http://www.unicode.org/copyright.html 
+# License & terms of use: http://www.unicode.org/copyright.html
 # Author: shane@unicode.org
 
 [[source]]
@@ -15,3 +15,4 @@ gitpython = "*"
 jira = "*"
 
 [dev-packages]
+pytest = "*"
diff --git a/tools/commit-checker/TEST_COMMIT_METADATA.md b/tools/commit-checker/TEST_COMMIT_METADATA.md
new file mode 100644 (file)
index 0000000..b6e3592
--- /dev/null
@@ -0,0 +1,25 @@
+# - Commit Metadata
+
+### Copyright (C) 2022 and later: Unicode, Inc. and others.
+### License & terms of use: http://www.unicode.org/copyright.html
+### All lines starting with one hash (#) are significant
+### All lines starting with a dash (-) are significant.
+###
+### The following list of commits is parsed by the ICU/CLDR commit checker.
+### It has the following structure:
+### `- <commit> <bug id or hyphen> <message>`
+
+- 00decafbad CLDR-0000 Bad commit message
+- 50257a7a3eba7bbed634961a2aa74e77b12810bf - Early commit, no bug id
+- 283dc84d81cfc47fb15cd664fce4b0151d070ea6 - Early commit, no bug id
+- 02198373a591a15b804127acddd32582ec985b7e CLDR-15852 v42 merge commit
+
+
+### The following are items to skip for a certain CLDR version
+### Format: `# SKIP v00` followed by a list of commits to skip for that version
+
+# SKIP v41
+
+- 56ca5d5 CLDR-14877 split ticket
+
+
index 643ede3da09e29b768cc26cac8b08bdd7b2a4377..104f7cfab83f905e5b93aaf45c64d77853ce59c6 100644 (file)
@@ -9,11 +9,14 @@ import os
 import re
 import sys
 import datetime
+from hashlib import sha256
+import pickle
 
 from enum import Enum
 from collections import namedtuple
 from git import Repo
 from jira import JIRA
+from commit_metadata import CommitMetadata
 
 # singleCount = 0
 
@@ -110,6 +113,11 @@ flag_parser.add_argument(
     help = "Hostname of the Jira instance",
     default = "unicode-org.atlassian.net"
 )
+flag_parser.add_argument(
+    "--commit-metadata",
+    help = "path to COMMIT_METADATA.md",
+    default = "COMMIT_METADATA.md"
+)
 flag_parser.add_argument(
     "--jira-username",
     help = "Username to use for authenticating to Jira",
@@ -125,6 +133,16 @@ flag_parser.add_argument(
     help = "JQL query load tickets; this should match tickets expected to correspond to the commits being checked. Example: 'project=ICU and fixVersion=63.1'; set fixVersion to the upcoming version.",
     required = True
 )
+flag_parser.add_argument(
+    "--fix-version",
+    help = "Assumed Fix version. Used for commit metadata",
+    required = False
+)
+flag_parser.add_argument(
+    "--cache-for-dev",
+    help = "DEV USE ONLY: prefix of path to a jira cache",
+    required = False
+)
 flag_parser.add_argument(
     "--github-url",
     help = "Base URL of the GitHub repo",
@@ -171,19 +189,26 @@ def pretty_print_issue(issue, type=None, **kwargs):
     if issue.issue.fields.components and len(issue.issue.fields.components) > 0:
         print("\t- Component(s): " + (' '.join(sorted([str(component.name) for component in issue.issue.fields.components]))))
 
-def get_commits(repo_root, rev_range, **kwargs):
+def get_commits(commit_metadata_obj, repo_root, rev_range, **kwargs):
     """
     Yields an ICUCommit for each commit in the user-specified rev-range.
     """
     repo = Repo(repo_root)
     for commit in repo.iter_commits(rev_range):
-        match = re.search(r"^(\w+-\d+) ", commit.message)
-        if match:
-            issue_id = match.group(1)
-            # print("@@@ %s = %s / %s" % (issue_id, commit, commit.summary), file=sys.stderr)
-            yield ICUCommit(issue_id, commit)
-        else:
-            yield ICUCommit(None, commit)
+        issue_id = None
+        if commit_metadata_obj:
+            # read from the non-skip list
+            from_commit_metadata = commit_metadata_obj.get_commit_info(commit.hexsha, skip=None)
+            if from_commit_metadata:
+                (sha, issue_id, message) = from_commit_metadata
+                if message:
+                    # Update message if found
+                    commit.message = commit.message + "\nCOMMIT_METADATA: " + message
+        if not issue_id:
+            match = re.search(r"^(\w+-\d+) ", commit.message)
+            if match:
+                issue_id = match.group(1)
+        yield ICUCommit(issue_id, commit)
 
 def get_cherrypicked_commits(repo_root, rev_range, **kwargs):
     """
@@ -255,6 +280,40 @@ def get_jira_issues(jira_query, **kwargs):
         start += batch_size
 
 jira_issue_map = dict() # loaded in main()
+commit_metadata = None
+fixversion_to_skip = None
+
+def get_issue_cache_path(args):
+    return '%s-%s.pickle' % (args.cache_for_dev, sha256(args.jira_query.encode()).hexdigest())
+
+def load_issues(args):
+    global jira_issue_ids, jira_issue_map, closed_jira_issue_ids
+
+    if args.cache_for_dev:
+        issue_cache_path = get_issue_cache_path(args)
+        if os.path.exists(issue_cache_path):
+            with open(issue_cache_path, 'rb') as f:
+                (issues, jira_issue_ids, closed_jira_issue_ids, jira_issue_map) = pickle.load(f)
+                print("Loaded jira cache to " + issue_cache_path, file=sys.stderr)
+                return issues
+    # proceed to load
+    issues = list(get_jira_issues(**vars(args)))
+    # add all queried issues to the cache
+    for issue in issues:
+        jira_issue_map[issue.issue_id] = issue
+    # only the issue ids in-query
+    jira_issue_ids = set(issue.issue_id for issue in issues)
+    # only the closed issue ids in-query
+    closed_jira_issue_ids = set(issue.issue_id for issue in issues if issue.is_closed)
+
+    return issues
+
+def save_issues_to_cache(args, issues):
+    if args.cache_for_dev:
+        issue_cache_path = get_issue_cache_path(args)
+        with open(issue_cache_path, 'wb') as f:
+            pickle.dump((issues, jira_issue_ids, closed_jira_issue_ids, jira_issue_map), f)
+            print("Wrote cache to " + issue_cache_path, file=sys.stderr)
 
 def get_single_jira_issue(issue_id, **kwargs):
     """
@@ -293,6 +352,7 @@ def print_sectionheader(section):
     #print("### %s%s" % (aname(section), section))
 
 def main():
+    global fixversion_to_skip
     args = flag_parser.parse_args()
     print("TIP: Have you pulled the latest main? This script only looks at local commits.", file=sys.stderr)
     if not args.jira_username or not args.jira_password:
@@ -301,30 +361,40 @@ def main():
     else:
         authenticated = True
 
+    if args.fix_version:
+        fixversion_to_skip = 'v%s' % args.fix_version
+        args.fixversion_to_skip = fixversion_to_skip
+
+    if args.commit_metadata and os.path.exists(args.commit_metadata):
+        commit_metadata = CommitMetadata(args.commit_metadata)
+    else:
+        commit_metadata = CommitMetadata()  # null
+    args.commit_metadata_obj = commit_metadata
+
     # exclude these, already merged to old maint
-    excludeAlreadyMergedToOldMaint = get_cherrypicked_commits(**vars(args))
+    exclude_already_merged_to_old_maint = get_cherrypicked_commits(**vars(args))
 
     commits = list(get_commits(**vars(args)))
-    issues = list(get_jira_issues(**vars(args)))
+
+    def exclude_commit_sha(sha) -> bool:
+        """Return true if this sha is excluded"""
+        exclude = (sha in exclude_already_merged_to_old_maint) or (commit_metadata.get_commit_info(sha, skip=fixversion_to_skip) is not None)
+        return exclude
 
     # commit_issue_ids is all commits in the git query. Excluding cherry exclusions.
-    commit_issue_ids = set(commit.issue_id for commit in commits if commit.issue_id is not None and commit.commit.hexsha not in excludeAlreadyMergedToOldMaint)
+    commit_issue_ids = set(commit.issue_id for commit in commits if commit.issue_id is not None and not exclude_commit_sha(commit.commit.hexsha))
     # which issues have commits that were excluded
-    excluded_commit_issue_ids = set(commit.issue_id for commit in commits if commit.issue_id is not None and commit.commit.hexsha in excludeAlreadyMergedToOldMaint)
+    excluded_commit_issue_ids = set(commit.issue_id for commit in commits if commit.issue_id is not None and exclude_commit_sha(commit.commit.hexsha))
+    excluded_commits = set(commit for commit in commits if exclude_commit_sha(commit.commit.hexsha))
 
     # grouped_commits is all commits and issue_ids in the git query, regardless of issue status
     # but NOT including cherry exclusions
     grouped_commits = [
-        (issue_id, [commit for commit in commits if commit.issue_id == issue_id and commit.commit.hexsha not in excludeAlreadyMergedToOldMaint])
+        (issue_id, [commit for commit in commits if commit.issue_id == issue_id and not exclude_commit_sha(commit.commit.hexsha)])
         for issue_id in sorted(commit_issue_ids)
     ]
-    # add all queried issues to the cache
-    for issue in issues:
-        jira_issue_map[issue.issue_id] = issue
-    # only the issue ids in-query
-    jira_issue_ids = set(issue.issue_id for issue in issues)
-    # only the closed issue ids in-query
-    closed_jira_issue_ids = set(issue.issue_id for issue in issues if issue.is_closed)
+
+    issues = load_issues(args)
 
     # keep track of issues that we already said have no commit.
     no_commit_ids = set()
@@ -376,7 +446,7 @@ def main():
         no_commit_ids.add(issue.issue_id)
         pretty_print_issue(issue, type=CLOSED_NO_COMMIT, **vars(args))
         if issue.issue_id in excluded_commit_issue_ids:
-            print("\t - **Note: Has cherry-picked commits. Fix Version may be wrong.**")
+            print("\t - **Note: Has excluded/cherry-picked commits. Fix Version may be wrong.**")
         print()
     if not found:
         print("*Success: No problems in this category!*")
@@ -451,8 +521,6 @@ def main():
         print("##### Commits with Issue %s" % issue_id)
         print()
         for commit in commits:
-            if(commit.commit.hexsha in excludeAlreadyMergedToOldMaint):
-                print("@@@ ALREADY MERGED")
             pretty_print_commit(commit, **vars(args))
             print()
     if not found:
@@ -542,5 +610,26 @@ def main():
     print("## Total Problems: %s" % total_problems)
     print("## Issues under review: %s" % len(issues_in_review)) # not counted as a problem.
 
+    if fixversion_to_skip:
+        print("## fix version (for COMMIT_METADATA SKIP) %s" % fixversion_to_skip)
+
+    if len(excluded_commits) > 0:
+        print()
+        print("## Excluded commits: %d" % len(excluded_commits))
+        for commit in excluded_commits:
+            pretty_print_commit(commit, **vars(args))
+            from_commit_metadata = commit_metadata.get_commit_info(commit.commit.hexsha, skip=fixversion_to_skip)
+            if from_commit_metadata:
+                (sha, id, message) = from_commit_metadata
+                # Print any notes in the skip list here.
+                if id:
+                    print("\t- COMMIT_METADATA.md: %s" % id)
+                if message:
+                    print("\t- COMMIT_METADATA.md: %s" % message)
+            if commit.commit.hexsha in exclude_already_merged_to_old_maint:
+                print("\t- NOTE: excluded due to already being merged to old maint")
+    # save out the dev cache here, so that any 1-off issues are also included in the cache
+    save_issues_to_cache(args, issues)
+
 if __name__ == "__main__":
     main()
diff --git a/tools/commit-checker/commit_metadata.py b/tools/commit-checker/commit_metadata.py
new file mode 100644 (file)
index 0000000..62c49c6
--- /dev/null
@@ -0,0 +1,83 @@
+# Copyright (C) 2022 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+# Author: srloomis@unicode.org
+
+from re import compile
+from typing import Tuple
+import typing
+
+commit_line = compile('^\s*-\s+([0-9a-f]+)\s+([^\s]+)\s+(.*)$') # '- <sha> <hypen-or-ticketid> <message>'
+verb_line = compile('^\s*#\s+([A-Z]+)\s+(.*)$') # '# ACTION any other content…'
+
+class CommitMetadata():
+    def __init__(self, metadata_file=None) -> None:
+        skipset = {}
+        fixset = []  # sha, id, message
+        current_skip = None
+        if metadata_file:
+            with open(metadata_file, 'r') as f:
+                for line in f.readlines():
+                    line = line.strip()
+                    m = commit_line.match(line)
+                    if m:
+                        sha = m.group(1)
+                        id = m.group(2)
+                        message = m.group(3)
+                        fixset.append((sha, id, message))
+                        if current_skip:
+                            skipset[current_skip].append((sha, id, message))
+                        continue
+                    m = verb_line.match(line)
+                    if m:
+                        action = m.group(1)
+                        content = m.group(2)
+                        # for now we only support SKIP
+                        assert action == "SKIP", "Unknown action %s in %s (expected 'SKIP')" % (action, line)
+                        current_skip = content
+                        assert content not in skipset, "Error: two sections like %s" % line
+                        skipset[content] = []  # initialize this
+                        continue
+        self.fixset = fixset
+        self.skipset = skipset
+        pass
+
+    def get_commit_info(self, commit, skip=None) -> typing.Tuple[str, str, str]:
+        """Return an override line given a commit
+
+        Args:
+            commit (str): hash or short hash of a commit
+            skip (str): If set, a version to skip such as 'v41' (this will match a section - SKIP v41)
+
+        Returns:
+            sha (str): original sha from line
+            id (str): ticket id or hash
+            message (str): override message
+        """
+        if skip:
+            if not skip in self.skipset:
+                return None
+            return CommitMetadata.match_list(commit, self.skipset[skip])
+        else:
+            return CommitMetadata.match_list(commit, self.fixset)
+
+    @staticmethod
+    def match_list(commit, l) -> any:
+        """Find the first match in a list of commits
+
+        Args:
+            commit (str): short or long commit string
+            l (list): list of tuples, where item 0 is the commit
+
+        Returns:
+            any: matching list member, or None
+        """
+        for i in l:
+            if CommitMetadata.match_commit(commit, i[0]):
+                return i
+        return None
+
+    @staticmethod
+    def match_commit(h1, h2) -> bool:
+        """return true if the prefix of the hashes are the same"""
+        comm = min(len(h1), len(h2))
+        return h1[0:comm] == h2[0:comm]
diff --git a/tools/commit-checker/test_commit_metadata.py b/tools/commit-checker/test_commit_metadata.py
new file mode 100644 (file)
index 0000000..536286e
--- /dev/null
@@ -0,0 +1,65 @@
+# Copyright (C) 2022 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+# Author: srloomis@unicode.org
+
+from commit_metadata import CommitMetadata
+
+def test_hash():
+    assert CommitMetadata.match_commit("512a679aba", "512a679aba22e144a4443df5f8f304e4f8b39054")
+    assert CommitMetadata.match_commit("512a679aba", "512a679aba")
+    assert CommitMetadata.match_commit("512a679aba22e144a4443df5f8f304e4f8b39054", "512a679aba22e144a4443df5f8f304e4f8b39054")
+    assert not CommitMetadata.match_commit("512a679aba", "16aae80199")
+    assert not CommitMetadata.match_commit("512a679aba22e144a4443df5f8f304e4f8b39054", "16aae80199")
+
+def test_matchcommit():
+    assert CommitMetadata.match_list("512a679aba", [("00decafbad", 44), ("512a679aba", 99)]) == ("512a679aba", 99)
+    assert CommitMetadata.match_list("512a679aba22e144a4443df5f8f304e4f8b39054", [("00decafbad", 44), ("512a679aba", 99)]) == ("512a679aba", 99)
+    assert CommitMetadata.match_list("512a679aba", [("00decafbad", 44), ("512a679aba22e144a4443df5f8f304e4f8b39054", 99)]) == ("512a679aba22e144a4443df5f8f304e4f8b39054", 99)
+    assert not CommitMetadata.match_list("16aae80199", [("00decafbad", 44), ("512a679aba", 99)])
+    assert not CommitMetadata.match_list("16aae80199", [("00decafbad", 44), ("512a679aba22e144a4443df5f8f304e4f8b39054", 99)])
+
+def test_read():
+    m = CommitMetadata(metadata_file="./TEST_COMMIT_METADATA.md")
+    assert m
+
+    assert not m.get_commit_info('00000000')  # not in list
+    assert m.get_commit_info('00decafbad')
+    assert m.get_commit_info('00decafbad')[1].startswith('CLDR-0000')
+    assert m.get_commit_info('56ca5d5')
+    assert m.get_commit_info('56ca5d5')[1].startswith('CLDR-14877')
+    # short or long, same
+    assert m.get_commit_info('56ca5d5') == m.get_commit_info('56ca5d563cf57990a7598f570cb9be51956cb9de')
+
+    # skip list
+    assert m.get_commit_info('56ca5d5', skip='v41')
+    assert not m.get_commit_info('56ca5d5', skip='v42')
+    assert m.get_commit_info('56ca5d563cf57990a7598f570cb9be51956cb9de', skip='v41')
+    assert not m.get_commit_info('56ca5d563cf57990a7598f570cb9be51956cb9de', skip='v42')
+    assert not m.get_commit_info('00decafbad', 'v41')
+
+def test_null_read():
+    m = CommitMetadata(metadata_file=None)
+    assert m
+
+    # function with no info
+    assert not m.get_commit_info('00000000')  # not in list
+    assert not m.get_commit_info('00decafbad')
+    assert not m.get_commit_info('56ca5d5')
+    assert not m.get_commit_info('56ca5d5', skip='v41')
+    assert not m.get_commit_info('56ca5d5', skip='v42')
+
+def test_parse_42():
+    m = CommitMetadata(metadata_file="./TEST_COMMIT_METADATA.md")
+    assert m
+
+    # no skip
+    info = m.get_commit_info('02198373a591a15b804127acddd32582ec985b7e')
+    assert info
+    assert info[0] == '02198373a591a15b804127acddd32582ec985b7e'
+    assert info[1] == 'CLDR-15852'
+    assert info[2] == 'v42 merge commit'
+
+    # skip
+    info = m.get_commit_info('02198373a591a15b804127acddd32582ec985b7e', skip='v42')
+    # not found because it isn't in SKIP v42
+    assert not info