import re
import sys
import datetime
+import os
+import pickle
+from hashlib import sha256
from enum import Enum
from collections import namedtuple
from git import Repo
from jira import JIRA
+from commit_metadata import CommitMetadata
# singleCount = 0
help = "Hostname of the Jira instance",
default = "unicode-org.atlassian.net"
)
+flag_parser.add_argument(
+ "--commit-metadata",
+ help = "path to COMMIT_METADATA.md",
+ default = "COMMIT_METADATA.md"
+)
flag_parser.add_argument(
"--jira-username",
help = "Username to use for authenticating to Jira",
help = "JQL query load tickets; this should match tickets expected to correspond to the commits being checked. Example: 'project=ICU and fixVersion=63.1'; set fixVersion to the upcoming version.",
required = True
)
+flag_parser.add_argument(
+ "--fix-version",
+ help = "Assumed Fix version. Used for commit metadata",
+ required = False
+)
+flag_parser.add_argument(
+ "--cache-for-dev",
+ help = "DEV USE ONLY: prefix of path to a jira cache",
+ required = False
+)
flag_parser.add_argument(
"--github-url",
help = "Base URL of the GitHub repo",
if issue.issue.fields.components and len(issue.issue.fields.components) > 0:
print("\t- Component(s): " + (' '.join(sorted([str(component.name) for component in issue.issue.fields.components]))))
def get_commits(commit_metadata_obj, repo_root, rev_range, **kwargs):
    """
    Yields an ICUCommit for each commit in the user-specified rev-range.

    A CommitMetadata override entry (non-skip list) takes precedence for the
    ticket id; any override note is appended to the commit message.
    """
    repo = Repo(repo_root)
    for commit in repo.iter_commits(rev_range):
        ticket_id = None
        if commit_metadata_obj:
            # read from the non-skip list
            override = commit_metadata_obj.get_commit_info(commit.hexsha, skip=None)
            if override:
                (_sha, ticket_id, note) = override
                if note:
                    # Update message if found
                    commit.message = commit.message + "\nCOMMIT_METADATA: " + note
        if not ticket_id:
            # Fall back to parsing a leading "TICKET-123 " from the message.
            parsed = re.search(r"^(\w+-\d+) ", commit.message)
            if parsed:
                ticket_id = parsed.group(1)
        yield ICUCommit(ticket_id, commit)
def get_cherrypicked_commits(repo_root, rev_range, **kwargs):
"""
start += batch_size
jira_issue_map = dict() # loaded in main()
+# Parsed COMMIT_METADATA.md handle. NOTE(review): main() assigns a *local* of
+# the same name rather than this global — confirm the global is actually read.
+commit_metadata = None
+# 'v<fix-version>' SKIP-section name for COMMIT_METADATA.md; set in main().
+fixversion_to_skip = None
+
def get_issue_cache_path(args):
    """Return the dev cache filename: <prefix>-<sha256 of the JQL query>.pickle."""
    query_digest = sha256(args.jira_query.encode()).hexdigest()
    return '%s-%s.pickle' % (args.cache_for_dev, query_digest)
+
def load_issues(args):
    """Load Jira issues, optionally via the --cache-for-dev pickle cache.

    Populates the module-level jira_issue_ids, closed_jira_issue_ids and
    jira_issue_map from the query results (or from the cache) and returns
    the list of issues.
    """
    global jira_issue_ids, jira_issue_map, closed_jira_issue_ids

    if args.cache_for_dev:
        issue_cache_path = get_issue_cache_path(args)
        if os.path.exists(issue_cache_path):
            # NOTE: pickle is only acceptable here because this is a local,
            # developer-created dev cache; never unpickle untrusted data.
            with open(issue_cache_path, 'rb') as f:
                (issues, jira_issue_ids, closed_jira_issue_ids, jira_issue_map) = pickle.load(f)
            # Fixed message: this is the *load* path ("from"), not the save path.
            print("Loaded jira cache from " + issue_cache_path, file=sys.stderr)
            return issues
    # No cache hit: proceed to load from Jira.
    issues = list(get_jira_issues(**vars(args)))
    # add all queried issues to the cache
    for issue in issues:
        jira_issue_map[issue.issue_id] = issue
    # only the issue ids in-query
    jira_issue_ids = set(issue.issue_id for issue in issues)
    # only the closed issue ids in-query
    closed_jira_issue_ids = set(issue.issue_id for issue in issues if issue.is_closed)

    return issues
+
def save_issues_to_cache(args, issues):
    """Persist issues plus the derived id sets/map to the dev cache, if enabled."""
    if not args.cache_for_dev:
        return
    issue_cache_path = get_issue_cache_path(args)
    payload = (issues, jira_issue_ids, closed_jira_issue_ids, jira_issue_map)
    with open(issue_cache_path, 'wb') as f:
        pickle.dump(payload, f)
    print("Wrote cache to " + issue_cache_path, file=sys.stderr)
def get_single_jira_issue(issue_id, **kwargs):
"""
#print("### %s%s" % (aname(section), section))
def main():
+    global fixversion_to_skip
    args = flag_parser.parse_args()
    print("TIP: Have you pulled the latest main? This script only looks at local commits.", file=sys.stderr)
    if not args.jira_username or not args.jira_password:
    else:
        authenticated = True
+    # Name of the COMMIT_METADATA.md SKIP section for the assumed fix version,
+    # e.g. --fix-version 63.1 -> 'v63.1' (matches a '# SKIP v63.1' section).
+    if args.fix_version:
+        fixversion_to_skip = 'v%s' % args.fix_version
+    args.fixversion_to_skip = fixversion_to_skip
+
+    # NOTE(review): this binds a *local* commit_metadata; the module-level
+    # 'commit_metadata = None' is not updated (only fixversion_to_skip is
+    # declared global above) — confirm nothing else reads the module global.
+    if args.commit_metadata and os.path.exists(args.commit_metadata):
+        commit_metadata = CommitMetadata(args.commit_metadata)
+    else:
+        commit_metadata = CommitMetadata() # null
+    args.commit_metadata_obj = commit_metadata
+
    # exclude these, already merged to old maint
-    excludeAlreadyMergedToOldMaint = get_cherrypicked_commits(**vars(args))
+    exclude_already_merged_to_old_maint = get_cherrypicked_commits(**vars(args))
    commits = list(get_commits(**vars(args)))
-    issues = list(get_jira_issues(**vars(args)))
+
+    def exclude_commit_sha(sha) -> bool:
+        """Return true if this sha is excluded"""
+        # Excluded when already cherry-picked to the old maint branch, or when
+        # listed under the fix version's SKIP section in COMMIT_METADATA.md.
+        exclude = (sha in exclude_already_merged_to_old_maint) or (commit_metadata.get_commit_info(sha, skip=fixversion_to_skip) is not None)
+        return exclude
    # commit_issue_ids is all commits in the git query. Excluding cherry exclusions.
-    commit_issue_ids = set(commit.issue_id for commit in commits if commit.issue_id is not None and commit.commit.hexsha not in excludeAlreadyMergedToOldMaint)
+    commit_issue_ids = set(commit.issue_id for commit in commits if commit.issue_id is not None and not exclude_commit_sha(commit.commit.hexsha))
    # which issues have commits that were excluded
-    excluded_commit_issue_ids = set(commit.issue_id for commit in commits if commit.issue_id is not None and commit.commit.hexsha in excludeAlreadyMergedToOldMaint)
+    excluded_commit_issue_ids = set(commit.issue_id for commit in commits if commit.issue_id is not None and exclude_commit_sha(commit.commit.hexsha))
+    # All excluded commits, including those without a parseable issue id.
+    excluded_commits = set(commit for commit in commits if exclude_commit_sha(commit.commit.hexsha))
    # grouped_commits is all commits and issue_ids in the git query, regardless of issue status
    # but NOT including cherry exclusions
    grouped_commits = [
-        (issue_id, [commit for commit in commits if commit.issue_id == issue_id and commit.commit.hexsha not in excludeAlreadyMergedToOldMaint])
+        (issue_id, [commit for commit in commits if commit.issue_id == issue_id and not exclude_commit_sha(commit.commit.hexsha)])
        for issue_id in sorted(commit_issue_ids)
    ]
-    # add all queried issues to the cache
-    for issue in issues:
-        jira_issue_map[issue.issue_id] = issue
-    # only the issue ids in-query
-    jira_issue_ids = set(issue.issue_id for issue in issues)
-    # only the closed issue ids in-query
-    closed_jira_issue_ids = set(issue.issue_id for issue in issues if issue.is_closed)
+
+    # Issue loading (and the optional --cache-for-dev cache) lives in load_issues().
+    issues = load_issues(args)
    # keep track of issues that we already said have no commit.
    no_commit_ids = set()
            no_commit_ids.add(issue.issue_id)
            pretty_print_issue(issue, type=CLOSED_NO_COMMIT, **vars(args))
            if issue.issue_id in excluded_commit_issue_ids:
-                print("\t - **Note: Has cherry-picked commits. Fix Version may be wrong.**")
+                print("\t - **Note: Has excluded/cherry-picked commits. Fix Version may be wrong.**")
    print()
    if not found:
        print("*Success: No problems in this category!*")
        print("##### Commits with Issue %s" % issue_id)
        print()
        for commit in commits:
-            if(commit.commit.hexsha in excludeAlreadyMergedToOldMaint):
-                print("@@@ ALREADY MERGED")
            pretty_print_commit(commit, **vars(args))
    print()
    if not found:
    print("## Total Problems: %s" % total_problems)
    print("## Issues under review: %s" % len(issues_in_review)) # not counted as a problem.
+    if fixversion_to_skip:
+        print("## fix version (for COMMIT_METADATA SKIP) %s" % fixversion_to_skip)
+
+    # Report every excluded commit, with any COMMIT_METADATA.md notes attached.
+    if len(excluded_commits) > 0:
+        print()
+        print("## Excluded commits: %d" % len(excluded_commits))
+        for commit in excluded_commits:
+            pretty_print_commit(commit, **vars(args))
+            from_commit_metadata = commit_metadata.get_commit_info(commit.commit.hexsha, skip=fixversion_to_skip)
+            if from_commit_metadata:
+                (sha, id, message) = from_commit_metadata
+                # Print any notes in the skip list here.
+                if id:
+                    print("\t- COMMIT_METADATA.md: %s" % id)
+                if message:
+                    print("\t- COMMIT_METADATA.md: %s" % message)
+            if commit.commit.hexsha in exclude_already_merged_to_old_maint:
+                print("\t- NOTE: excluded due to already being merged to old maint")
+    # save out the dev cache here, so that any 1-off issues are also included in the cache
+    save_issues_to_cache(args, issues)
+
+
if __name__ == "__main__":
main()
--- /dev/null
+# Copyright (C) 2022 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+# Author: srloomis@unicode.org
+
+from re import compile
+from typing import Tuple
+import typing
+
+commit_line = compile('^\s*-\s+([0-9a-f]+)\s+([^\s]+)\s+(.*)$') # '- <sha> <hypen-or-ticketid> <message>'
+verb_line = compile('^\s*#\s+([A-Z]+)\s+(.*)$') # '# ACTION any other content…'
+
+class CommitMetadata():
+ def __init__(self, metadata_file=None) -> None:
+ skipset = {}
+ fixset = [] # sha, id, message
+ current_skip = None
+ if metadata_file:
+ with open(metadata_file, 'r') as f:
+ for line in f.readlines():
+ line = line.strip()
+ m = commit_line.match(line)
+ if m:
+ sha = m.group(1)
+ id = m.group(2)
+ message = m.group(3)
+ fixset.append((sha, id, message))
+ if current_skip:
+ skipset[current_skip].append((sha, id, message))
+ continue
+ m = verb_line.match(line)
+ if m:
+ action = m.group(1)
+ content = m.group(2)
+ # for now we only support SKIP
+ assert action == "SKIP", "Unknown action %s in %s (expected 'SKIP')" % (action, line)
+ current_skip = content
+ assert content not in skipset, "Error: two sections like %s" % line
+ skipset[content] = [] # initialize this
+ continue
+ self.fixset = fixset
+ self.skipset = skipset
+ pass
+
+ def get_commit_info(self, commit, skip=None) -> typing.Tuple[str, str, str]:
+ """Return an override line given a commit
+
+ Args:
+ commit (str): hash or short hash of a commit
+ skip (str): If set, a version to skip such as 'v41' (this will match a section - SKIP v41)
+
+ Returns:
+ sha (str): original sha from line
+ id (str): ticket id or hash
+ message (str): override message
+ """
+ if skip:
+ if not skip in self.skipset:
+ return None
+ return CommitMetadata.match_list(commit, self.skipset[skip])
+ else:
+ return CommitMetadata.match_list(commit, self.fixset)
+
+ @staticmethod
+ def match_list(commit, l) -> any:
+ """Find the first match in a list of commits
+
+ Args:
+ commit (str): short or long commit string
+ l (list): list of tuples, where item 0 is the commit
+
+ Returns:
+ any: matching list member, or None
+ """
+ for i in l:
+ if CommitMetadata.match_commit(commit, i[0]):
+ return i
+ return None
+
+ @staticmethod
+ def match_commit(h1, h2) -> bool:
+ """return true if the prefix of the hashes are the same"""
+ comm = min(len(h1), len(h2))
+ return h1[0:comm] == h2[0:comm]
--- /dev/null
+# Copyright (C) 2022 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+# Author: srloomis@unicode.org
+
+from commit_metadata import CommitMetadata
+
def test_hash():
    """match_commit compares hashes only over their common prefix length."""
    full = "512a679aba22e144a4443df5f8f304e4f8b39054"
    short = "512a679aba"
    other = "16aae80199"
    assert CommitMetadata.match_commit(short, full)
    assert CommitMetadata.match_commit(short, short)
    assert CommitMetadata.match_commit(full, full)
    assert not CommitMetadata.match_commit(short, other)
    assert not CommitMetadata.match_commit(full, other)
+
def test_matchcommit():
    """match_list returns the first prefix-matching entry, or None."""
    full = "512a679aba22e144a4443df5f8f304e4f8b39054"
    short = "512a679aba"
    short_entries = [("00decafbad", 44), (short, 99)]
    full_entries = [("00decafbad", 44), (full, 99)]
    assert CommitMetadata.match_list(short, short_entries) == (short, 99)
    assert CommitMetadata.match_list(full, short_entries) == (short, 99)
    assert CommitMetadata.match_list(short, full_entries) == (full, 99)
    assert not CommitMetadata.match_list("16aae80199", short_entries)
    assert not CommitMetadata.match_list("16aae80199", full_entries)
+
def test_read():
    """Parse the checked-in fixture and verify lookups with and without skip."""
    m = CommitMetadata(metadata_file="./TEST_COMMIT_METADATA.md")
    assert m

    # unknown hash
    assert not m.get_commit_info('00000000')  # not in list
    # known entries resolve and carry the expected ticket ids
    info = m.get_commit_info('00decafbad')
    assert info
    assert info[1].startswith('CLDR-0000')
    info = m.get_commit_info('56ca5d5')
    assert info
    assert info[1].startswith('CLDR-14877')
    # short or long, same
    assert m.get_commit_info('56ca5d5') == m.get_commit_info('56ca5d563cf57990a7598f570cb9be51956cb9de')

    # skip list: present in v41 section, absent from v42
    for commit in ('56ca5d5', '56ca5d563cf57990a7598f570cb9be51956cb9de'):
        assert m.get_commit_info(commit, skip='v41')
        assert not m.get_commit_info(commit, skip='v42')
    assert not m.get_commit_info('00decafbad', 'v41')
+
def test_null_read():
    """A CommitMetadata built without a file behaves as an empty mapping."""
    m = CommitMetadata(metadata_file=None)
    assert m

    # function with no info
    assert not m.get_commit_info('00000000')  # not in list
    assert not m.get_commit_info('00decafbad')
    assert not m.get_commit_info('56ca5d5')
    for version in ('v41', 'v42'):
        assert not m.get_commit_info('56ca5d5', skip=version)
+
def test_parse_42():
    """A v42 entry resolves without skip but is absent from the 'SKIP v42' section."""
    m = CommitMetadata(metadata_file="./TEST_COMMIT_METADATA.md")
    assert m

    # no skip: full entry comes back as (sha, id, message)
    sha = '02198373a591a15b804127acddd32582ec985b7e'
    info = m.get_commit_info(sha)
    assert info
    assert info == (sha, 'CLDR-15852', 'v42 merge commit')

    # skip
    info = m.get_commit_info(sha, skip='v42')
    # not found because it isn't in SKIP v42
    assert not info