-
Notifications
You must be signed in to change notification settings - Fork 3
/
rst_parser.py
145 lines (109 loc) · 4.7 KB
/
rst_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
"""A validator for the reStructuredText changelog format."""
import re

import docutils.frontend
import docutils.nodes
import docutils.parsers.rst
import docutils.utils
# Public API: the only name exported by a star-import of this module.
__all__ = ['RstChangelog']
# Section title of a release: an optional "v", then a run of digits, dots and
# the letters of "rc", a space, and a parenthesised word (date or similar),
# e.g. "v1.0rc1 (2020-01-01)".  Inside a character class "|" is a literal
# pipe, not alternation, so it is deliberately not listed.
VERSION_PATTERN = re.compile(r'^v?[0-9rc.]+ \([\w\-]+\)')

# A bracketed block of references such as "[#12]" or "[#12, #34]".  The block
# ends at the first closing bracket so that several blocks in one entry are
# matched separately and stray "#NN" text between them is not swallowed; the
# negated class also matches newlines, so a block may wrap across lines.  The
# first character after "[#" must be neither "#" nor "]", which still allows
# single-digit references like "[#5]".
BLOCK_PATTERN = re.compile(r'\[#[^#\]][^\]]*\]')

# A single issue / pull request reference: "#" followed by digits.
ISSUE_PATTERN = re.compile(r'#[0-9]+')
def find_prs_in_entry(content):
    """Return the issue numbers referenced in a changelog entry.

    Every span of *content* matched by ``BLOCK_PATTERN`` is scanned with
    ``ISSUE_PATTERN``; each hit has its leading ``#`` stripped and is converted
    to ``int``.  Numbers are returned in order of appearance and duplicates
    are kept.
    """
    numbers = []
    for bracketed in BLOCK_PATTERN.finditer(content):
        references = ISSUE_PATTERN.findall(bracketed.group())
        # Each reference looks like "#123"; drop the hash before converting.
        numbers.extend(int(reference[1:]) for reference in references)
    return numbers
class BulletItemVisitor(docutils.nodes.NodeVisitor):
    """Node visitor that gathers list items and reports anything unexpected.

    Parameters
    ----------
    document
        The docutils document, passed on to ``NodeVisitor``.
    warn
        Callable taking a single message string; called for every node that
        is not one of the accepted node types.
    """

    def __init__(self, document, warn):
        super().__init__(document)
        self.warn = warn
        self.reset_bullet_items()

    def reset_bullet_items(self):
        """Forget the list items collected so far."""
        self.bullet_items = []

    def dispatch_visit(self, node):
        """Collect list items, skip titles and system messages, warn otherwise."""
        nodes = docutils.nodes

        if isinstance(node, nodes.list_item):
            # Keep the item itself but do not descend into its content.
            self.bullet_items.append(node)
            raise nodes.SkipChildren

        if isinstance(node, (nodes.title, nodes.system_message)):
            raise nodes.SkipChildren

        # Sections and bullet lists are containers: accept them silently and
        # let the walk continue into their children.
        if not isinstance(node, (nodes.section, nodes.bullet_list)):
            self.warn(f'Unexpected content: {node.astext()} ({str(type(node))})')
class RstChangelog:
    """Validate a reStructuredText changelog and index the issues it lists.

    At the top level the changelog should consist of sections whose title is
    a version number followed by a release date; each of them contains
    bullet-point entries, optionally grouped in nested sections.  Problems
    found while parsing are printed and collected in ``warnings``.  After
    ``parse_string`` or ``parse_file`` has run, the issue numbers found in the
    entries can be queried per version.
    """

    def __init__(self):
        self.warnings = []
        # Created here so the query properties and methods are usable (and
        # simply empty) before anything has been parsed.
        self._issues_by_version = {}
        self.document = None

    def warn(self, message):
        """Print *message* and record it in ``warnings``."""
        print('WARNING:', message)
        self.warnings.append(message)

    def _validate_version(self, string):
        """Return the version part of a section title, or None if it is invalid.

        A warning is emitted when *string* does not match ``VERSION_PATTERN``.
        """
        if VERSION_PATTERN.match(string) is None:
            self.warn(f"Invalid version string: {string}")
            return None
        return string.split()[0]

    def _parse_observer(self, data):
        """Reporter observer: turn system messages above level 1 into warnings."""
        if data['level'] > 1:
            self.warn(data.children[0].astext())
        return data

    @staticmethod
    def _section_name(node):
        """Return the normalized title of a section node, or None.

        None is returned for anything that is not a section.  When docutils
        has flagged the title as a duplicate it moves the name from ``names``
        to ``dupnames``, so that list is used as a fallback.
        """
        if not isinstance(node, docutils.nodes.section):
            return None
        names = node.attributes['names'] or node.attributes['dupnames']
        return names[0] if names else None

    def parse_file(self, filename):
        """Read *filename* as UTF-8 and parse it with ``parse_string``."""
        # An explicit encoding keeps the result independent of the locale.
        with open(filename, encoding='utf-8') as f:
            text = f.read()
        return self.parse_string(text)

    def parse_string(self, text):
        """Parse changelog *text*, collecting warnings and issues per version.

        Any result of a previous parse is replaced; ``warnings`` keeps
        accumulating.  Issues of sections that resolve to the same version are
        merged; sections with an invalid title are still checked for
        unexpected content but their issues are not recorded.
        """
        # Parse as rst
        parser = docutils.parsers.rst.Parser()
        if hasattr(docutils.frontend, 'get_default_settings'):
            # docutils >= 0.18
            settings = docutils.frontend.get_default_settings(
                docutils.parsers.rst.Parser)
        else:  # pragma: no cover
            # docutils < 0.18
            settings = docutils.frontend.OptionParser(
                components=(docutils.parsers.rst.Parser,)).get_default_values()
        document = docutils.utils.new_document('<rst-doc>', settings=settings)
        # Silence the reporter's own output; messages reach us via the observer.
        document.reporter.stream = None
        document.reporter.attach_observer(self._parse_observer)
        parser.parse(text, document)

        # At the top level, the document should just include sections whose
        # name is a version number followed by a release date.
        visitor = BulletItemVisitor(document, self.warn)

        # Some changelogs include a title, which we can just ignore: a single
        # top-level section that does not look like a version is taken to be
        # that title, and the nodes following its title node are examined
        # instead.  The pattern is tested directly so that ignoring the title
        # does not itself produce an "invalid version" warning.
        top_level = document
        if len(document.children) == 1:
            title = self._section_name(document.children[0])
            if title is not None and VERSION_PATTERN.match(title) is None:
                top_level = document.children[0].children[1:]

        self._issues_by_version = {}

        for section in top_level:
            if isinstance(section, docutils.nodes.system_message):
                # Parser diagnostics are handled by _parse_observer.
                continue

            title = self._section_name(section)
            if title is None:
                # Not a titled section, so there is no version to read.
                self.warn(f'Unexpected content: {section.astext()} ({str(type(section))})')
                continue

            version = self._validate_version(title)

            # Inside each version section there may either directly be a list
            # of entries, or more nested sections. We use a visitor class to
            # search for all the bullet point entries, and in the process we
            # make sure that there are only section titles and bullet point
            # entries in the section
            visitor.reset_bullet_items()
            section.walk(visitor)

            # Go over the bullet point items and find the PR numbers
            issues = []
            for item in visitor.bullet_items:
                issues.extend(find_prs_in_entry(item.astext()))

            # An invalid title has already been warned about; storing it under
            # a None key would make sorting the versions fail.
            if version is not None:
                self._issues_by_version.setdefault(version, []).extend(issues)

        # Either the docutils document, or the list of nodes below the title
        # when a title section was stripped.
        self.document = top_level

    @property
    def versions(self):
        """Sorted list of the version strings found (plain string ordering)."""
        return sorted(self._issues_by_version)

    @property
    def issues(self):
        """Sorted list of the distinct issue numbers found in any version."""
        all_issues = set()
        for issues in self._issues_by_version.values():
            all_issues.update(issues)
        return sorted(all_issues)

    def issues_for_version(self, version):
        """Return the issues listed under *version*; KeyError if it is unknown."""
        return self._issues_by_version[version]

    def versions_for_issue(self, issue):
        """Return the versions, in document order, whose entries mention *issue*."""
        return [
            version
            for version, issues in self._issues_by_version.items()
            if issue in issues
        ]