forked from bangumi/server
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparser.go
210 lines (173 loc) · 5.25 KB
/
parser.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
// SPDX-License-Identifier: AGPL-3.0-only
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published
// by the Free Software Foundation, version 3.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>
package wiki
import (
"errors"
"fmt"
"strings"
"github.com/bangumi/server/internal/pkg/generic/slice"
)
// ErrWikiSyntax is the root sentinel for every syntax problem reported by the
// parser. All the specific errors declared below wrap it, so
// errors.Is(err, ErrWikiSyntax) matches any of them.
var ErrWikiSyntax = errors.New("invalid wiki syntax")

// Specific syntax errors. Each one wraps ErrWikiSyntax and adds a short
// description of what the parser expected to find.
var (
	// ErrGlobalPrefix: the input does not start with "{{Infobox".
	ErrGlobalPrefix = syntaxSentinel("missing prefix '{{Infobox' at the start")
	// ErrGlobalSuffix: the input does not end with "}}".
	ErrGlobalSuffix = syntaxSentinel("missing '}}' at the end")
	// ErrArrayNoClose: an array was opened with '{' but not closed with '}'.
	ErrArrayNoClose = syntaxSentinel("array should be closed by '}'")
	// ErrArrayItemWrapped: a line inside an array is not of the form "[...]".
	ErrArrayItemWrapped = syntaxSentinel("array item should be wrapped by '[]'")
	// ErrExpectingNewField: a line outside an array does not start with '|'.
	ErrExpectingNewField = syntaxSentinel("missing '|' to start a new field")
	// ErrExpectingSignEqual: a field line has no '=' between key and value.
	ErrExpectingSignEqual = syntaxSentinel("missing '=' to separate field name and value")
)

// syntaxSentinel returns an error that wraps ErrWikiSyntax and whose message is
// "<ErrWikiSyntax message>: <detail>".
func syntaxSentinel(detail string) error {
	return fmt.Errorf("%w: %s", ErrWikiSyntax, detail)
}
// ParseOmitError parses s with Parse and discards any error: when Parse fails,
// the zero Wiki value is returned instead of the error.
func ParseOmitError(s string) Wiki {
	if parsed, err := Parse(s); err == nil {
		return parsed
	}

	return Wiki{}
}
// Literal delimiters that Parse requires around a whole infobox.
const (
	// prefix must appear at the very start of the input.
	prefix = "{{Infobox"
	// suffix must appear at the very end of the input.
	suffix = "}}"
)
// Parse parses s as a wiki infobox and returns the resulting Wiki.
//
// The input is first passed through processInput; if the result is empty, an
// empty Wiki and a nil error are returned. Otherwise the text must start with
// "{{Infobox" (else ErrGlobalPrefix) and end with "}}" (else ErrGlobalSuffix).
// The infobox type is taken from the first line by readType.
//
// Every following line that is terminated by '\n' is then scanned:
//   - blank lines (after trimSpace) are skipped;
//   - "|key = value" adds a scalar field, "|key =" adds a Null field and
//     "|key = {" opens an array field;
//   - inside an array, "}" closes it and any other line is read with
//     readArrayItem;
//   - anything else is rejected with ErrExpectingNewField.
//
// The trailing segment after the final '\n' (which ends with the "}}" suffix)
// is never examined as a field line.
//
// On any syntax error the zero Wiki is returned together with the error
// produced by wrapError, which receives the sentinel error, a line number and
// the offending line text.
//
//nolint:funlen,gocognit,gocyclo
func Parse(s string) (Wiki, error) {
var w = Wiki{}
// NOTE(review): processInput is defined elsewhere; it presumably normalizes the
// text and reports a line offset used for error line numbers — confirm there.
s, lineOffset := processInput(s)
if s == "" {
return w, nil
}
if !strings.HasPrefix(s, prefix) {
return Wiki{}, ErrGlobalPrefix
}
// number of '\n' in the input; used below to size the result slices
eolCount := strings.Count(s, "\n")
if !strings.HasSuffix(s, suffix) {
return Wiki{}, ErrGlobalSuffix
}
w.Type = readType(s)
w.Fields = make([]Field, 0) // make zero value in json '[]', no alloc with cap 0
// with at most one '\n' there is no complete line between the first line and
// the trailing segment, so there are no fields to read
if eolCount <= 1 {
return w, nil
}
// at most eolCount-1 lines are scanned by the loop, so this capacity is an upper bound
w.Fields = make([]Field, 0, eolCount-1)
// scratch buffer that collects the items of the array currently being read;
// it is emptied and reused after each array is closed.
var itemContainer = make([]Item, 0, eolCount-2)
// loop state: whether we are between "{" and "}", and the array field being built
var inArray = false
var currentField Field
// cursors used to walk the input line by line
var firstEOL = strings.IndexByte(s, '\n') // skip first line
var secondLastEOL = 0 // start offset of the most recently read line
var lastEOL = firstEOL + 1 // start offset of the next line to read
var lino = lineOffset - 1 // current line number, incremented before each line is handled
var offset int
var line string
for {
// iterate lines by slicing s in place instead of splitting it
offset = strings.IndexByte(s[lastEOL:], '\n')
if offset != -1 {
line = s[lastEOL : lastEOL+offset]
secondLastEOL = lastEOL
lastEOL = lastEOL + offset + 1
lino++
} else {
// no further '\n': only the trailing segment remains, stop scanning
if inArray {
// an array must be closed before the content ends; report the last
// complete line that was read
return Wiki{}, wrapError(ErrArrayNoClose, lino+1, s[secondLastEOL:lastEOL])
}
break
}
// now handle line content
line = trimSpace(line)
if line == "" {
continue
}
if line[0] == '|' {
// a leading '|' starts a new field
currentField = Field{}
// a new field may not start while an array is still open
if inArray {
return Wiki{}, wrapError(ErrArrayNoClose, lino, line)
}
key, value, err := readStartLine(trimLeftSpace(line[1:])) // read "key = value"
if err != nil {
return Wiki{}, wrapError(err, lino, line)
}
switch value {
case "":
// "key =" with nothing after the '=': record a Null field
w.Fields = append(w.Fields, Field{Key: key, Null: true})
continue
case "{":
// "key = {": open an array; the field is appended once "}" is seen
inArray = true
currentField.Key = key
currentField.Array = true
continue
}
// any other value is a plain scalar field
w.Fields = append(w.Fields, Field{Key: key, Value: value})
continue
}
if inArray {
if line == "}" { // close array
inArray = false
// store a copy (slice.Clone) because itemContainer is emptied and reused below
currentField.Values = slice.Clone(itemContainer)
itemContainer = itemContainer[:0]
w.Fields = append(w.Fields, currentField)
continue
}
// any other line inside an array must be an item
key, value, err := readArrayItem(line)
if err != nil {
return Wiki{}, wrapError(err, lino, line)
}
itemContainer = append(itemContainer, Item{
Key: key,
Value: value,
})
}
// reached with inArray still true after an item was appended (no error), or
// with inArray false for a line that neither starts with '|' nor belongs to an array
if !inArray {
return Wiki{}, wrapError(ErrExpectingNewField, lino, line)
}
}
return w, nil
}
func readType(s string) string {
i := strings.IndexByte(s, '\n')
if i == -1 {
i = strings.IndexByte(s, '}') // {{Infobox Crt}}
}
return trimSpace(s[len(prefix):i])
}
// readArrayItem reads a whole line as one array item of the form "[key|value]"
// or "[value]". Both parts are passed through trimSpace.
//
//	readArrayItem("[name|Lelouch]") => "name", "Lelouch", nil
//	readArrayItem("[name|]")        => "name", "", nil
//	readArrayItem("[Lelouch]")      => "", "Lelouch", nil
//
// ErrArrayItemWrapped is returned when the line does not start with '[' and
// end with ']'. This includes the empty string, so the function does not
// panic when called with an empty line.
func readArrayItem(line string) (string, string, error) {
	// HasPrefix/HasSuffix are safe on an empty string, unlike indexing line[0].
	if !strings.HasPrefix(line, "[") || !strings.HasSuffix(line, "]") {
		return "", "", ErrArrayItemWrapped
	}

	// Both checks passing implies len(line) >= 2, so this slice is in range.
	content := line[1 : len(line)-1]

	// Only the first '|' separates key from value; later ones stay in the value.
	key, value, hasKey := strings.Cut(content, "|")
	if !hasKey {
		return "", trimSpace(content), nil
	}

	return trimSpace(key), trimSpace(value), nil
}
// readStartLine splits a field line (already stripped of its leading '|') at
// the first '=' into a key and a value. The key is passed through
// trimRightSpace and the value through trimLeftSpace.
//
//	readStartLine("air date = 2017-04-16") => "air date", "2017-04-16", nil
//	readStartLine("air date = ")           => "air date", "", nil
//
// ErrExpectingSignEqual is returned when the line contains no '='.
func readStartLine(line string) (string, string, error) {
	eq := strings.IndexByte(line, '=')
	if eq < 0 {
		return "", "", ErrExpectingSignEqual
	}

	rawKey, rawValue := line[:eq], line[eq+1:]

	return trimRightSpace(rawKey), trimLeftSpace(rawValue), nil
}