Skip to content

Commit

Permalink
feat(parser): Update parser to 2.0.0 (#100)
Browse files Browse the repository at this point in the history
Fixes: #99
  • Loading branch information
zkat authored Dec 15, 2024
1 parent f31750a commit a1919a0
Show file tree
Hide file tree
Showing 111 changed files with 223 additions and 53 deletions.
162 changes: 118 additions & 44 deletions src/v2_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -882,9 +882,8 @@ fn node_space1(input: &mut Input<'_>) -> PResult<()> {
repeat(1.., node_space).parse_next(input)
}

/// `string := identifier-string | quoted-string | raw-string`
/// string := identifier-string | quoted-string | raw-string
pub(crate) fn string(input: &mut Input<'_>) -> PResult<Option<KdlValue>> {
// TODO: shouldn't put the `resume_after_cut`s here, because they mess with context from higher levels.
trace(
"string",
alt((
Expand Down Expand Up @@ -1018,32 +1017,65 @@ fn equals_sign(input: &mut Input<'_>) -> PResult<()> {
}

/// ```text
/// quoted-string := '"' single-line-string-body '"' | '"""' newline multi-line-string-body newline unicode-space*) '"""'
/// quoted-string := '"' single-line-string-body '"' | '"""' newline multi-line-string-body newline (unicode-space | ('\' (unicode-space | newline)+)*) '"""'
/// single-line-string-body := (string-character - newline)*
/// multi-line-string-body := string-character*
/// multi-line-string-body := (('"' | '""')? string-character)*
/// ```
fn quoted_string<'s>(input: &mut Input<'s>) -> PResult<KdlValue> {
let quotes = alt((("\"\"\"", newline).take(), "\"")).parse_next(input)?;
fn quoted_string(input: &mut Input<'_>) -> PResult<KdlValue> {
let quotes =
alt((
(
"\"\"\"",
cut_err(newline).context(cx().lbl("multi-line string newline").msg(
"Multi-line string opening quotes must be immediately followed by a newline",
)),
)
.take(),
"\"",
))
.parse_next(input)?;
let is_multiline = quotes.len() > 1;
let ml_prefix: Option<String> = if is_multiline {
Some(
peek(preceded(
cut_err(peek(preceded(
repeat_till(
0..,
(
repeat(0.., (not(newline), opt(ws_escape), string_char)).map(|()| ()),
repeat(
0..,
(
not(newline),
alt((
ws_escape.void(),
trace(
"valid string body char(s)",
alt((
('\"', not("\"\"")).void(),
('\"', not("\"")).void(),
string_char.void(),
)),
)
.void(),
)),
),
)
.map(|()| ()),
newline,
),
peek(terminated(
repeat(0.., unicode_space).map(|()| ()),
repeat(0.., alt((ws_escape, unicode_space))).map(|()| ()),
"\"\"\"",
)),
)
.map(|((), ())| ()),
terminated(repeat(0.., unicode_space).map(|()| ()).take(), "\"\"\""),
))
.parse_next(input)?
.to_string(),
terminated(
repeat(0.., alt((ws_escape.map(|_| ""), unicode_space.take())))
.map(|s: String| s),
"\"\"\"",
),
)))
.context(cx().lbl("multi-line string"))
.parse_next(input)?,
)
} else {
None
Expand All @@ -1052,30 +1084,40 @@ fn quoted_string<'s>(input: &mut Input<'s>) -> PResult<KdlValue> {
let parser = repeat_till(
0..,
(
cut_err(alt((&prefix[..], peek(newline).take())))
cut_err(alt(((&prefix[..]).void(), peek(empty_line).void())))
.context(cx().msg("matching multiline string prefix").lbl("bad prefix").hlp("Multi-line string bodies must be prefixed by the exact same whitespace as the leading whitespace before the closing '\"\"\"'")),
alt((
newline.take().map(|_| "\n".to_string()),
empty_line.map(|s| s.to_string()),
repeat_till(
0..,
(not(newline), opt(ws_escape), string_char).map(|(_, _, s)| s),
(
not(newline),
alt((
ws_escape.map(|_| None),
alt((
('\"', not("\"\"")).map(|(c, ())| Some(c)),
('\"', not("\"")).map(|(c, ())| Some(c)),
string_char.map(Some),
))
))
).map(|(_, c)| c),
newline,
)
// multiline string literal newlines are normalized to `\n`
.map(|(s, _): (String, _)| format!("{s}\n")),
.map(|(cs, _): (Vec<Option<char>>, _)| cs.into_iter().flatten().chain(vec!['\n']).collect::<String>()),
)),
)
.map(|(_, s)| s),
(
&prefix[..],
repeat(0.., unicode_space).map(|()| ()).take(),
repeat(0.., ws_escape.void()).map(|()| ()),
peek("\"\"\""),
),
)
.map(|(s, _): (Vec<String>, (_, _, _))| {
let mut s = s.join("");
// Slice off the `\n` at the end of the last line.
s.truncate(s.len() - 1);
s.truncate(s.len().saturating_sub(1));
s
})
.context(cx().lbl("multi-line quoted string"));
Expand All @@ -1090,13 +1132,14 @@ fn quoted_string<'s>(input: &mut Input<'s>) -> PResult<KdlValue> {
.hlp("You can make a string multi-line by wrapping it in '\"\"\"', with a newline immediately after the opening quotes."),
),
),
opt(ws_escape),
string_char,
)
.map(|(_, _, s)| s),
(repeat(0.., unicode_space).map(|()| ()).take(), peek("\"")),
alt((
ws_escape.map(|_| None),
string_char.map(Some),
))
).map(|(_, c)| c),
peek("\"")
)
.map(|(s, (end, _)): (String, (&'s str, _))| format!("{s}{end}"))
.map(|(cs, _): (Vec<Option<char>>, _)| cs.into_iter().flatten().collect::<String>())
.context(cx().lbl("quoted string"));
cut_err(parser).parse_next(input)?
};
Expand All @@ -1112,8 +1155,19 @@ fn quoted_string<'s>(input: &mut Input<'s>) -> PResult<KdlValue> {
Ok(KdlValue::String(body))
}

/// Consumes one whitespace-only line of a multi-line string body.
///
/// Skips any run (possibly empty) of whitespace escapes and unicode spaces,
/// then requires a `newline`. Whatever was matched is discarded: on success
/// the result is always the literal `"\n"`.
fn empty_line(input: &mut Input<'_>) -> PResult<&'static str> {
    // Padding and the line terminator are matched strictly in sequence; a
    // failure in either one is propagated to the caller as-is.
    (
        repeat(0.., alt((ws_escape.void(), unicode_space.void()))).map(|()| ()),
        newline,
    )
        .map(|_| "\n")
        .parse_next(input)
}

/// Like badval, but is able to slurp up invalid raw strings, which contain whitespace.
fn quoted_string_badval(input: &mut Input<'_>) -> PResult<()> {
// TODO(@zkat): this should have different behavior based on whether we're
// resuming a single or multi-line string. Right now, multi-liners end up
// with silly errors.
(
repeat_till(
0..,
Expand All @@ -1135,19 +1189,25 @@ fn quoted_string_terminator(input: &mut Input<'_>) -> PResult<()> {
/// ```
fn string_char(input: &mut Input<'_>) -> PResult<char> {
    // Two alternatives, tried in order:
    //   1. whatever `escaped_char` accepts, and
    //   2. any single character that is not rejected by `disallowed_unicode`
    //      and is neither a backslash nor a double quote.
    // Each alternative is wrapped in `trace` under its own label. The
    // untraced duplicates of the same two alternatives have been dropped so
    // that every character is attempted once per alternative, not twice.
    alt((
        trace("escaped char", escaped_char),
        trace(
            "regular string char",
            (not(disallowed_unicode), none_of(['\\', '"'])).map(|(_, c)| c),
        ),
    ))
    .parse_next(input)
}

fn ws_escape(input: &mut Input<'_>) -> PResult<()> {
(
"\\",
repeat(1.., alt((unicode_space, newline))).map(|()| ()),
trace(
"ws_escape",
(
"\\",
repeat(1.., alt((unicode_space, newline))).map(|()| ()),
),
)
.void()
.parse_next(input)
.void()
.parse_next(input)
}

/// ```text
Expand Down Expand Up @@ -1182,10 +1242,13 @@ fn escaped_char(input: &mut Input<'_>) -> PResult<char> {
.parse_next(input)
}

/// `raw-string := '#' raw-string-quotes '#' | '#' raw-string '#'`
/// `raw-string-quotes := '"' single-line-raw-string-body '"' | '"""' newline multi-line-raw-string-body newline unicode-space*) '"""'`
/// `single-line-raw-string-body := (unicode - newline - disallowed-literal-code-points)*`
/// `multi-line-raw-string-body := (unicode - disallowed-literal-code-points)`
/// ```text
/// raw-string := '#' raw-string-quotes '#' | '#' raw-string '#'
/// raw-string-quotes := '"' single-line-raw-string-body '"' | '"""' newline multi-line-raw-string-body '"""'
/// single-line-raw-string-body := '' | (single-line-raw-string-char - '"') single-line-raw-string-char*? | '"' (single-line-raw-string-char - '"') single-line-raw-string-char*?
/// single-line-raw-string-char := unicode - newline - disallowed-literal-code-points
/// multi-line-raw-string-body := (unicode - disallowed-literal-code-points)*?
/// ```
fn raw_string(input: &mut Input<'_>) -> PResult<KdlValue> {
let hashes: String = repeat(1.., "#").parse_next(input)?;
let quotes = alt((("\"\"\"", newline).take(), "\"")).parse_next(input)?;
Expand Down Expand Up @@ -1229,10 +1292,10 @@ fn raw_string(input: &mut Input<'_>) -> PResult<KdlValue> {
repeat_till(
0..,
(
cut_err(alt((&prefix[..], peek(newline).take())))
cut_err(alt(((&prefix[..]).void(), peek(empty_line).void())))
.context(cx().lbl("matching multiline raw string prefix")),
alt((
newline.take().map(|_| "\n".to_string()),
empty_line.map(|s| s.to_string()),
repeat_till(
0..,
(not(newline), not(("\"\"\"", &hashes[..])), any)
Expand All @@ -1254,7 +1317,7 @@ fn raw_string(input: &mut Input<'_>) -> PResult<KdlValue> {
.map(|(s, _): (Vec<String>, (_, _, _))| {
let mut s = s.join("");
// Slice off the `\n` at the end of the last line.
s.truncate(s.len() - 1);
s.truncate(s.len().saturating_sub(1));
s
})
.parse_next(input)?
Expand Down Expand Up @@ -1311,7 +1374,7 @@ mod string_tests {
}

#[test]
fn quoted_string() {
fn single_line_quoted_string() {
assert_eq!(
string.parse(new_input("\"foo\"")).unwrap(),
Some(KdlValue::String("foo".into()))
Expand Down Expand Up @@ -1363,6 +1426,14 @@ mod string_tests {
Some(KdlValue::String("\nstring\t".into())),
"Empty line without any indentation"
);
assert_eq!(
string
.parse(new_input("\"\"\"\n   \\\n   \n   \"\"\""))
.unwrap(),
Some(KdlValue::String("".into())),
"Escaped whitespace with proper prefix"
);

assert!(string
.parse(new_input("\"\"\"\nfoo\n bar\n baz\n \"\"\""))
.is_err());
Expand Down Expand Up @@ -1491,9 +1562,9 @@ fn disallowed_unicode(input: &mut Input<'_>) -> PResult<()> {
/// `escline := '\\' ws* (single-line-comment | newline | eof)`
///
/// Matches a backslash, then `wss`, then exactly one of a single-line
/// comment, a newline, or end of input, and finally `wss` once more. Nothing
/// is returned besides success or failure.
fn escline(input: &mut Input<'_>) -> PResult<()> {
    "\\".parse_next(input)?;
    // NOTE(review): `wss` is defined outside this view; presumably it is the
    // "zero or more `ws`" helper — confirm against its definition.
    wss.parse_next(input)?;
    alt((single_line_comment, newline, eof.void())).parse_next(input)?;
    // Whitespace after the terminator is consumed as part of the escline.
    wss.parse_next(input)
}

#[cfg(test)]
Expand Down Expand Up @@ -1596,9 +1667,12 @@ fn multi_line_comment_test() {
.is_ok());
}

/// `slashdash := '/-' (node-space | line-space)*`
///
/// Matches the literal `/-` and then any run (possibly empty) of
/// `node_space` or `line_space` matches. The matched text is discarded.
fn slashdash(input: &mut Input<'_>) -> PResult<()> {
    (
        "/-",
        // `node_space` is tried before `line_space` at every repetition.
        repeat(0.., alt((node_space, line_space))).map(|()| ()),
    )
        .void()
        .parse_next(input)
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
node "\"\"\"triple-quote\"\"\"\n##\"too few quotes\"##\n#\"\"\"too few #\"\"\"#"
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
node "this string contains \"quotes\", twice\"\""
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
node "foo bar\nbaz"
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
node " foo bar\n baz"
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
node "" "" "" "\n\n " "\n"
1 change: 0 additions & 1 deletion tests/test_cases/expected_kdl/raw_string_just_quote.kdl

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
node arg2
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
node arg1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
node2
1 change: 1 addition & 0 deletions tests/test_cases/expected_kdl/slashdash_false_node.kdl
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
node foo bar
Original file line number Diff line number Diff line change
@@ -1 +1 @@
foo123~!@$%^&*.:'|?+<>, weeee
foo123~!@$%^&*.:'|?+<>,`-_ weeee
2 changes: 1 addition & 1 deletion tests/test_cases/expected_kdl/unusual_chars_in_bare_id.kdl
Original file line number Diff line number Diff line change
@@ -1 +1 @@
foo123~!@$%^&*.:'|?+<>, weeee
foo123~!@$%^&*.:'|?+<>,`-_ weeee
1 change: 0 additions & 1 deletion tests/test_cases/expected_kdl/zero_arg.kdl

This file was deleted.

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion tests/test_cases/input/eof_after_escape.kdl
Original file line number Diff line number Diff line change
@@ -1 +1 @@
node \
node \
File renamed without changes.
File renamed without changes.
1 change: 1 addition & 0 deletions tests/test_cases/input/hex.kdl
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
node 0xabcdef1234567890
File renamed without changes.
1 change: 1 addition & 0 deletions tests/test_cases/input/legacy_raw_string_fail.kdl
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
node r"foo"
1 change: 1 addition & 0 deletions tests/test_cases/input/legacy_raw_string_hash_fail.kdl
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
node r#"foo"#
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
node ##"""
"""triple-quote"""
##"too few quotes"##
#"""too few #"""#
"""##
3 changes: 3 additions & 0 deletions tests/test_cases/input/multiline_string_containing_quotes.kdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
node """
this string contains "quotes", twice""
"""
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
node """
foo \
bar
baz
\ """
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
node """
foo \
bar
baz
\ """
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
node """
foo
bar\
"""
19 changes: 19 additions & 0 deletions tests/test_cases/input/multiline_string_whitespace_only.kdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// This file deliberately contains unusual whitespace
// The first three strings are empty, because whitespace-only lines collapse to
// just `\n`.
node """
  """ """
   \

   """ """

 """\
\ // The next two strings contains only whitespace
"""


   \s
   """ #"""


"""#
Loading

0 comments on commit a1919a0

Please sign in to comment.