Skip to content

Commit

Permalink
AsciiDoc writer: improve escaping.
Browse files Browse the repository at this point in the history
Closes #10385.
Closes #2337.
Closes #6424.
  • Loading branch information
jgm committed Dec 29, 2024
1 parent 469075a commit a14d843
Show file tree
Hide file tree
Showing 9 changed files with 137 additions and 87 deletions.
94 changes: 58 additions & 36 deletions src/Text/Pandoc/Writers/AsciiDoc.hs
Original file line number Diff line number Diff line change
Expand Up @@ -118,27 +118,46 @@ pandocToAsciiDoc opts (Pandoc meta blocks) = do
Nothing -> main
Just tpl -> renderTemplate tpl context

data EscState = Normal | AfterPlus
deriving (Show)
data EscContext = Normal | InTable
deriving (Show, Eq)

-- | Escape special characters for AsciiDoc.
escapeString :: PandocMonad m => Text -> ADW m (Doc Text)
escapeString t = do
parentTableLevel <- gets tableNestingLevel
if T.any (needsEscape parentTableLevel) t
then pure $ literal $ snd $ T.foldl' (go parentTableLevel) (Normal, mempty) t
else pure $ literal t
escapeString :: EscContext -> Text -> Doc Text
escapeString context t
| T.any needsEscape t
= literal $
case T.foldl' go (False, mempty) t of
(True, x) -> x <> "++" -- close passthrough context
(False, x) -> x
| otherwise = literal t
where
go :: Int -> (EscState, Text) -> Char -> (EscState, Text)
go _ (st, x) '{' = (st, x <> "\\{")
go ptl (st, x) '|' | ptl > 0 = (st, x <> "\\|")
go _ (AfterPlus, x) '+' = (Normal, x <> "{plus}") -- #10385
go _ (Normal, x) '+' = (AfterPlus, T.snoc x '+')
go _ (st, x) c = (st, T.snoc x c)
needsEscape _ '{' = True
needsEscape _ '+' = True
needsEscape ptl '|' = ptl > 0
needsEscape _ _ = False
-- Bool is True when we are in a ++ passthrough context
go :: (Bool, Text) -> Char -> (Bool, Text)
go (True, x) '+' = (False, x <> "++" <> "{plus}") -- close context
go (False, x) '+' = (False, x <> "{plus}")
go (True, x) '|'
| context == InTable = (False, x <> "++" <> "{vbar}") -- close context
go (False, x) '|'
| context == InTable = (False, x <> "{vbar}")
go (True, x) c
| needsEscape c = (True, T.snoc x c)
| otherwise = (False, T.snoc (x <> "++") c)
go (False, x) c
| needsEscape c = (True, x <> "++" <> T.singleton c)
| otherwise = (False, T.snoc x c)

needsEscape '{' = True
needsEscape '+' = True
needsEscape '`' = True
needsEscape '*' = True
needsEscape '_' = True
needsEscape '<' = True
needsEscape '>' = True
needsEscape '[' = True
needsEscape ']' = True
needsEscape '\\' = True
needsEscape '|' = True
needsEscape _ = False

-- | Ordered list start parser for use in Para below.
olMarker :: Parsec Text ParserState Char
Expand Down Expand Up @@ -401,11 +420,11 @@ bulletListItemToAsciiDoc opts blocks = do
-- | Convert a list item containing text starting with @U+2610 BALLOT BOX@
-- or @U+2612 BALLOT BOX WITH X@ to asciidoctor checkbox syntax (e.g. @[x]@).
taskListItemToAsciiDoc :: [Block] -> [Block]
taskListItemToAsciiDoc = handleTaskListItem toOrg listExt
taskListItemToAsciiDoc = handleTaskListItem toAd listExt
where
toOrg (Str "" : Space : is) = Str "[ ]" : Space : is
toOrg (Str "" : Space : is) = Str "[x]" : Space : is
toOrg is = is
toAd (Str "" : Space : is) = RawInline (Format "asciidoc") "[ ]" : Space : is
toAd (Str "" : Space : is) = RawInline (Format "asciidoc") "[x]" : Space : is
toAd is = is
listExt = extensionsFromList [Ext_task_lists]

addBlock :: PandocMonad m
Expand Down Expand Up @@ -551,24 +570,27 @@ inlineToAsciiDoc opts (Subscript lst) = do
inlineToAsciiDoc opts (SmallCaps lst) = inlineListToAsciiDoc opts lst
inlineToAsciiDoc opts (Quoted qt lst) = do
isLegacy <- gets legacy
inlineListToAsciiDoc opts $
case qt of
SingleQuote
| isLegacy -> [Str "`"] ++ lst ++ [Str "'"]
| otherwise -> [Str "'`"] ++ lst ++ [Str "`'"]
DoubleQuote
| isLegacy -> [Str "``"] ++ lst ++ [Str "''"]
| otherwise -> [Str "\"`"] ++ lst ++ [Str "`\""]
contents <- inlineListToAsciiDoc opts lst
pure $ case qt of
SingleQuote
| isLegacy -> "`" <> contents <> "'"
| otherwise -> "'`" <> contents <> "`'"
DoubleQuote
| isLegacy -> "``" <> contents <> "''"
| otherwise -> "\"`" <> contents <> "`\""
inlineToAsciiDoc _ (Code _ str) = do
isLegacy <- gets legacy
let escChar '`' = "\\'"
escChar c = T.singleton c
let contents = literal (T.concatMap escChar str)
return $
if isLegacy
then text "`" <> contents <> "`"
else text "`+" <> contents <> "+`"
inlineToAsciiDoc _ (Str str) = escapeString str
parentTableLevel <- gets tableNestingLevel
let content
| isLegacy = literal (T.concatMap escChar str)
| otherwise = escapeString
(if parentTableLevel > 0 then InTable else Normal) str
return $ text "`" <> content <> "`"
inlineToAsciiDoc _ (Str str) = do
parentTableLevel <- gets tableNestingLevel
pure $ escapeString (if parentTableLevel > 0 then InTable else Normal) str
inlineToAsciiDoc _ (Math InlineMath str) = do
isLegacy <- gets legacy
modify $ \st -> st{ hasMath = True }
Expand Down
4 changes: 2 additions & 2 deletions test/Tests/Writers/AsciiDoc.hs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ tests = [ testGroup "emphasis"
"__foo__bar"
, testAsciidoc "emph quoted" $
para (doubleQuoted (emph (text "foo"))) =?>
"``__foo__''"
"``_foo_''"
, testAsciidoc "strong word before" $
para (text "foo" <> strong (text "bar")) =?>
"foo**bar**"
Expand All @@ -45,7 +45,7 @@ tests = [ testGroup "emphasis"
"**foo**bar"
, testAsciidoc "strong quoted" $
para (singleQuoted (strong (text "foo"))) =?>
"`**foo**'"
"`*foo*'"
]
, testGroup "blocks"
[ testAsciidoc "code block without line numbers" $
Expand Down
24 changes: 20 additions & 4 deletions test/command/10385.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,22 @@
```
````
% pandoc -t asciidoc
C+ C++ C+++
^D
C+ C+{plus} C+{plus}+
C+ C++ C+++ `++`
`` ` `` \`hi\`
`hi\there`` ok`
```
++`
```
^D
C{plus} C{plus}{plus} C{plus}{plus}{plus} `{plus}{plus}`
`++`++` ++`++hi++`++
`hi++\++there++``++ ok`
....
++`
....
````
6 changes: 6 additions & 0 deletions test/command/2337.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
```
% pandoc -t asciidoc -f html
<a href="http://example.com">][</a>
^D
http://example.com[++][++]
```
2 changes: 1 addition & 1 deletion test/command/4545.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Test 2
^D
Test 1
{empty}[my text]
++[++my text++]++
Test 2
```
Expand Down
6 changes: 6 additions & 0 deletions test/command/6424.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
```
% pandoc -t asciidoc
test^[this is a note\]. and more]
^D
testfootnote:[this is a note++]++. and more]
```
2 changes: 1 addition & 1 deletion test/command/8665.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@
[cols=",",options="header",]
|===
|h1 |h2
|!@#$%^&*()\{}\|~?+-',."<>[]\` |col 2
|!@#$%^&++*++()++{++}{vbar}~?{plus}-',."++<>[]\`++ |col 2
|===
```
48 changes: 24 additions & 24 deletions test/writer.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ In Markdown 1.0.0 and earlier. Version 8. This line turns into a list item.
Because a hard-wrapped line in the middle of a paragraph looked like a list
item.

Here’s one with a bullet. * criminey.
Here’s one with a bullet. ++*++ criminey.

There should be a hard line break +
here.
Expand Down Expand Up @@ -84,7 +84,7 @@ ____
--
____

This should not be a block quote: 2 > 1.
This should not be a block quote: 2 ++>++ 1.

And a following paragraph.

Expand Down Expand Up @@ -424,7 +424,7 @@ So is *_this_* word.

So is *_this_* word.

This is code: `+>+`, `+$+`, `+\+`, `+\$+`, `+<html>+`.
This is code: `++>++`, `$`, `++\++`, `++\++$`, `++<++html++>++`.

[line-through]#This is _strikeout_.#

Expand All @@ -447,7 +447,7 @@ a^b c^d, a~b c~d.

'`He said, "`I want to go.`"`' Were you alive in the 70’s?

Here is some quoted '``+code+``' and a "`http://example.com/?foo=1&bar=2[quoted
Here is some quoted '``code``' and a "`http://example.com/?foo=1&bar=2[quoted
link]`".

Some dashes: one—two — three—four — five.
Expand Down Expand Up @@ -477,11 +477,11 @@ latexmath:[\alpha + \omega \times x^2].

These shouldn’t be math:

* To get the famous equation, write `+$e = mc^2$+`.
* To get the famous equation, write `$e = mc^2$`.
* $22,000 is a _lot_ of money. So is $34,000. (It worked if "`lot`" is
emphasized.)
* Shoes ($20) and socks ($5).
* Escaped `+$+`: $73 _this should be emphasized_ 23$.
* Escaped `$`: $73 _this should be emphasized_ 23$.

Here’s a LaTeX table:

Expand All @@ -503,39 +503,39 @@ AT&T is another way to write it.

This & that.

4 < 5.
4 ++<++ 5.

6 > 5.
6 ++>++ 5.

Backslash: \
Backslash: ++\++

Backtick: `
Backtick: ++`++

Asterisk: *
Asterisk: ++*++

Underscore: _
Underscore: ++_++

Left brace: \{
Left brace: ++{++

Right brace: }

Left bracket: [
Left bracket: ++[++

Right bracket: ]
Right bracket: ++]++

Left paren: (

Right paren: )

Greater-than: >
Greater-than: ++>++

Hash: #

Period: .

Bang: !

Plus: +
Plus: {plus}

Minus: -

Expand All @@ -557,7 +557,7 @@ link:/url/[URL and title]

link:/url/[URL and title]

link:/url/with_underscore[with_underscore]
link:/url/with_underscore[with++_++underscore]

mailto:nobody@nowhere.net[Email link]

Expand All @@ -567,7 +567,7 @@ link:[Empty].

Foo link:/url/[bar].

With link:/url/[embedded [brackets]].
With link:/url/[embedded ++[++brackets++]++].

link:/url/[b] by itself should be a link.

Expand All @@ -577,7 +577,7 @@ Indented link:/url[twice].

Indented link:/url[thrice].

This should [not][] be a link.
This should ++[++not++][]++ be a link.

....
[not]: /url
Expand Down Expand Up @@ -611,7 +611,7 @@ ____
Blockquoted: http://example.com/
____

Auto-links should not occur here: `+<http://example.com/>+`
Auto-links should not occur here: `++<++http://example.com/++>++`

....
or here: <http://example.com/>
Expand All @@ -635,10 +635,10 @@ Here is a movie image:movie.jpg[movie] icon.
Here is a footnote reference,footnote:[Here is the footnote. It can go anywhere
after the footnote reference. It need not be placed at the end of the document.]
and another.[multiblock footnote omitted] This should _not_ be a footnote
reference, because it contains a space.[^my note] Here is an inline
reference, because it contains a space.++[++^my note++]++ Here is an inline
note.footnote:[This is _easier_ to type. Inline notes may contain
http://google.com[links] and `+]+` verbatim characters, as well as [bracketed
text].]
http://google.com[links] and `++]++` verbatim characters, as well as
++[++bracketed text++]++.]

____
Notes can go in quotes.footnote:[In quote.]
Expand Down
Loading

0 comments on commit a14d843

Please sign in to comment.