From d5f4e55da1ace6c07686feabc3802dab034350b3 Mon Sep 17 00:00:00 2001 From: Johannes Kaufmann Date: Sun, 13 Jun 2021 17:51:37 +0200 Subject: [PATCH] fix html not included in pre code block --- commonmark.go | 12 +++--------- from.go | 1 + .../TestCommonmark/pre_code/goldmark.golden | 12 ++++++++++++ testdata/TestCommonmark/pre_code/input.html | 17 ++++++++++++++++- .../pre_code/output.fenced_backtick.golden | 16 ++++++++++++++++ .../pre_code/output.fenced_tilde.golden | 18 +++++++++++++++++- .../pre_code/output.indented.golden | 16 ++++++++++++++++ utils.go | 12 ++++++++++++ 8 files changed, 93 insertions(+), 11 deletions(-) diff --git a/commonmark.go b/commonmark.go index 3311d65..0a8f89b 100644 --- a/commonmark.go +++ b/commonmark.go @@ -11,7 +11,6 @@ import ( "github.com/JohannesKaufmann/html-to-markdown/escape" "github.com/PuerkitoBio/goquery" - "golang.org/x/net/html" ) var multipleSpacesR = regexp.MustCompile(` +`) @@ -268,12 +267,7 @@ var commonmark = []Rule{ { Filter: []string{"code"}, Replacement: func(_ string, selec *goquery.Selection, opt *Options) *string { - code, err := selec.Html() - if err != nil { - return nil - } - // We don't want the html encoded characters to be displayed as is. - code = html.UnescapeString(code) + code := getHTML(selec) // Newlines in the text aren't great, since this is inline code and not a code block. // Newlines will be stripped anyway in the browser, but it won't be recognized as code @@ -309,9 +303,9 @@ var commonmark = []Rule{ language := codeElement.AttrOr("class", "") language = strings.Replace(language, "language-", "", 1) - code := codeElement.Text() + code := getHTML(codeElement) if codeElement.Length() == 0 { - code = selec.Text() + code = getHTML(selec) } fenceChar, _ := utf8.DecodeRuneInString(opt.Fence) diff --git a/from.go b/from.go index ea8ea94..9243b8a 100644 --- a/from.go +++ b/from.go @@ -107,6 +107,7 @@ func NewConverter(domain string, enableCommonmark bool, options *Options) *Conve conv.before = append(conv.before, func(selec *goquery.Selection) { selec.Find("a[href]").Each(func(i int, s *goquery.Selection) { + // TODO: don't hardcode "data-index" and rename it to avoid accidental conflicts s.SetAttr("data-index", strconv.Itoa(i+1)) }) }) diff --git a/testdata/TestCommonmark/pre_code/goldmark.golden b/testdata/TestCommonmark/pre_code/goldmark.golden index 3d95377..5ffd2ac 100644 --- a/testdata/TestCommonmark/pre_code/goldmark.golden +++ b/testdata/TestCommonmark/pre_code/goldmark.golden @@ -29,3 +29,15 @@ totally ~~~~~~ normal ~ code +

+The <img> tag is used to embed an image.
+
+The <img/> tag is used to embed an image.
+
+
+

+<a href="#Blabla" data-index="1">
+    <img src="http://bla.bla/img/img.svg" style="height:auto" width="200px"/>
+</a>
+
+
diff --git a/testdata/TestCommonmark/pre_code/input.html b/testdata/TestCommonmark/pre_code/input.html index ed92e08..b83b208 100644 --- a/testdata/TestCommonmark/pre_code/input.html +++ b/testdata/TestCommonmark/pre_code/input.html @@ -83,4 +83,19 @@ Some ~~~ totally ~~~~~~ normal ~ code - \ No newline at end of file + + + + +

+The <img> tag is used to embed an image.
+
+The  tag is used to embed an image.
+
+ + +

+
+    
+
+
diff --git a/testdata/TestCommonmark/pre_code/output.fenced_backtick.golden b/testdata/TestCommonmark/pre_code/output.fenced_backtick.golden index 596ba57..44940b5 100644 --- a/testdata/TestCommonmark/pre_code/output.fenced_backtick.golden +++ b/testdata/TestCommonmark/pre_code/output.fenced_backtick.golden @@ -59,4 +59,20 @@ Some ~~~ totally ~~~~~~ normal ~ code +``` + +``` + +The tag is used to embed an image. + +The tag is used to embed an image. + +``` + +``` + + + + + ``` \ No newline at end of file diff --git a/testdata/TestCommonmark/pre_code/output.fenced_tilde.golden b/testdata/TestCommonmark/pre_code/output.fenced_tilde.golden index 5e9e0d9..6f53b77 100644 --- a/testdata/TestCommonmark/pre_code/output.fenced_tilde.golden +++ b/testdata/TestCommonmark/pre_code/output.fenced_tilde.golden @@ -59,4 +59,20 @@ Some ~~~ totally ~~~~~~ normal ~ code -~~~~~~~ \ No newline at end of file +~~~~~~~ + +~~~ + +The tag is used to embed an image. + +The tag is used to embed an image. + +~~~ + +~~~ + + + + + +~~~ \ No newline at end of file diff --git a/testdata/TestCommonmark/pre_code/output.indented.golden b/testdata/TestCommonmark/pre_code/output.indented.golden index 596ba57..44940b5 100644 --- a/testdata/TestCommonmark/pre_code/output.indented.golden +++ b/testdata/TestCommonmark/pre_code/output.indented.golden @@ -59,4 +59,20 @@ Some ~~~ totally ~~~~~~ normal ~ code +``` + +``` + +The tag is used to embed an image. + +The tag is used to embed an image. + +``` + +``` + + + + + ``` \ No newline at end of file diff --git a/utils.go b/utils.go index ddb04a5..d506a2a 100644 --- a/utils.go +++ b/utils.go @@ -259,3 +259,15 @@ func findMax(a []int) (max int) { } return max } + +// getHTML gets the HTML content and unescapes the encoded characters. +// Returns "" if there is an error. +func getHTML(selec *goquery.Selection) string { + content, err := selec.Html() + if err != nil { + return "" + } + + // We don't want the html encoded characters to be displayed as is. + return html.UnescapeString(content) +}