diff --git a/README.md b/README.md
index 06859e1..67891b0 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@ go get github.com/JohannesKaufmann/html-to-markdown
## Usage
```go
-import "github.com/JohannesKaufmann/html-to-markdown"
+import md "github.com/JohannesKaufmann/html-to-markdown"
converter := md.NewConverter("", true, nil)
diff --git a/commonmark.go b/commonmark.go
index 8c4a2d1..70a3f3e 100644
--- a/commonmark.go
+++ b/commonmark.go
@@ -16,7 +16,7 @@ import (
var multipleSpacesR = regexp.MustCompile(` +`)
var commonmark = []Rule{
- Rule{
+ {
Filter: []string{"ul", "ol"},
Replacement: func(content string, selec *goquery.Selection, opt *Options) *string {
parent := selec.Parent()
@@ -42,7 +42,7 @@ var commonmark = []Rule{
return &content
},
},
- Rule{
+ {
Filter: []string{"li"},
Replacement: func(content string, selec *goquery.Selection, opt *Options) *string {
if strings.TrimSpace(content) == "" {
@@ -68,7 +68,7 @@ var commonmark = []Rule{
return String(prefix + content + "\n")
},
},
- Rule{
+ {
Filter: []string{"#text"},
Replacement: func(content string, selec *goquery.Selection, opt *Options) *string {
text := selec.Text()
@@ -85,7 +85,7 @@ var commonmark = []Rule{
return &text
},
},
- Rule{
+ {
Filter: []string{"p", "div"},
Replacement: func(content string, selec *goquery.Selection, opt *Options) *string {
parent := goquery.NodeName(selec.Parent())
@@ -101,7 +101,7 @@ var commonmark = []Rule{
return &content
},
},
- Rule{
+ {
Filter: []string{"h1", "h2", "h3", "h4", "h5", "h6"},
Replacement: func(content string, selec *goquery.Selection, opt *Options) *string {
if strings.TrimSpace(content) == "" {
@@ -141,7 +141,7 @@ var commonmark = []Rule{
return &text
},
},
- Rule{
+ {
Filter: []string{"strong", "b"},
Replacement: func(content string, selec *goquery.Selection, opt *Options) *string {
// only use one bold tag if they are nested
@@ -162,7 +162,7 @@ var commonmark = []Rule{
return &trimmed
},
},
- Rule{
+ {
Filter: []string{"i", "em"},
Replacement: func(content string, selec *goquery.Selection, opt *Options) *string {
// only use one italic tag if they are nested
@@ -183,7 +183,7 @@ var commonmark = []Rule{
return &trimmed
},
},
- Rule{
+ {
Filter: []string{"img"},
Replacement: func(content string, selec *goquery.Selection, opt *Options) *string {
alt := selec.AttrOr("alt", "")
@@ -209,7 +209,7 @@ var commonmark = []Rule{
return &text
},
},
- Rule{
+ {
Filter: []string{"a"},
AdvancedReplacement: func(content string, selec *goquery.Selection, opt *Options) (AdvancedResult, bool) {
// if there is no href, no link is used. So just return the content inside the link
@@ -270,7 +270,7 @@ var commonmark = []Rule{
return AdvancedResult{Markdown: replacement, Footer: reference}, false
},
},
- Rule{
+ {
Filter: []string{"code"},
Replacement: func(_ string, selec *goquery.Selection, opt *Options) *string {
content := selec.Text()
@@ -280,7 +280,7 @@ var commonmark = []Rule{
return &text
},
},
- Rule{
+ {
Filter: []string{"pre"},
Replacement: func(content string, selec *goquery.Selection, opt *Options) *string {
codeElement := selec.Find("code")
@@ -301,20 +301,20 @@ var commonmark = []Rule{
return &text
},
},
- Rule{
+ {
Filter: []string{"hr"},
Replacement: func(content string, selec *goquery.Selection, opt *Options) *string {
text := "\n\n" + opt.HorizontalRule + "\n\n"
return &text
},
},
- Rule{
+ {
Filter: []string{"br"},
Replacement: func(content string, selec *goquery.Selection, opt *Options) *string {
return String("\n\n")
},
},
- Rule{
+ {
Filter: []string{"blockquote"},
Replacement: func(content string, selec *goquery.Selection, opt *Options) *string {
content = strings.TrimSpace(content)
@@ -331,7 +331,7 @@ var commonmark = []Rule{
return &text
},
},
- Rule{
+ {
Filter: []string{"noscript"},
Replacement: func(content string, selec *goquery.Selection, opt *Options) *string {
// for now remove the contents of noscript. But in the future we could
diff --git a/examples/add_rules/main.go b/examples/add_rules/main.go
index 8f63447..9078e78 100644
--- a/examples/add_rules/main.go
+++ b/examples/add_rules/main.go
@@ -10,8 +10,8 @@ import (
)
func main() {
- html := `Good sountrack and cake.`
- // -> `Good sountrack ~and cake~.`
+ html := `Good soundtrack and cake.`
+ // -> `Good soundtrack ~~and cake~~.`
/*
We want to add a rule when a `span` tag has a class of `bb_strike`.
@@ -31,7 +31,7 @@ func main() {
// Because of the space it is not recognized as strikethrough.
// -> trim spaces at begin&end of string when inside strong/italic/...
content = strings.TrimSpace(content)
- return md.String("~" + content + "~")
+ return md.String("~~" + content + "~~")
},
}
diff --git a/from.go b/from.go
index 31c7a30..556f11a 100644
--- a/from.go
+++ b/from.go
@@ -237,7 +237,7 @@ type Plugin func(conv *Converter) []Rule
func (c *Converter) Use(plugins ...Plugin) *Converter {
for _, plugin := range plugins {
rules := plugin(c)
- c.AddRules(rules...) // TODO: for better perfomance only use one lock for all plugins
+ c.AddRules(rules...) // TODO: for better performance only use one lock for all plugins
}
return c
}
diff --git a/markdown.go b/markdown.go
index 8c1789d..c2498f3 100644
--- a/markdown.go
+++ b/markdown.go
@@ -65,6 +65,9 @@ var inlineElements = []string{ // -> https://developer.mozilla.org/de/docs/Web/H
"button", "input", "label", "select", "textarea",
}
+// IsInlineElement can be used to check whether a node name (goquery.NodeName) is
+// an html inline element and not a block element. Used in the rule for the
+// p tag to check whether the text is inside a block element.
func IsInlineElement(e string) bool {
for _, element := range inlineElements {
if element == e {
@@ -132,6 +135,8 @@ type Options struct {
domain string
}
+// AdvancedResult is used for example for links. If you use LinkStyle:referenced
+// the link href is placed at the bottom of the generated markdown (Footer).
type AdvancedResult struct {
Header string
Markdown string
diff --git a/plugin/confluence_code_block.go b/plugin/confluence_code_block.go
index d0d9103..78daea3 100644
--- a/plugin/confluence_code_block.go
+++ b/plugin/confluence_code_block.go
@@ -14,7 +14,7 @@ func ConfluenceCodeBlock() md.Plugin {
return func(c *md.Converter) []md.Rule {
character := "```"
return []md.Rule{
- md.Rule{
+ {
Filter: []string{"ac:structured-macro"},
Replacement: func(content string, selec *goquery.Selection, opt *md.Options) *string {
for _, node := range selec.Nodes {
diff --git a/plugin/table.go b/plugin/table.go
index a743394..467629e 100644
--- a/plugin/table.go
+++ b/plugin/table.go
@@ -10,13 +10,13 @@ import (
// EXPERIMENTAL_Table converts a html table to markdown.
var EXPERIMENTAL_Table = []md.Rule{
- md.Rule{ // TableCell
+ { // TableCell
Filter: []string{"th", "td"},
Replacement: func(content string, selec *goquery.Selection, opt *md.Options) *string {
return md.String(cell(content, selec))
},
},
- md.Rule{ // TableRow
+ { // TableRow
Filter: []string{"tr"},
Replacement: func(content string, selec *goquery.Selection, opt *md.Options) *string {
borderCells := ""
diff --git a/plugin/task_list.go b/plugin/task_list.go
index 990960c..c06617f 100644
--- a/plugin/task_list.go
+++ b/plugin/task_list.go
@@ -9,7 +9,7 @@ import (
func TaskListItems() md.Plugin {
return func(c *md.Converter) []md.Rule {
return []md.Rule{
- md.Rule{
+ {
Filter: []string{"input"},
Replacement: func(content string, selec *goquery.Selection, opt *md.Options) *string {
if !selec.Parent().Is("li") {
diff --git a/plugin/vimeo.go b/plugin/vimeo.go
index d84fd19..f916ae4 100644
--- a/plugin/vimeo.go
+++ b/plugin/vimeo.go
@@ -13,6 +13,7 @@ import (
"github.com/PuerkitoBio/goquery"
)
+// Timeout for the http client
var Timeout = time.Second * 10
var netClient = &http.Client{
Timeout: Timeout,
@@ -46,12 +47,15 @@ var vimeoID = regexp.MustCompile(`video\/(\d*)`)
type vimeoVariation int
+// Configure how the Vimeo Plugin should display the video in markdown.
const (
VimeoOnlyThumbnail vimeoVariation = iota
VimeoWithTitle
VimeoWithDescription
)
+// EXPERIMENTAL_VimeoEmbed registers a rule (for iframes) and
+// returns a markdown compatible representation (link to video, ...).
func EXPERIMENTAL_VimeoEmbed(variation vimeoVariation) md.Plugin {
return func(c *md.Converter) []md.Rule {
getVimeoData := func(id string) (*vimeoVideo, error) {
@@ -89,7 +93,7 @@ func EXPERIMENTAL_VimeoEmbed(variation vimeoVariation) md.Plugin {
}
return []md.Rule{
- md.Rule{
+ {
Filter: []string{"iframe"},
Replacement: func(content string, selec *goquery.Selection, opt *md.Options) *string {
src := selec.AttrOr("src", "")
diff --git a/plugin/youtube.go b/plugin/youtube.go
index 35cf218..d8c4ae9 100644
--- a/plugin/youtube.go
+++ b/plugin/youtube.go
@@ -11,8 +11,10 @@ import (
var youtubeID = regexp.MustCompile(`youtube\.com\/embed\/([^\&\?\/]+)`)
+// EXPERIMENTAL_YoutubeEmbed registers a rule (for iframes) and
+// returns a markdown compatible representation (link to video, ...).
var EXPERIMENTAL_YoutubeEmbed = []md.Rule{
- md.Rule{
+ {
Filter: []string{"iframe"},
Replacement: func(content string, selec *goquery.Selection, opt *md.Options) *string {
src := selec.AttrOr("src", "")
diff --git a/utils.go b/utils.go
index 857c114..3b551d1 100644
--- a/utils.go
+++ b/utils.go
@@ -11,6 +11,12 @@ import (
"golang.org/x/net/html"
)
+/*
+WARNING: The functions from this file can be used externally
+but there is no guarantee that they will stay exported.
+*/
+
+// CollectText returns the text of the node and all its children
func CollectText(n *html.Node) string {
text := &bytes.Buffer{}
collectText(n, text)
@@ -26,7 +32,8 @@ func collectText(n *html.Node, buf *bytes.Buffer) {
}
}
-// always have a space to the side to recognize the delimiter
+// AddSpaceIfNessesary adds spaces to the text based on the neighbors.
+// That makes sure that there is always a space to the side, to recognize the delimiter.
func AddSpaceIfNessesary(selec *goquery.Selection, text string) string {
var prev string
@@ -92,6 +99,8 @@ func AddSpaceIfNessesary(selec *goquery.Selection, text string) string {
return text
}
+// TrimpLeadingSpaces removes spaces from the beginning of a line
+// but makes sure that list items and code blocks are not affected.
func TrimpLeadingSpaces(text string) string {
parts := strings.Split(text, "\n")
for i := range parts {
@@ -128,6 +137,7 @@ func TrimpLeadingSpaces(text string) string {
return strings.Join(parts, "\n")
}
+// TrimTrailingSpaces removes unnecessary spaces from the end of lines.
func TrimTrailingSpaces(text string) string {
parts := strings.Split(text, "\n")
for i := range parts {
@@ -143,6 +153,7 @@ func TrimTrailingSpaces(text string) string {
// The same as `multipleNewLinesRegex`, but applies to escaped new lines inside a link `\n\`
var multipleNewLinesInLinkRegex = regexp.MustCompile(`(\n\\){1,}`) // `([\n\r\s]\\)`
+// EscapeMultiLine deals with multiline content inside a link
func EscapeMultiLine(content string) string {
content = strings.TrimSpace(content)
content = strings.Replace(content, "\n", `\`+"\n", -1)
@@ -152,7 +163,7 @@ func EscapeMultiLine(content string) string {
return content
}
-// Cal can be passed the content of a code block and it returns
+// CalculateCodeFence can be passed the content of a code block and it returns
// how many fence characters (` or ~) should be used.
//
// This is useful if the html content includes the same fence characters