Skip to content

Commit

Permalink
docs: add documentation for domain and css selector
Browse files Browse the repository at this point in the history
  • Loading branch information
JohannesKaufmann committed Nov 17, 2024
1 parent 4d6b2ff commit b1a76ee
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 10 deletions.
34 changes: 34 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,34 @@ func main() {

- 🧑‍💻 [Example code, basics](/examples/basics/main.go)

Use `WithDomain` to convert _relative_ links to _absolute_ links:

```go
package main

import (
"fmt"
"log"

htmltomarkdown "github.com/JohannesKaufmann/html-to-markdown/v2"
"github.com/JohannesKaufmann/html-to-markdown/v2/converter"
)

func main() {
input := `<img src="/assets/image.png" />`

markdown, err := htmltomarkdown.ConvertString(
input,
converter.WithDomain("https://example.com"),
)
if err != nil {
log.Fatal(err)
}
fmt.Println(markdown)
// Output: ![](https://example.com/assets/image.png)
}
```

The function `htmltomarkdown.ConvertString()` is a _small wrapper_ around `converter.NewConverter()` and the _base_ and _commonmark_ plugins. If you want more control, use the following:

```go
Expand Down Expand Up @@ -217,6 +245,12 @@ This domain is for use in illustrative examples in documents. You may use this d
[More information...](https://www.iana.org/domains/example)
```

Use `--help` to list the available configuration options, for example:

- `--domain="https://example.com"` to convert _relative_ links to _absolute_ links.
- `--exclude-selector=".ad"` to exclude the HTML elements with `class="ad"` from the conversion.
- `--include-selector="article"` to only include the `<article>` HTML elements in the conversion.

_(The CLI does not support every option yet. More customization will be added over time.)_

---
Expand Down
24 changes: 20 additions & 4 deletions convert.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package htmltomarkdown

import (
"io"

"github.com/JohannesKaufmann/html-to-markdown/v2/converter"
"github.com/JohannesKaufmann/html-to-markdown/v2/plugin/base"
"github.com/JohannesKaufmann/html-to-markdown/v2/plugin/commonmark"
Expand All @@ -10,29 +12,43 @@ import (
// ConvertString converts an HTML string to a markdown string.
//
// A new converter with the base and commonmark plugins is created on every
// call, and any opts are forwarded unchanged to its ConvertString method.
//
// Under the hood `html.Parse()` is used to parse the HTML.
//
// NOTE(review): two signature lines follow. The first (without opts) looks
// like the pre-change line of the diff; the body passes `opts...`, so only
// the variadic signature is consistent with it.
func ConvertString(htmlInput string) (string, error) {
func ConvertString(htmlInput string, opts ...converter.ConvertOptionFunc) (string, error) {
conv := converter.NewConverter(
converter.WithPlugins(
base.NewBasePlugin(),
commonmark.NewCommonmarkPlugin(),
),
)

return conv.ConvertString(htmlInput, opts...)
}

// ConvertReader converts the HTML from the reader to markdown and returns
// the result as a byte slice.
//
// A new converter with the base and commonmark plugins is created on every
// call, and any opts are forwarded unchanged to its ConvertReader method.
//
// Under the hood `html.Parse()` is used to parse the HTML.
func ConvertReader(r io.Reader, opts ...converter.ConvertOptionFunc) ([]byte, error) {
conv := converter.NewConverter(
converter.WithPlugins(
base.NewBasePlugin(),
commonmark.NewCommonmarkPlugin(),
),
)

// NOTE(review): the next return references `htmlInput`, which is not a
// parameter of this function; it looks like the removed line of the diff.
// The return after it (using `r` and `opts`) matches this signature.
return conv.ConvertString(htmlInput)
return conv.ConvertReader(r, opts...)
}

// ConvertNode converts a `*html.Node` to a markdown byte slice.
//
// If you have already parsed an HTML page using the `html.Parse()` function
// from the "golang.org/x/net/html" package then you can pass this node
// directly to the converter.
//
// A new converter with the base and commonmark plugins is created on every
// call, and any opts are forwarded unchanged to its ConvertNode method.
//
// NOTE(review): two signature lines follow. The first (without opts) looks
// like the pre-change line of the diff; the variadic one is the version that
// matches the final return statement.
func ConvertNode(doc *html.Node) ([]byte, error) {
func ConvertNode(doc *html.Node, opts ...converter.ConvertOptionFunc) ([]byte, error) {
conv := converter.NewConverter(
converter.WithPlugins(
base.NewBasePlugin(),
commonmark.NewCommonmarkPlugin(),
),
)

// NOTE(review): the first return (without opts) looks like the removed line
// of the diff; the second one forwards the options.
return conv.ConvertNode(doc)
return conv.ConvertNode(doc, opts...)
}
16 changes: 16 additions & 0 deletions convert_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,22 @@ func ExampleConvertString() {
fmt.Println(markdown)
// Output: **Bold Text**
}

// ExampleConvertString_withDomain shows how a relative image path is
// resolved against a domain that is supplied for a single conversion.
//
// The example is attached to ConvertString (with the "withDomain" suffix)
// because WithDomain is declared in the converter package, not in the package
// under test. An example named after an identifier that the tested package
// does not declare is reported by the `go vet` tests analyzer and cannot be
// associated with any symbol in the generated documentation.
func ExampleConvertString_withDomain() {
	input := `<img src="/assets/image.png" />`

	markdown, err := htmltomarkdown.ConvertString(
		input,
		// Provide a different domain for every convert call:
		converter.WithDomain("https://example.com"),
	)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(markdown)
	// Output: ![](https://example.com/assets/image.png)
}

func ExampleConvertNode() {
input := `<strong>Bold Text</strong>`

Expand Down
18 changes: 12 additions & 6 deletions converter/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,20 @@ type convertOption struct {
domain string
context context.Context
}
// ConvertOptionFunc is a functional option that modifies the convertOption
// settings of a conversion. Values of this type are accepted by the
// ConvertNode, ConvertReader and ConvertString methods.
//
// NOTE(review): the unexported declaration on the next line looks like the
// pre-change line of the diff; the exported name is the one used by the
// method signatures in this file.
type convertOptionFunc func(o *convertOption)
type ConvertOptionFunc func(o *convertOption)

// WithContext returns an option that stores ctx in the `context` field of
// the conversion's convertOption.
//
// NOTE(review): the first signature line (returning the unexported
// convertOptionFunc) looks like the pre-change line of the diff; the second
// one returns the exported ConvertOptionFunc.
func WithContext(ctx context.Context) convertOptionFunc {
func WithContext(ctx context.Context) ConvertOptionFunc {
return func(o *convertOption) {
o.context = ctx
}
}
func WithDomain(domain string) convertOptionFunc {

// WithDomain provides a base `domain` to the converter and
// to the `AssembleAbsoluteURL` function.
//
// If a *relative* URL is encountered (in an image or link) then the `domain` is used
// to convert it to an *absolute* URL.
func WithDomain(domain string) ConvertOptionFunc {
return func(o *convertOption) {
o.domain = domain
}
Expand All @@ -49,7 +55,7 @@ var errBasePluginMissing = errors.New(`you registered the "commonmark" plugin bu
// If you have already parsed an HTML page using the `html.Parse()` function
// from the "golang.org/x/net/html" package then you can pass this node
// directly to the converter.
func (conv *Converter) ConvertNode(doc *html.Node, opts ...convertOptionFunc) ([]byte, error) {
func (conv *Converter) ConvertNode(doc *html.Node, opts ...ConvertOptionFunc) ([]byte, error) {

if err := conv.getError(); err != nil {
// There can be errors while calling `Init` on the plugins (e.g. validation errors).
Expand Down Expand Up @@ -113,7 +119,7 @@ func (conv *Converter) ConvertNode(doc *html.Node, opts ...convertOptionFunc) ([
// ConvertReader converts the html from the reader to markdown.
//
// Under the hood `html.Parse()` is used to parse the HTML.
func (conv *Converter) ConvertReader(r io.Reader, opts ...convertOptionFunc) ([]byte, error) {
func (conv *Converter) ConvertReader(r io.Reader, opts ...ConvertOptionFunc) ([]byte, error) {
doc, err := html.Parse(r)
if err != nil {
return nil, err
Expand All @@ -125,7 +131,7 @@ func (conv *Converter) ConvertReader(r io.Reader, opts ...convertOptionFunc) ([]
// ConvertString converts an HTML string to a markdown string.
//
// Under the hood `html.Parse()` is used to parse the HTML.
func (conv *Converter) ConvertString(htmlInput string, opts ...convertOptionFunc) (string, error) {
func (conv *Converter) ConvertString(htmlInput string, opts ...ConvertOptionFunc) (string, error) {
r := strings.NewReader(htmlInput)
output, err := conv.ConvertReader(r, opts...)
if err != nil {
Expand Down

0 comments on commit b1a76ee

Please sign in to comment.