Skip to content

Commit

Permalink
[get] print url option
Browse files Browse the repository at this point in the history
  • Loading branch information
lapwat committed Aug 14, 2024
1 parent 1b2be1c commit 403fdcc
Show file tree
Hide file tree
Showing 7 changed files with 115 additions and 54 deletions.
7 changes: 4 additions & 3 deletions book/chapter.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package book

type chapter struct {
url string
body string
name string
author string
Expand All @@ -10,11 +11,11 @@ type chapter struct {
}

func NewEmptyChapter() chapter {
return chapter{"", "", "", "", []chapter{}, NewScrapeConfigNoInclude()}
return chapter{"", "", "", "", "", []chapter{}, NewScrapeConfigNoInclude()}
}

func NewChapter(body, name, author, content string, subChapters []chapter, config *ScrapeConfig) chapter {
return chapter{body, name, author, content, subChapters, config}
// URL returns the value stored in the chapter's url field.
// NOTE(review): presumably this is the address the chapter was scraped
// from (NewChapterFromURL passes its url argument into this field) — confirm
// that other constructors populate it the same way.
func (c chapter) URL() string {
return c.url
}

func (c chapter) Body() string {
Expand Down
50 changes: 35 additions & 15 deletions book/format.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ func ToMarkdownString(c chapter) string {
markdown += fmt.Sprintf("%s\n", c.Name())
markdown += fmt.Sprintf("%s\n\n", strings.Repeat("=", len(c.Name())))

// url
if c.config.PrintURL {
markdown += fmt.Sprintf("_Source: %s_\n\n", c.URL())
}

// convert content to markdown
content, err := md.NewConverter("", true, nil).ConvertString(c.Content())
if err != nil {
Expand Down Expand Up @@ -72,7 +77,15 @@ func ToHtmlString(c chapter) string {

// chapter content
if c.config.Include {
html += fmt.Sprintf("<h1>%s</h1>", c.Name())
// title
html += fmt.Sprintf("<h1>%s</h1>\n", c.Name())

// url
if c.config.PrintURL {
html += fmt.Sprintf("<p><i>Source: %s</i></p>\n", c.URL())
}

// content
html += c.Content()
}

Expand Down Expand Up @@ -114,19 +127,6 @@ func ToEpub(c chapter, filename string) string {
e := epub.NewEpub(c.Name())
e.SetAuthor(c.Author())

AppendToEpub(e, c)

err := e.Write(filename)
if err != nil {
log.Fatal(err)
}

return filename
}

func AppendToEpub(e *epub.Epub, c chapter) {
content := ""

// append table of content
if len(c.SubChapters()) > 1 {
html := "<h1>Table of Contents</h1>"
Expand All @@ -143,6 +143,19 @@ func AppendToEpub(e *epub.Epub, c chapter) {
}
}

AppendToEpub(e, c)

err := e.Write(filename)
if err != nil {
log.Fatal(err)
}

return filename
}

func AppendToEpub(e *epub.Epub, c chapter) {
content := ""

// chapter content
if c.config.Include {

Expand Down Expand Up @@ -173,8 +186,15 @@ func AppendToEpub(e *epub.Epub, c chapter) {
html := ""
// add title only if ImagesOnly = false
if c.config.ImagesOnly == false {
html += fmt.Sprintf("<h1>%s</h1>", c.Name())
html += fmt.Sprintf("<h1>%s</h1>\n", c.Name())
}

// url
if c.config.PrintURL {
html += fmt.Sprintf("<p><i>Source: %s</i></p>\n", c.URL())
}

// content
html += content

// write to epub file
Expand Down
54 changes: 43 additions & 11 deletions book/format_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ func TestFilename(t *testing.T) {

func TestToMarkdownString(t *testing.T) {

c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})

got := ToMarkdownString(c)
want := "Example Domain\n==============\n\nThis domain is for use in illustrative examples in documents. You may use this\ndomain in literature without prior coordination or asking for permission.\n\n[More information...](https://www.iana.org/domains/example)\n\n\n"
Expand All @@ -30,9 +30,25 @@ func TestToMarkdownString(t *testing.T) {

}

// TestToMarkdownPrintURL checks that, with PrintURL enabled on the scrape
// config, ToMarkdownString emits a "_Source: <url>_" line between the title
// underline and the converted chapter content.
//
// NOTE(review): the chapter is built with NewChapterFromURL against
// https://example.com/, so this test presumably needs network access and
// depends on that page's current content — confirm before running offline.
func TestToMarkdownPrintURL(t *testing.T) {
	config := NewScrapeConfigQuiet()
	config.PrintURL = true

	c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})

	got := ToMarkdownString(c)
	want := "Example Domain\n==============\n\n_Source: https://example.com/_\n\nThis domain is for use in illustrative examples in documents. You may use this\ndomain in literature without prior coordination or asking for permission.\n\n[More information...](https://www.iana.org/domains/example)\n\n\n"

	if got != want {
		// %q (as used by the HTML tests in this file) keeps embedded newlines
		// and trailing whitespace visible, which %v hides in multi-line output.
		t.Errorf("got %q, wanted %q", got, want)
	}
}

func TestToMarkdown(t *testing.T) {

c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
ToMarkdown(c, "")

filename := "Example_Domain.md"
Expand All @@ -49,7 +65,7 @@ func TestToMarkdown(t *testing.T) {
func TestToMarkdownFilename(t *testing.T) {

filename := "ebook.md"
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
ToMarkdown(c, filename)

if _, err := os.Stat(filename); errors.Is(err, os.ErrNotExist) {
Expand All @@ -64,10 +80,26 @@ func TestToMarkdownFilename(t *testing.T) {

func TestToHtmlString(t *testing.T) {

c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})

got := ToHtmlString(c)
want := "<h1>Example Domain</h1>\n<div>\n \n <p>This domain is for use in illustrative examples in documents. You may use this\n domain in literature without prior coordination or asking for permission.</p>\n <p><a href=\"https://www.iana.org/domains/example\">More information...</a></p>\n</div>"

if got != want {
t.Errorf("got %q, wanted %q", got, want)
}

}

func TestToHtmlPrintURL(t *testing.T) {

config := NewScrapeConfigQuiet()
config.PrintURL = true

c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})

got := ToHtmlString(c)
want := "<h1>Example Domain</h1><div>\n \n <p>This domain is for use in illustrative examples in documents. You may use this\n domain in literature without prior coordination or asking for permission.</p>\n <p><a href=\"https://www.iana.org/domains/example\">More information...</a></p>\n</div>"
want := "<h1>Example Domain</h1>\n<p><i>Source: https://example.com/</i></p>\n<div>\n \n <p>This domain is for use in illustrative examples in documents. You may use this\n domain in literature without prior coordination or asking for permission.</p>\n <p><a href=\"https://www.iana.org/domains/example\">More information...</a></p>\n</div>"

if got != want {
t.Errorf("got %q, wanted %q", got, want)
Expand All @@ -77,7 +109,7 @@ func TestToHtmlString(t *testing.T) {

func TestToHtml(t *testing.T) {

c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
ToHtml(c, "")

filename := "Example_Domain.html"
Expand All @@ -94,7 +126,7 @@ func TestToHtml(t *testing.T) {
func TestToHtmlFilename(t *testing.T) {

filename := "ebook.html"
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
ToHtml(c, filename)

if _, err := os.Stat(filename); errors.Is(err, os.ErrNotExist) {
Expand All @@ -109,7 +141,7 @@ func TestToHtmlFilename(t *testing.T) {

func TestToEpub(t *testing.T) {

c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
ToEpub(c, "")

filename := "Example_Domain.epub"
Expand All @@ -126,7 +158,7 @@ func TestToEpub(t *testing.T) {
func TestToEpubFilename(t *testing.T) {

filename := "ebook.epub"
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
ToEpub(c, filename)

if _, err := os.Stat(filename); errors.Is(err, os.ErrNotExist) {
Expand All @@ -141,7 +173,7 @@ func TestToEpubFilename(t *testing.T) {

func TestToMobi(t *testing.T) {

c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
ToMobi(c, "")

filename := "Example_Domain.mobi"
Expand All @@ -158,7 +190,7 @@ func TestToMobi(t *testing.T) {
func TestToMobiFilename(t *testing.T) {

filename := "ebook.mobi"
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
ToMobi(c, filename)

if _, err := os.Stat(filename); errors.Is(err, os.ErrNotExist) {
Expand Down
11 changes: 8 additions & 3 deletions book/scraper.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,19 @@ type ScrapeConfig struct {
Include bool
ImagesOnly bool
UseLinkName bool
PrintURL bool
}

func NewScrapeConfig() *ScrapeConfig {
return &ScrapeConfig{0, "", false, -1, 0, false, -1, -1, true, false, false}
return &ScrapeConfig{0, "", false, -1, 0, false, -1, -1, true, false, false, false}
}

// NewScrapeConfigQuiet returns a pointer to a freshly built ScrapeConfig whose
// positional values match those of NewScrapeConfig except for the third
// field, which is true here instead of false.
// NOTE(review): the third field is presumably the "quiet" flag that
// suppresses progress output — confirm against the ScrapeConfig definition.
func NewScrapeConfigQuiet() *ScrapeConfig {
	quiet := ScrapeConfig{0, "", true, -1, 0, false, -1, -1, true, false, false, false}
	return &quiet
}

func NewScrapeConfigNoInclude() *ScrapeConfig {
return &ScrapeConfig{0, "", false, -1, 0, false, -1, -1, false, false, false}
return &ScrapeConfig{0, "", false, -1, 0, false, -1, -1, false, false, false, false}
}

func NewScrapeConfigs(selectors []string) []*ScrapeConfig {
Expand Down Expand Up @@ -252,7 +257,7 @@ func NewChapterFromURL(url, linkName string, configs []*ScrapeConfig, index int,

}

return chapter{string(body), name, article.Byline, content, subchapters, config}
return chapter{url, string(body), name, article.Byline, content, subchapters, config}
}

func tableOfContent(url string, config *ScrapeConfig, subConfig *ScrapeConfig, quiet bool) ([]chapter, chapter) {
Expand Down
Loading

0 comments on commit 403fdcc

Please sign in to comment.