diff --git a/book/chapter.go b/book/chapter.go
index 9c82b1b..1797dc5 100644
--- a/book/chapter.go
+++ b/book/chapter.go
@@ -1,6 +1,7 @@
package book
type chapter struct {
+ url string
body string
name string
author string
@@ -10,11 +11,11 @@ type chapter struct {
}
func NewEmptyChapter() chapter {
- return chapter{"", "", "", "", []chapter{}, NewScrapeConfigNoInclude()}
+ return chapter{"", "", "", "", "", []chapter{}, NewScrapeConfigNoInclude()}
}
-func NewChapter(body, name, author, content string, subChapters []chapter, config *ScrapeConfig) chapter {
- return chapter{body, name, author, content, subChapters, config}
+func (c chapter) URL() string {
+ return c.url
}
func (c chapter) Body() string {
diff --git a/book/format.go b/book/format.go
index c8f1312..7cb6dd7 100644
--- a/book/format.go
+++ b/book/format.go
@@ -30,6 +30,11 @@ func ToMarkdownString(c chapter) string {
markdown += fmt.Sprintf("%s\n", c.Name())
markdown += fmt.Sprintf("%s\n\n", strings.Repeat("=", len(c.Name())))
+ // url
+ if c.config.PrintURL {
+ markdown += fmt.Sprintf("_Source: %s_\n\n", c.URL())
+ }
+
// convert content to markdown
content, err := md.NewConverter("", true, nil).ConvertString(c.Content())
if err != nil {
@@ -72,7 +77,15 @@ func ToHtmlString(c chapter) string {
// chapter content
if c.config.Include {
- html += fmt.Sprintf("
%s
", c.Name())
+ // title
+ html += fmt.Sprintf("%s
\n", c.Name())
+
+ // url
+ if c.config.PrintURL {
+ html += fmt.Sprintf("Source: %s
\n", c.URL())
+ }
+
+ // content
html += c.Content()
}
@@ -114,19 +127,6 @@ func ToEpub(c chapter, filename string) string {
e := epub.NewEpub(c.Name())
e.SetAuthor(c.Author())
- AppendToEpub(e, c)
-
- err := e.Write(filename)
- if err != nil {
- log.Fatal(err)
- }
-
- return filename
-}
-
-func AppendToEpub(e *epub.Epub, c chapter) {
- content := ""
-
// append table of content
if len(c.SubChapters()) > 1 {
html := "Table of Contents
"
@@ -143,6 +143,19 @@ func AppendToEpub(e *epub.Epub, c chapter) {
}
}
+ AppendToEpub(e, c)
+
+ err := e.Write(filename)
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ return filename
+}
+
+func AppendToEpub(e *epub.Epub, c chapter) {
+ content := ""
+
// chapter content
if c.config.Include {
@@ -173,8 +186,15 @@ func AppendToEpub(e *epub.Epub, c chapter) {
html := ""
// add title only if ImagesOnly = false
if c.config.ImagesOnly == false {
- html += fmt.Sprintf("%s
", c.Name())
+ html += fmt.Sprintf("%s
\n", c.Name())
+ }
+
+ // url
+ if c.config.PrintURL {
+ html += fmt.Sprintf("Source: %s
\n", c.URL())
}
+
+ // content
html += content
// write to epub file
diff --git a/book/format_test.go b/book/format_test.go
index 7153b23..847f2f4 100644
--- a/book/format_test.go
+++ b/book/format_test.go
@@ -19,7 +19,7 @@ func TestFilename(t *testing.T) {
func TestToMarkdownString(t *testing.T) {
- c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
+ c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
got := ToMarkdownString(c)
want := "Example Domain\n==============\n\nThis domain is for use in illustrative examples in documents. You may use this\ndomain in literature without prior coordination or asking for permission.\n\n[More information...](https://www.iana.org/domains/example)\n\n\n"
@@ -30,9 +30,25 @@ func TestToMarkdownString(t *testing.T) {
}
+func TestToMarkdownPrintURL(t *testing.T) {
+
+ config := NewScrapeConfigQuiet()
+ config.PrintURL = true
+
+ c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})
+
+ got := ToMarkdownString(c)
+ want := "Example Domain\n==============\n\n_Source: https://example.com/_\n\nThis domain is for use in illustrative examples in documents. You may use this\ndomain in literature without prior coordination or asking for permission.\n\n[More information...](https://www.iana.org/domains/example)\n\n\n"
+
+ if got != want {
+ t.Errorf("got %v, wanted %v", got, want)
+ }
+
+}
+
func TestToMarkdown(t *testing.T) {
- c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
+ c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
ToMarkdown(c, "")
filename := "Example_Domain.md"
@@ -49,7 +65,7 @@ func TestToMarkdown(t *testing.T) {
func TestToMarkdownFilename(t *testing.T) {
filename := "ebook.md"
- c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
+ c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
ToMarkdown(c, filename)
if _, err := os.Stat(filename); errors.Is(err, os.ErrNotExist) {
@@ -64,10 +80,26 @@ func TestToMarkdownFilename(t *testing.T) {
func TestToHtmlString(t *testing.T) {
- c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
+ c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
+
+ got := ToHtmlString(c)
+ want := "Example Domain
\n\n \n
This domain is for use in illustrative examples in documents. You may use this\n domain in literature without prior coordination or asking for permission.
\n
More information...
\n
"
+
+ if got != want {
+ t.Errorf("got %q, wanted %q", got, want)
+ }
+
+}
+
+func TestToHtmlPrintURL(t *testing.T) {
+
+ config := NewScrapeConfigQuiet()
+ config.PrintURL = true
+
+ c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})
got := ToHtmlString(c)
- want := "Example Domain
\n \n
This domain is for use in illustrative examples in documents. You may use this\n domain in literature without prior coordination or asking for permission.
\n
More information...
\n
"
+ want := "Example Domain
\nSource: https://example.com/
\n\n \n
This domain is for use in illustrative examples in documents. You may use this\n domain in literature without prior coordination or asking for permission.
\n
More information...
\n
"
if got != want {
t.Errorf("got %q, wanted %q", got, want)
@@ -77,7 +109,7 @@ func TestToHtmlString(t *testing.T) {
func TestToHtml(t *testing.T) {
- c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
+ c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
ToHtml(c, "")
filename := "Example_Domain.html"
@@ -94,7 +126,7 @@ func TestToHtml(t *testing.T) {
func TestToHtmlFilename(t *testing.T) {
filename := "ebook.html"
- c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
+ c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
ToHtml(c, filename)
if _, err := os.Stat(filename); errors.Is(err, os.ErrNotExist) {
@@ -109,7 +141,7 @@ func TestToHtmlFilename(t *testing.T) {
func TestToEpub(t *testing.T) {
- c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
+ c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
ToEpub(c, "")
filename := "Example_Domain.epub"
@@ -126,7 +158,7 @@ func TestToEpub(t *testing.T) {
func TestToEpubFilename(t *testing.T) {
filename := "ebook.epub"
- c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
+ c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
ToEpub(c, filename)
if _, err := os.Stat(filename); errors.Is(err, os.ErrNotExist) {
@@ -141,7 +173,7 @@ func TestToEpubFilename(t *testing.T) {
func TestToMobi(t *testing.T) {
- c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
+ c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
ToMobi(c, "")
filename := "Example_Domain.mobi"
@@ -158,7 +190,7 @@ func TestToMobi(t *testing.T) {
func TestToMobiFilename(t *testing.T) {
filename := "ebook.mobi"
- c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfig()}, 0, func(index int, name string) {})
+ c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{NewScrapeConfigQuiet()}, 0, func(index int, name string) {})
ToMobi(c, filename)
if _, err := os.Stat(filename); errors.Is(err, os.ErrNotExist) {
diff --git a/book/scraper.go b/book/scraper.go
index 8957dba..a75497f 100644
--- a/book/scraper.go
+++ b/book/scraper.go
@@ -30,14 +30,19 @@ type ScrapeConfig struct {
Include bool
ImagesOnly bool
UseLinkName bool
+ PrintURL bool
}
func NewScrapeConfig() *ScrapeConfig {
- return &ScrapeConfig{0, "", false, -1, 0, false, -1, -1, true, false, false}
+ return &ScrapeConfig{0, "", false, -1, 0, false, -1, -1, true, false, false, false}
+}
+
+func NewScrapeConfigQuiet() *ScrapeConfig {
+ return &ScrapeConfig{0, "", true, -1, 0, false, -1, -1, true, false, false, false}
}
func NewScrapeConfigNoInclude() *ScrapeConfig {
- return &ScrapeConfig{0, "", false, -1, 0, false, -1, -1, false, false, false}
+ return &ScrapeConfig{0, "", false, -1, 0, false, -1, -1, false, false, false, false}
}
func NewScrapeConfigs(selectors []string) []*ScrapeConfig {
@@ -252,7 +257,7 @@ func NewChapterFromURL(url, linkName string, configs []*ScrapeConfig, index int,
}
- return chapter{string(body), name, article.Byline, content, subchapters, config}
+ return chapter{url, string(body), name, article.Byline, content, subchapters, config}
}
func tableOfContent(url string, config *ScrapeConfig, subConfig *ScrapeConfig, quiet bool) ([]chapter, chapter) {
diff --git a/book/scraper_test.go b/book/scraper_test.go
index 4946f2d..500948b 100644
--- a/book/scraper_test.go
+++ b/book/scraper_test.go
@@ -7,7 +7,7 @@ import (
func TestBody(t *testing.T) {
- config := NewScrapeConfig()
+ config := NewScrapeConfigQuiet()
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})
got := c.Body()
@@ -21,7 +21,7 @@ func TestBody(t *testing.T) {
func TestName(t *testing.T) {
- config := NewScrapeConfig()
+ config := NewScrapeConfigQuiet()
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})
got := c.Name()
@@ -35,7 +35,7 @@ func TestName(t *testing.T) {
func TestCustomName(t *testing.T) {
- config := NewScrapeConfig()
+ config := NewScrapeConfigQuiet()
config.UseLinkName = true
c := NewChapterFromURL("https://example.com/", "Custom Name", []*ScrapeConfig{config}, 0, func(index int, name string) {})
@@ -50,7 +50,7 @@ func TestCustomName(t *testing.T) {
func TestAuthor(t *testing.T) {
- config := NewScrapeConfig()
+ config := NewScrapeConfigQuiet()
c := NewChapterFromURL("https://12factor.net/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})
got := c.Author()
@@ -64,7 +64,7 @@ func TestAuthor(t *testing.T) {
func TestContent(t *testing.T) {
- config := NewScrapeConfig()
+ config := NewScrapeConfigQuiet()
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})
got := c.Content()
@@ -78,10 +78,10 @@ func TestContent(t *testing.T) {
func TestDelay(t *testing.T) {
- config0 := NewScrapeConfig()
+ config0 := NewScrapeConfigQuiet()
config0.Delay = 500
- config1 := NewScrapeConfig()
+ config1 := NewScrapeConfigQuiet()
start := time.Now()
NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
@@ -98,7 +98,7 @@ func TestDelay(t *testing.T) {
func TestContentImagesOnly(t *testing.T) {
- config := NewScrapeConfig()
+ config := NewScrapeConfigQuiet()
config.ImagesOnly = true
c := NewChapterFromURL("https://12factor.net/codebase", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})
@@ -114,8 +114,8 @@ func TestContentImagesOnly(t *testing.T) {
func TestSubChapters(t *testing.T) {
- config0 := NewScrapeConfig()
- config1 := NewScrapeConfig()
+ config0 := NewScrapeConfigQuiet()
+ config1 := NewScrapeConfigQuiet()
c := NewChapterFromURL("https://html5example.com/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
@@ -130,8 +130,8 @@ func TestSubChapters(t *testing.T) {
func TestSubChaptersRSS(t *testing.T) {
- config0 := NewScrapeConfig()
- config1 := NewScrapeConfig()
+ config0 := NewScrapeConfigQuiet()
+ config1 := NewScrapeConfigQuiet()
c := NewChapterFromURL("https://www.nginx.com/feed/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
@@ -146,10 +146,10 @@ func TestSubChaptersRSS(t *testing.T) {
func TestSubChaptersSelector(t *testing.T) {
- config0 := NewScrapeConfig()
+ config0 := NewScrapeConfigQuiet()
config0.Selector = "body > aside > p > a"
- config1 := NewScrapeConfig()
+ config1 := NewScrapeConfigQuiet()
c := NewChapterFromURL("https://html5example.com/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
@@ -164,10 +164,10 @@ func TestSubChaptersSelector(t *testing.T) {
func TestSubChaptersLimit(t *testing.T) {
- config0 := NewScrapeConfig()
+ config0 := NewScrapeConfigQuiet()
config0.Limit = 1
- config1 := NewScrapeConfig()
+ config1 := NewScrapeConfigQuiet()
c := NewChapterFromURL("https://html5example.com/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
@@ -182,10 +182,10 @@ func TestSubChaptersLimit(t *testing.T) {
func TestSubChaptersLimitOver(t *testing.T) {
- config0 := NewScrapeConfig()
+ config0 := NewScrapeConfigQuiet()
config0.Limit = 15
- config1 := NewScrapeConfig()
+ config1 := NewScrapeConfigQuiet()
c := NewChapterFromURL("https://html5example.com/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
@@ -200,10 +200,10 @@ func TestSubChaptersLimitOver(t *testing.T) {
func TestReverse(t *testing.T) {
- config0 := NewScrapeConfig()
+ config0 := NewScrapeConfigQuiet()
config0.Reverse = true
- config1 := NewScrapeConfig()
+ config1 := NewScrapeConfigQuiet()
c := NewChapterFromURL("https://html5example.com/", "", []*ScrapeConfig{config0, config1}, 0, func(index int, name string) {})
@@ -218,7 +218,7 @@ func TestReverse(t *testing.T) {
func TestNotInclude(t *testing.T) {
- config := NewScrapeConfig()
+ config := NewScrapeConfigQuiet()
config.Include = false
c := NewChapterFromURL("https://example.com/", "", []*ScrapeConfig{config}, 0, func(index int, name string) {})
diff --git a/cmd/get.go b/cmd/get.go
index dd92a2a..a47fc0c 100644
--- a/cmd/get.go
+++ b/cmd/get.go
@@ -33,6 +33,7 @@ type GetOptions struct {
threads int
include bool
useLinkName bool
+ printURL bool
}
var getOpts *GetOptions
@@ -46,6 +47,7 @@ func init() {
getCmd.Flags().StringVarP(&getOpts.output, "output", "", "", "file name (default: book name)")
getCmd.Flags().BoolVarP(&getOpts.stdout, "stdout", "", false, "print to standard output")
getCmd.Flags().BoolVarP(&getOpts.images, "images", "", false, "retrieve images only")
+ getCmd.Flags().BoolVarP(&getOpts.printURL, "print-url", "", false, "print url after chapter title")
getCmd.Flags().BoolVarP(&getOpts.quiet, "quiet", "q", false, "hide progress bar")
// common with list command
@@ -147,6 +149,7 @@ var getCmd = &cobra.Command{
config.ImagesOnly = getOpts.images
config.Include = getOpts.include
config.UseLinkName = getOpts.useLinkName
+ config.PrintURL = getOpts.printURL
// do not use link name for root level as there is not parent link
if index == 0 {
diff --git a/cmd/version.go b/cmd/version.go
index 0630220..cce9f1c 100644
--- a/cmd/version.go
+++ b/cmd/version.go
@@ -14,6 +14,6 @@ var versionCmd = &cobra.Command{
Use: "version",
Short: "Print the version number of papeer",
Run: func(cmd *cobra.Command, args []string) {
- fmt.Println("papeer v0.8.1")
+ fmt.Println("papeer v0.8.2")
},
}