Skip to content

Commit

Permalink
Merge pull request #17 from ccremer/bulk-dl
Browse files Browse the repository at this point in the history
Add command to bulk-download all documents
  • Loading branch information
ccremer authored Jan 29, 2023
2 parents 0c5b480 + 15fda18 commit 14c204d
Show file tree
Hide file tree
Showing 14 changed files with 516 additions and 1 deletion.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,6 @@

# work
/.work/

/documents.zip
/documents
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ CLI tool to interact with paperless-ngx remote API

## Subcommands

- `upload`: Uploads local document(s) to Paperless instance
- `upload`: Uploads local document(s) to Paperless instance.
- `consume`: Consumes a local directory and uploads each file to Paperless instance. The files will be deleted once uploaded.
- `bulk-download`: Downloads all documents at once.

## Installation

Expand Down
126 changes: 126 additions & 0 deletions bulk_download_command.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
package main

import (
"fmt"
"os"

"github.com/ccremer/clustercode/pkg/archive"
"github.com/ccremer/clustercode/pkg/paperless"
"github.com/go-logr/logr"
"github.com/urfave/cli/v2"
)

// BulkDownloadCommand bundles the "bulk-download" CLI command definition with
// the values its flags are bound to.
type BulkDownloadCommand struct {
	cli.Command

	// Connection settings handed to paperless.NewClient.
	PaperlessURL, PaperlessToken, PaperlessUser string

	// TargetPath is where the download ends up; when empty a default is chosen.
	// Content is converted to paperless.BulkDownloadContent for the request.
	TargetPath, Content string

	// UnzipEnabled extracts the archive into the target instead of moving it.
	// OverwriteExistingTarget removes an existing target before downloading.
	UnzipEnabled, OverwriteExistingTarget bool
}

// newBulkDownloadCommand builds the "bulk-download" subcommand and binds every
// flag to the matching field of the returned BulkDownloadCommand.
func newBulkDownloadCommand() *BulkDownloadCommand {
	cmd := new(BulkDownloadCommand)

	// Each flag writes straight into a field of cmd.
	flags := []cli.Flag{
		newURLFlag(&cmd.PaperlessURL),
		newUsernameFlag(&cmd.PaperlessUser),
		newTokenFlag(&cmd.PaperlessToken),
		newTargetPathFlag(&cmd.TargetPath),
		newDownloadContentFlag(&cmd.Content),
		newUnzipFlag(&cmd.UnzipEnabled),
		newOverwriteFlag(&cmd.OverwriteExistingTarget),
	}

	cmd.Command = cli.Command{
		Name:   "bulk-download",
		Usage:  "Downloads all documents at once",
		Action: actions(LogMetadata, cmd.Action),
		Flags:  flags,
	}
	return cmd
}

// Action runs the bulk download.
//
// It verifies (or clears) the target path, queries the document list from the
// Paperless instance, downloads the selected documents into a temporary file
// and finally either unzips that file into the target or moves it there.
// The temporary file is closed before it is handed on and removed on return
// if it still exists.
func (c *BulkDownloadCommand) Action(ctx *cli.Context) error {
	log := logr.FromContextOrDiscard(ctx.Context)

	if prepareErr := c.prepareTarget(); prepareErr != nil {
		return prepareErr
	}
	clt := paperless.NewClient(c.PaperlessURL, c.PaperlessUser, c.PaperlessToken)

	log.Info("Getting list of documents")
	documents, queryErr := clt.QueryDocuments(ctx.Context, paperless.QueryParams{
		TruncateContent: true,
		Ordering:        "id",
		PageSize:        100,
	})
	if queryErr != nil {
		return queryErr
	}
	documentIDs := paperless.MapToDocumentIDs(documents)

	tmpFile, createTempErr := os.CreateTemp(os.TempDir(), "paperless-bulk-download-")
	if createTempErr != nil {
		return fmt.Errorf("cannot open temporary file: %w", createTempErr)
	}
	defer os.Remove(tmpFile.Name()) // cleanup if not renamed

	log.Info("Downloading documents")
	downloadErr := clt.BulkDownload(ctx.Context, tmpFile, paperless.BulkDownloadParams{
		FollowFormatting: true,
		Content:          paperless.BulkDownloadContent(c.Content),
		DocumentIDs:      documentIDs,
	})
	// Release the descriptor on every path. The later steps only use the file
	// name, and renaming or removing a still-open file fails on some platforms.
	closeErr := tmpFile.Close()
	if downloadErr != nil {
		return downloadErr
	}
	if closeErr != nil {
		// NOTE(review): BulkDownload is defined elsewhere; in case it already
		// closed the handle, an "already closed" result is not treated as a failure.
		if pathErr, ok := closeErr.(*os.PathError); !ok || pathErr.Err != os.ErrClosed {
			return fmt.Errorf("cannot close temporary file: %w", closeErr)
		}
	}

	if c.UnzipEnabled {
		return c.unzip(ctx, tmpFile)
	}
	return c.move(ctx, tmpFile)
}

// unzip extracts the downloaded archive, addressed by the temp file's name,
// into the target path and logs the destination directory on success.
func (c *BulkDownloadCommand) unzip(ctx *cli.Context, tmpFile *os.File) error {
	archivePath, targetDir := tmpFile.Name(), c.getTargetPath()

	err := archive.Unzip(ctx.Context, archivePath, targetDir)
	if err != nil {
		return fmt.Errorf("cannot unzip file %q to %q: %w", archivePath, targetDir, err)
	}

	logr.FromContextOrDiscard(ctx.Context).Info("Unzipped archive to dir", "dir", targetDir)
	return nil
}

// move places the downloaded archive at the target path.
//
// A plain rename is tried first. The temp file lives in the system temp
// directory, which may be on a different filesystem than the target; rename
// cannot cross that boundary, so on failure the content is copied instead.
// The temp file itself is left for the caller to remove in that case.
func (c *BulkDownloadCommand) move(ctx *cli.Context, tmpFile *os.File) error {
	log := logr.FromContextOrDiscard(ctx.Context)
	downloadFilePath := c.getTargetPath()
	if renameErr := os.Rename(tmpFile.Name(), downloadFilePath); renameErr != nil {
		log.V(1).Info("Rename failed, copying file instead", "reason", renameErr.Error())
		if copyErr := copyTempFile(tmpFile.Name(), downloadFilePath); copyErr != nil {
			return fmt.Errorf("cannot move temp file: %w (copy fallback failed: %v)", renameErr, copyErr)
		}
	}
	log.Info("Downloaded zip archive", "file", downloadFilePath)
	return nil
}

// copyTempFile streams the content of source into a newly created dest and
// removes a partially written dest again if copying or closing fails.
func copyTempFile(source, dest string) (err error) {
	src, err := os.Open(source)
	if err != nil {
		return err
	}
	defer src.Close()

	// 0600 matches the permissions os.CreateTemp gives the file a rename would keep.
	dst, err := os.OpenFile(dest, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o600)
	if err != nil {
		return err
	}
	defer func() {
		if closeErr := dst.Close(); closeErr != nil && err == nil {
			err = closeErr
		}
		if err != nil {
			// Best effort: the copy error is what gets reported.
			_ = os.Remove(dest)
		}
	}()

	_, err = dst.ReadFrom(src)
	return err
}

// getTargetPath returns the configured target path, or a default when none is
// set: "documents" if unzipping is enabled, "documents.zip" otherwise.
func (c *BulkDownloadCommand) getTargetPath() string {
	switch {
	case c.TargetPath != "":
		return c.TargetPath
	case c.UnzipEnabled:
		return "documents"
	default:
		return "documents.zip"
	}
}

// prepareTarget makes sure the target path is free before downloading.
//
// With overwrite enabled the target (file or directory tree) is removed.
// Otherwise a missing target yields nil, an existing target is an error, and
// a failure to inspect the target is reported as such rather than being
// mistaken for "exists".
func (c *BulkDownloadCommand) prepareTarget() error {
	target := c.getTargetPath()
	if c.OverwriteExistingTarget {
		return os.RemoveAll(target)
	}
	_, err := os.Stat(target)
	switch {
	case err == nil:
		return fmt.Errorf("target %q exists, abort", target)
	case os.IsNotExist(err):
		return nil
	default:
		// e.g. permission denied, or a path component that is not a directory.
		return fmt.Errorf("cannot check target %q: %w", target, err)
	}
}
48 changes: 48 additions & 0 deletions flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ package main

import (
"fmt"
"strings"
"time"

"github.com/ccremer/clustercode/pkg/paperless"
"github.com/urfave/cli/v2"
)

Expand Down Expand Up @@ -114,6 +116,52 @@ func newConsumeDelayFlag(dest *time.Duration) *cli.DurationFlag {
}
}

// newTargetPathFlag creates the "target-path" string flag, also settable via
// DOWNLOAD_TARGET_PATH, and stores its value in dest.
func newTargetPathFlag(dest *string) *cli.StringFlag {
	f := cli.StringFlag{
		Name:        "target-path",
		EnvVars:     []string{"DOWNLOAD_TARGET_PATH"},
		Usage:       "target file path where documents are downloaded.",
		DefaultText: "documents.zip",
		Destination: dest,
	}
	return &f
}

// newDownloadContentFlag creates the "content" string flag, also settable via
// DOWNLOAD_CONTENT, and stores its value in dest. The default is the archives
// variant; the flag action rejects any value that is not one of the known
// bulk-download content variants.
func newDownloadContentFlag(dest *string) *cli.StringFlag {
	allowed := []string{
		paperless.BulkDownloadArchives.String(),
		paperless.BulkDownloadOriginal.String(),
		paperless.BulkDownloadBoth.String(),
	}

	// validate accepts the value only if it matches one of the allowed variants.
	validate := func(_ *cli.Context, value string) error {
		for i := range allowed {
			if allowed[i] == value {
				return nil
			}
		}
		return fmt.Errorf("parameter %q must be one of [%s]", "content", strings.Join(allowed, ", "))
	}

	return &cli.StringFlag{
		Name:        "content",
		EnvVars:     []string{"DOWNLOAD_CONTENT"},
		Usage:       "selection of document variant.",
		Value:       paperless.BulkDownloadArchives.String(),
		Destination: dest,
		Action:      validate,
	}
}

// newUnzipFlag creates the boolean "unzip" flag, also settable via
// DOWNLOAD_UNZIP, and stores its value in dest.
func newUnzipFlag(dest *bool) *cli.BoolFlag {
	f := cli.BoolFlag{
		Name:        "unzip",
		EnvVars:     []string{"DOWNLOAD_UNZIP"},
		Usage:       "unzip the downloaded file.",
		Destination: dest,
	}
	return &f
}

// newOverwriteFlag creates the boolean "overwrite" flag, also settable via
// DOWNLOAD_OVERWRITE, and stores its value in dest.
func newOverwriteFlag(dest *bool) *cli.BoolFlag {
	f := cli.BoolFlag{
		Name:        "overwrite",
		EnvVars:     []string{"DOWNLOAD_OVERWRITE"},
		Usage:       "deletes existing file(s) before downloading.",
		Destination: dest,
	}
	return &f
}

func checkEmptyString(flagName string) func(*cli.Context, string) error {
return func(ctx *cli.Context, s string) error {
if s == "" {
Expand Down
4 changes: 4 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ require (
github.com/fsnotify/fsnotify v1.6.0
github.com/go-logr/logr v1.2.3
github.com/pterm/pterm v0.12.51
github.com/stretchr/testify v1.8.1
github.com/urfave/cli/v2 v2.23.7
)

Expand All @@ -15,14 +16,17 @@ require (
atomicgo.dev/keyboard v0.2.8 // indirect
github.com/containerd/console v1.0.3 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/gookit/color v1.5.2 // indirect
github.com/lithammer/fuzzysearch v1.1.5 // indirect
github.com/mattn/go-runewidth v0.0.14 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/rivo/uniseg v0.2.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778 // indirect
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
golang.org/x/sys v0.0.0-20220908164124-27713097b956 // indirect
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 // indirect
golang.org/x/text v0.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
3 changes: 3 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,10 @@ github.com/klauspost/cpuid/v2 v2.0.12/go.mod h1:g2LTdtYhdyuGPqyWyv7qRAmj1WBqxuOb
github.com/klauspost/cpuid/v2 v2.1.0/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY=
github.com/klauspost/cpuid/v2 v2.2.0 h1:4ZexSFt8agMNzNisrsilL6RClWDC5YJnLHNIfTy4iuc=
github.com/klauspost/cpuid/v2 v2.2.0/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/lithammer/fuzzysearch v1.1.5 h1:Ag7aKU08wp0R9QCfF4GoGST9HbmAIeLP7xwMrOBEp1c=
github.com/lithammer/fuzzysearch v1.1.5/go.mod h1:1R1LRNk7yKid1BaQkmuLQaHruxcC4HmAH30Dh61Ih1Q=
Expand Down Expand Up @@ -116,6 +118,7 @@ golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtn
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
Expand Down
1 change: 1 addition & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ func NewApp() *cli.App {
},
Commands: []*cli.Command{
&newUploadCommand().Command,
&newBulkDownloadCommand().Command,
&newConsumeCommand().Command,
},
}
Expand Down
Binary file added pkg/archive/testdata/unzip.zip
Binary file not shown.
70 changes: 70 additions & 0 deletions pkg/archive/unzip.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package archive

import (
"archive/zip"
"context"
"fmt"
"io"
"os"
"path/filepath"
"strings"

"github.com/go-logr/logr"
)

// Unzip reads and copies every file in the archive to the destination dir.
//
// source is the path of the zip archive, dest the directory to extract into;
// missing directories are created. Entries whose path would resolve to dest
// itself or to a location outside of dest are rejected with an error, which
// aborts the extraction. The first failing entry also aborts the extraction;
// entries written before that are left in place.
func Unzip(ctx context.Context, source, dest string) error {
	log := logr.FromContextOrDiscard(ctx)
	log.V(1).Info("Unzipping file", "source", source, "dest", dest)
	archive, openErr := zip.OpenReader(source)
	if openErr != nil {
		return fmt.Errorf("cannot open source file: %w", openErr)
	}
	defer archive.Close()

	destRoot := filepath.Clean(dest)
	parentPrefix := ".." + string(os.PathSeparator)
	for _, f := range archive.File {
		destFilePath := filepath.Join(destRoot, f.Name)

		// Compare via the relative path instead of a string prefix: a prefix
		// check wrongly rejects everything when dest is ".", "" or the root.
		rel, relErr := filepath.Rel(destRoot, destFilePath)
		if relErr != nil || rel == "." || rel == ".." || strings.HasPrefix(rel, parentPrefix) {
			return fmt.Errorf("invalid file path: %s", destFilePath)
		}
		if f.FileInfo().IsDir() {
			log.V(2).Info("Creating directory", "dir", f.FileInfo().Name())
			if mkdirErr := os.MkdirAll(destFilePath, os.ModePerm); mkdirErr != nil {
				return fmt.Errorf("cannot create directory: %w", mkdirErr)
			}
			continue
		}
		log.V(2).Info("Extracting file", "source", f.Name, "dest", destFilePath)

		if err := unzipFile(f, destFilePath); err != nil {
			return err
		}
	}
	return nil
}

// unzipFile writes the content of the archive entry f to destFilePath.
//
// The parent directory is created if needed and an existing destination file
// is truncated. The file is created with the mode recorded in the archive.
// An error is returned if the directory or either file cannot be opened, if
// copying fails, or if closing the destination file fails.
func unzipFile(f *zip.File, destFilePath string) (err error) {
	// ensure directory exists where file should be written.
	if mkdirErr := os.MkdirAll(filepath.Dir(destFilePath), os.ModePerm); mkdirErr != nil {
		return fmt.Errorf("cannot create directory: %w", mkdirErr)
	}

	dstFile, dstFileErr := os.OpenFile(destFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode())
	if dstFileErr != nil {
		return fmt.Errorf("cannot open destination file: %w", dstFileErr)
	}
	defer func() {
		// A failing close can mean the data never reached the disk; report it
		// unless an earlier error is already being returned.
		if closeErr := dstFile.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("cannot close destination file: %w", closeErr)
		}
	}()

	fileInArchive, srcFileErr := f.Open()
	if srcFileErr != nil {
		return fmt.Errorf("cannot open source file: %w", srcFileErr)
	}
	// Must stay open until the copy is done; closing it up front makes reads fail.
	defer fileInArchive.Close()

	if _, copyErr := io.Copy(dstFile, fileInArchive); copyErr != nil {
		return fmt.Errorf("cannot copy %q to %q: %w", f.Name, dstFile.Name(), copyErr)
	}
	return nil
}
28 changes: 28 additions & 0 deletions pkg/archive/unzip_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package archive

import (
"context"
"os"
"path/filepath"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// TestUnzip extracts the fixture archive into a scratch directory below
// testdata and checks that a top-level file and a file in a sub directory
// were written.
func TestUnzip(t *testing.T) {
	const (
		archivePath = "testdata/unzip.zip"
		outputDir   = "testdata/run"
	)

	// start from a clean slate in case an earlier run left files behind
	require.NoError(t, os.RemoveAll(outputDir))

	unzipErr := Unzip(context.TODO(), archivePath, outputDir)
	assert.NoError(t, unzipErr, "unzip failed with error")

	expectedFiles := []string{
		filepath.Join(outputDir, "toplevel.file"),
		filepath.Join(outputDir, "Dir In Archive", "Sub Dir.file"),
	}
	for _, path := range expectedFiles {
		assert.FileExists(t, path)
	}

	// remove the extracted files again
	require.NoError(t, os.RemoveAll(outputDir))
}
14 changes: 14 additions & 0 deletions pkg/paperless/document.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package paperless

// Document holds the document fields used by this package; so far that is
// only the identifier, mapped to the JSON key "id".
type Document struct {
// ID of the document, read-only.
ID int `json:"id"`
}

// MapToDocumentIDs returns the IDs of docs in the same order as the input.
// The result is always a non-nil slice, empty when docs is empty.
func MapToDocumentIDs(docs []Document) []int {
	ids := make([]int, 0, len(docs))
	for _, doc := range docs {
		ids = append(ids, doc.ID)
	}
	return ids
}
Loading

0 comments on commit 14c204d

Please sign in to comment.