-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #17 from ccremer/bulk-dl
Add command to bulk-download all documents
- Loading branch information
Showing
14 changed files
with
516 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,3 +8,6 @@ | |
|
||
# work | ||
/.work/ | ||
|
||
/documents.zip | ||
/documents |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
package main | ||
|
||
import ( | ||
"fmt" | ||
"os" | ||
|
||
"github.com/ccremer/clustercode/pkg/archive" | ||
"github.com/ccremer/clustercode/pkg/paperless" | ||
"github.com/go-logr/logr" | ||
"github.com/urfave/cli/v2" | ||
) | ||
|
||
type BulkDownloadCommand struct { | ||
cli.Command | ||
|
||
PaperlessURL string | ||
PaperlessToken string | ||
PaperlessUser string | ||
|
||
TargetPath string | ||
Content string | ||
UnzipEnabled bool | ||
OverwriteExistingTarget bool | ||
} | ||
|
||
func newBulkDownloadCommand() *BulkDownloadCommand { | ||
c := &BulkDownloadCommand{} | ||
c.Command = cli.Command{ | ||
Name: "bulk-download", | ||
Usage: "Downloads all documents at once", | ||
Action: actions(LogMetadata, c.Action), | ||
Flags: []cli.Flag{ | ||
newURLFlag(&c.PaperlessURL), | ||
newUsernameFlag(&c.PaperlessUser), | ||
newTokenFlag(&c.PaperlessToken), | ||
newTargetPathFlag(&c.TargetPath), | ||
newDownloadContentFlag(&c.Content), | ||
newUnzipFlag(&c.UnzipEnabled), | ||
newOverwriteFlag(&c.OverwriteExistingTarget), | ||
}, | ||
} | ||
return c | ||
} | ||
|
||
func (c *BulkDownloadCommand) Action(ctx *cli.Context) error { | ||
log := logr.FromContextOrDiscard(ctx.Context) | ||
|
||
if prepareErr := c.prepareTarget(); prepareErr != nil { | ||
return prepareErr | ||
} | ||
clt := paperless.NewClient(c.PaperlessURL, c.PaperlessUser, c.PaperlessToken) | ||
|
||
log.Info("Getting list of documents") | ||
documents, queryErr := clt.QueryDocuments(ctx.Context, paperless.QueryParams{ | ||
TruncateContent: true, | ||
Ordering: "id", | ||
PageSize: 100, | ||
}) | ||
if queryErr != nil { | ||
return queryErr | ||
} | ||
documentIDs := paperless.MapToDocumentIDs(documents) | ||
|
||
tmpFile, createTempErr := os.CreateTemp(os.TempDir(), "paperless-bulk-download-") | ||
if createTempErr != nil { | ||
return fmt.Errorf("cannot open temporary file: %w", createTempErr) | ||
} | ||
defer os.Remove(tmpFile.Name()) // cleanup if not renamed | ||
|
||
log.Info("Downloading documents") | ||
downloadErr := clt.BulkDownload(ctx.Context, tmpFile, paperless.BulkDownloadParams{ | ||
FollowFormatting: true, | ||
Content: paperless.BulkDownloadContent(c.Content), | ||
DocumentIDs: documentIDs, | ||
}) | ||
if downloadErr != nil { | ||
return downloadErr | ||
} | ||
|
||
if c.UnzipEnabled { | ||
return c.unzip(ctx, tmpFile) | ||
} | ||
return c.move(ctx, tmpFile) | ||
} | ||
|
||
func (c *BulkDownloadCommand) unzip(ctx *cli.Context, tmpFile *os.File) error { | ||
log := logr.FromContextOrDiscard(ctx.Context) | ||
downloadFilePath := c.getTargetPath() | ||
if unzipErr := archive.Unzip(ctx.Context, tmpFile.Name(), downloadFilePath); unzipErr != nil { | ||
return fmt.Errorf("cannot unzip file %q to %q: %w", tmpFile.Name(), downloadFilePath, unzipErr) | ||
} | ||
log.Info("Unzipped archive to dir", "dir", downloadFilePath) | ||
return nil | ||
} | ||
|
||
func (c *BulkDownloadCommand) move(ctx *cli.Context, tmpFile *os.File) error { | ||
log := logr.FromContextOrDiscard(ctx.Context) | ||
downloadFilePath := c.getTargetPath() | ||
if renameErr := os.Rename(tmpFile.Name(), downloadFilePath); renameErr != nil { | ||
return fmt.Errorf("cannot move temp file: %w", renameErr) | ||
} | ||
log.Info("Downloaded zip archive", "file", downloadFilePath) | ||
return nil | ||
} | ||
|
||
func (c *BulkDownloadCommand) getTargetPath() string { | ||
if c.TargetPath != "" { | ||
return c.TargetPath | ||
} | ||
if c.UnzipEnabled { | ||
return "documents" | ||
} | ||
return "documents.zip" | ||
} | ||
|
||
func (c *BulkDownloadCommand) prepareTarget() error { | ||
target := c.getTargetPath() | ||
if c.OverwriteExistingTarget { | ||
return os.RemoveAll(target) | ||
} | ||
_, err := os.Stat(target) | ||
if err != nil && os.IsNotExist(err) { | ||
return nil | ||
} | ||
return fmt.Errorf("target %q exists, abort", target) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
package archive | ||
|
||
import ( | ||
"archive/zip" | ||
"context" | ||
"fmt" | ||
"io" | ||
"os" | ||
"path/filepath" | ||
"strings" | ||
|
||
"github.com/go-logr/logr" | ||
) | ||
|
||
// Unzip reads and copies every file in the archive to the destination dir. | ||
func Unzip(ctx context.Context, source, dest string) error { | ||
log := logr.FromContextOrDiscard(ctx) | ||
log.V(1).Info("Unzipping file", "source", source, "dest", dest) | ||
archive, openErr := zip.OpenReader(source) | ||
if openErr != nil { | ||
return fmt.Errorf("cannot open source file: %w", openErr) | ||
} | ||
defer archive.Close() | ||
|
||
for _, f := range archive.File { | ||
destFilePath := filepath.Join(dest, f.Name) | ||
|
||
if !strings.HasPrefix(destFilePath, filepath.Clean(dest)+string(os.PathSeparator)) { | ||
return fmt.Errorf("invalid file path: %s", destFilePath) | ||
} | ||
if f.FileInfo().IsDir() { | ||
log.V(2).Info("Creating directory", "dir", f.FileInfo().Name()) | ||
if mkdirErr := os.MkdirAll(destFilePath, os.ModePerm); mkdirErr != nil { | ||
return fmt.Errorf("cannot create directory: %w", mkdirErr) | ||
} | ||
continue | ||
} | ||
log.V(2).Info("Extracting file", "source", f.Name, "dest", destFilePath) | ||
|
||
err := unzipFile(f, destFilePath) | ||
if err != nil { | ||
return err | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
// unzipFile writes the content of the archive entry f to destFilePath.
//
// Missing parent directories are created with os.ModePerm. The destination is
// created or truncated and opened with the mode recorded in the archive
// entry. An error is returned when a directory or file cannot be created or
// opened, when copying fails, or when closing the destination file fails.
func unzipFile(f *zip.File, destFilePath string) (err error) {
	// ensure directory exists where file should be written.
	if mkdirErr := os.MkdirAll(filepath.Dir(destFilePath), os.ModePerm); mkdirErr != nil {
		return fmt.Errorf("cannot create directory: %w", mkdirErr)
	}

	dstFile, dstFileErr := os.OpenFile(destFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode())
	if dstFileErr != nil {
		return fmt.Errorf("cannot open destination file: %w", dstFileErr)
	}
	defer func() {
		// A failing close of a written file can mean lost data, so report it
		// unless an earlier error is already being returned.
		if closeErr := dstFile.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("cannot close destination file: %w", closeErr)
		}
	}()

	fileInArchive, srcFileErr := f.Open()
	if srcFileErr != nil {
		return fmt.Errorf("cannot open source file: %w", srcFileErr)
	}
	// Must stay open until the copy below has finished reading from it.
	defer fileInArchive.Close()

	if _, copyErr := io.Copy(dstFile, fileInArchive); copyErr != nil {
		return fmt.Errorf("cannot copy %q to %q: %w", f.Name, dstFile.Name(), copyErr)
	}
	return nil
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
package archive | ||
|
||
import ( | ||
"context" | ||
"os" | ||
"path/filepath" | ||
"testing" | ||
|
||
"github.com/stretchr/testify/assert" | ||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
func TestUnzip(t *testing.T) { | ||
testFilePath := "testdata/unzip.zip" | ||
testDir := "testdata/run" | ||
|
||
// cleanup previous test files in case of failure | ||
require.NoError(t, os.RemoveAll(testDir)) | ||
|
||
err := Unzip(context.TODO(), testFilePath, testDir) | ||
assert.NoError(t, err, "unzip failed with error") | ||
|
||
assert.FileExists(t, filepath.Join(testDir, "toplevel.file")) | ||
assert.FileExists(t, filepath.Join(testDir, "Dir In Archive", "Sub Dir.file")) | ||
|
||
// cleanup | ||
require.NoError(t, os.RemoveAll(testDir)) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
package paperless | ||
|
||
// Document is the subset of a document record that is decoded from JSON.
type Document struct {
	// ID of the document, read-only.
	ID int `json:"id"`
}

// MapToDocumentIDs returns the IDs of docs in the same order. The result is
// always a non-nil slice, even when docs is empty.
func MapToDocumentIDs(docs []Document) []int {
	ids := make([]int, 0, len(docs))
	for _, doc := range docs {
		ids = append(ids, doc.ID)
	}
	return ids
}
Oops, something went wrong.