Skip to content

Commit

Permalink
feat: support batch replication prefix slice (minio#20033)
Browse files Browse the repository at this point in the history
  • Loading branch information
jiuker authored Aug 1, 2024
1 parent 292fccf commit 50a5ad4
Show file tree
Hide file tree
Showing 10 changed files with 521 additions and 40 deletions.
38 changes: 27 additions & 11 deletions cmd/batch-expire.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"net/http"
"runtime"
"strconv"
"strings"
"time"

"github.com/minio/minio-go/v7/pkg/tags"
Expand Down Expand Up @@ -281,7 +282,7 @@ type BatchJobExpire struct {
line, col int
APIVersion string `yaml:"apiVersion" json:"apiVersion"`
Bucket string `yaml:"bucket" json:"bucket"`
Prefix string `yaml:"prefix" json:"prefix"`
Prefix BatchJobPrefix `yaml:"prefix" json:"prefix"`
NotificationCfg BatchJobNotification `yaml:"notify" json:"notify"`
Retry BatchJobRetry `yaml:"retry" json:"retry"`
Rules []BatchJobExpireFilter `yaml:"rules" json:"rules"`
Expand Down Expand Up @@ -535,18 +536,29 @@ func (r *BatchJobExpire) Start(ctx context.Context, api ObjectLayer, job BatchJo
return err
}

ctx, cancel := context.WithCancel(ctx)
defer cancel()
ctx, cancelCause := context.WithCancelCause(ctx)
defer cancelCause(nil)

results := make(chan itemOrErr[ObjectInfo], workerSize)
if err := api.Walk(ctx, r.Bucket, r.Prefix, results, WalkOptions{
Marker: lastObject,
LatestOnly: false, // we need to visit all versions of the object to implement purge: retainVersions
VersionsSort: WalkVersionsSortDesc,
}); err != nil {
// Do not need to retry if we can't list objects on source.
return err
}
go func() {
for _, prefix := range r.Prefix.F() {
prefixResultCh := make(chan itemOrErr[ObjectInfo], workerSize)
err := api.Walk(ctx, r.Bucket, strings.TrimSpace(prefix), prefixResultCh, WalkOptions{
Marker: lastObject,
LatestOnly: false, // we need to visit all versions of the object to implement purge: retainVersions
VersionsSort: WalkVersionsSortDesc,
})
if err != nil {
cancelCause(err)
xioutil.SafeClose(results)
return
}
for result := range prefixResultCh {
results <- result
}
}
xioutil.SafeClose(results)
}()

// Goroutine to periodically save batch-expire job's in-memory state
saverQuitCh := make(chan struct{})
Expand Down Expand Up @@ -657,6 +669,10 @@ func (r *BatchJobExpire) Start(ctx context.Context, api ObjectLayer, job BatchJo
ObjectInfo: result.Item,
})
}
if context.Cause(ctx) != nil {
xioutil.SafeClose(expireCh)
return context.Cause(ctx)
}
// Send any remaining objects downstream
if len(toDel) > 0 {
select {
Expand Down
14 changes: 9 additions & 5 deletions cmd/batch-expire_gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

54 changes: 54 additions & 0 deletions cmd/batch-expire_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package cmd

import (
"slices"
"testing"

"gopkg.in/yaml.v3"
Expand Down Expand Up @@ -68,4 +69,57 @@ expire: # Expire objects that match a condition
if err != nil {
t.Fatal("Failed to parse batch-job-expire yaml", err)
}
if !slices.Equal(job.Expire.Prefix.F(), []string{"myprefix"}) {
t.Fatal("Failed to parse batch-job-expire yaml")
}

multiPrefixExpireYaml := `
expire: # Expire objects that match a condition
apiVersion: v1
bucket: mybucket # Bucket where this batch job will expire matching objects from
prefix: # (Optional) Prefix under which this job will expire objects matching the rules below.
- myprefix
- myprefix1
rules:
- type: object # regular objects with zero or more older versions
name: NAME # match object names that satisfy the wildcard expression.
olderThan: 7d10h # match objects older than this value
createdBefore: "2006-01-02T15:04:05.00Z" # match objects created before "date"
tags:
- key: name
value: pick* # match objects with tag 'name', all values starting with 'pick'
metadata:
- key: content-type
value: image/* # match objects with 'content-type', all values starting with 'image/'
size:
lessThan: "10MiB" # match objects with size less than this value (e.g. 10MiB)
greaterThan: 1MiB # match objects with size greater than this value (e.g. 1MiB)
purge:
# retainVersions: 0 # (default) delete all versions of the object. This option is the fastest.
# retainVersions: 5 # keep the latest 5 versions of the object.
- type: deleted # objects with delete marker as their latest version
name: NAME # match object names that satisfy the wildcard expression.
olderThan: 10h # match objects older than this value (e.g. 7d10h31s)
createdBefore: "2006-01-02T15:04:05.00Z" # match objects created before "date"
purge:
# retainVersions: 0 # (default) delete all versions of the object. This option is the fastest.
# retainVersions: 5 # keep the latest 5 versions of the object including delete markers.
notify:
endpoint: https://notify.endpoint # notification endpoint to receive job completion status
token: Bearer xxxxx # optional authentication token for the notification endpoint
retry:
attempts: 10 # number of retries for the job before giving up
delay: 500ms # least amount of delay between each retry
`
var multiPrefixJob BatchJobRequest
err = yaml.Unmarshal([]byte(multiPrefixExpireYaml), &multiPrefixJob)
if err != nil {
t.Fatal("Failed to parse batch-job-expire yaml", err)
}
if !slices.Equal(multiPrefixJob.Expire.Prefix.F(), []string{"myprefix", "myprefix1"}) {
t.Fatal("Failed to parse batch-job-expire yaml")
}
}
86 changes: 68 additions & 18 deletions cmd/batch-handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -385,12 +385,23 @@ func (r *BatchJobReplicateV1) StartFromSource(ctx context.Context, api ObjectLay
s3Type := r.Target.Type == BatchJobReplicateResourceS3 || r.Source.Type == BatchJobReplicateResourceS3
minioSrc := r.Source.Type == BatchJobReplicateResourceMinIO
ctx, cancel := context.WithCancel(ctx)
objInfoCh := c.ListObjects(ctx, r.Source.Bucket, miniogo.ListObjectsOptions{
Prefix: r.Source.Prefix,
WithVersions: minioSrc,
Recursive: true,
WithMetadata: true,
})

objInfoCh := make(chan miniogo.ObjectInfo, 1)
go func() {
for _, prefix := range r.Source.Prefix.F() {
prefixObjInfoCh := c.ListObjects(ctx, r.Source.Bucket, miniogo.ListObjectsOptions{
Prefix: strings.TrimSpace(prefix),
WithVersions: minioSrc,
Recursive: true,
WithMetadata: true,
})
for obj := range prefixObjInfoCh {
objInfoCh <- obj
}
}
xioutil.SafeClose(objInfoCh)
}()

prevObj := ""
skipReplicate := false

Expand Down Expand Up @@ -1188,19 +1199,28 @@ func (r *BatchJobReplicateV1) Start(ctx context.Context, api ObjectLayer, job Ba
if walkQuorum == "" {
walkQuorum = "strict"
}
ctx, cancel := context.WithCancel(ctx)
ctx, cancelCause := context.WithCancelCause(ctx)
// one of source/target is s3, skip delete marker and all versions under the same object name.
s3Type := r.Target.Type == BatchJobReplicateResourceS3 || r.Source.Type == BatchJobReplicateResourceS3

if err := api.Walk(ctx, r.Source.Bucket, r.Source.Prefix, walkCh, WalkOptions{
Marker: lastObject,
Filter: selectObj,
AskDisks: walkQuorum,
}); err != nil {
cancel()
// Do not need to retry if we can't list objects on source.
return err
}
go func() {
for _, prefix := range r.Source.Prefix.F() {
prefixWalkCh := make(chan itemOrErr[ObjectInfo], 100)
if err := api.Walk(ctx, r.Source.Bucket, strings.TrimSpace(prefix), prefixWalkCh, WalkOptions{
Marker: lastObject,
Filter: selectObj,
AskDisks: walkQuorum,
}); err != nil {
cancelCause(err)
xioutil.SafeClose(walkCh)
return
}
for obj := range prefixWalkCh {
walkCh <- obj
}
}
xioutil.SafeClose(walkCh)
}()

prevObj := ""

Expand Down Expand Up @@ -1251,7 +1271,10 @@ func (r *BatchJobReplicateV1) Start(ctx context.Context, api ObjectLayer, job Ba
}()
}
wk.Wait()

// Do not need to retry if we can't list objects on source.
if context.Cause(ctx) != nil {
return context.Cause(ctx)
}
ri.RetryAttempts = attempts
ri.Complete = ri.ObjectsFailed == 0
ri.Failed = ri.ObjectsFailed > 0
Expand All @@ -1264,7 +1287,7 @@ func (r *BatchJobReplicateV1) Start(ctx context.Context, api ObjectLayer, job Ba
batchLogOnceIf(ctx, fmt.Errorf("unable to notify %v", err), job.ID+"notify")
}

cancel()
cancelCause(nil)
if ri.Failed {
ri.ObjectsFailed = 0
ri.Bucket = ""
Expand Down Expand Up @@ -2232,3 +2255,30 @@ func lookupStyle(s string) miniogo.BucketLookupType {
}
return lookup
}

// BatchJobPrefix - to support prefix field yaml unmarshalling with string or slice of strings
type BatchJobPrefix []string

var _ yaml.Unmarshaler = &BatchJobPrefix{}

// UnmarshalYAML - to support prefix field yaml unmarshalling with string or slice of strings
func (b *BatchJobPrefix) UnmarshalYAML(value *yaml.Node) error {
// try slice first
tmpSlice := []string{}
if err := value.Decode(&tmpSlice); err == nil {
*b = tmpSlice
return nil
}
// try string
tmpStr := ""
if err := value.Decode(&tmpStr); err == nil {
*b = []string{tmpStr}
return nil
}
return fmt.Errorf("unable to decode %s", value.Value)
}

// F - return prefix(es) as slice
func (b *BatchJobPrefix) F() []string {
return *b
}
83 changes: 83 additions & 0 deletions cmd/batch-handlers_gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 50a5ad4

Please sign in to comment.