
heal: Include more use cases of not healable but readable objects (minio#248) (minio#20776)

If an object has many parts where all parts are readable but some parts
are missing from some drives, the object can sometimes be reported as
un-healable, which is wrong.

This commit avoids reading from drives that have missing, corrupted, or
outdated xl.meta. It also checks whether any part is unreadable and
avoids healing in that case.
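For intuition, here is a minimal, self-contained sketch of the rule this commit introduces: for every part, count the drives on which that part could not be verified; once any single part has more failures than there are parity blocks, reconstruction is impossible and healing must not be attempted. The checkPart* codes, the parity value, and the sample data below are illustrative stand-ins rather than minio's actual definitions; only countPartNotSuccess mirrors the helper added by this commit.

```go
package main

import "fmt"

// Illustrative stand-ins for minio's internal checkPart* result codes.
const (
	checkPartSuccess = iota
	checkPartFileNotFound
	checkPartFileCorrupt
)

// countPartNotSuccess mirrors the helper added by this commit: it counts
// the drives on which a given part could not be verified.
func countPartNotSuccess(partErrs []int) (c int) {
	for _, pe := range partErrs {
		if pe != checkPartSuccess {
			c++
		}
	}
	return
}

func main() {
	parityBlocks := 2 // assumed EC:2 layout, 4 drives total

	// dataErrsByPart[partIndex][diskIndex]: check result per part per drive.
	dataErrsByPart := [][]int{
		{checkPartSuccess, checkPartSuccess, checkPartFileNotFound, checkPartSuccess},
		{checkPartFileCorrupt, checkPartSuccess, checkPartFileNotFound, checkPartFileNotFound},
	}

	cannotHeal := false
	for part, partErrs := range dataErrsByPart {
		if countPartNotSuccess(partErrs) > parityBlocks {
			// Part 1 is bad on 3 of 4 drives: with only 2 parity shards it
			// cannot be reconstructed, so healing must not be attempted.
			fmt.Printf("part %d lost read quorum (parity=%d)\n", part, parityBlocks)
			cannotHeal = true
		}
	}
	fmt.Println("cannotHeal:", cannotHeal)
}
```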
vadmeste authored Dec 18, 2024
1 parent 01e520e commit 16f8cf1
Showing 4 changed files with 86 additions and 120 deletions.
46 changes: 16 additions & 30 deletions cmd/erasure-healing-common.go
@@ -277,23 +277,21 @@ func partNeedsHealing(partErrs []int) bool {
return slices.IndexFunc(partErrs, func(i int) bool { return i != checkPartSuccess && i != checkPartUnknown }) > -1
}

-func hasPartErr(partErrs []int) bool {
-return slices.IndexFunc(partErrs, func(i int) bool { return i != checkPartSuccess }) > -1
+func countPartNotSuccess(partErrs []int) (c int) {
+for _, pe := range partErrs {
+if pe != checkPartSuccess {
+c++
+}
+}
+return
}

-// disksWithAllParts - This function needs to be called with
-// []StorageAPI returned by listOnlineDisks. Returns,
-//
-// - disks which have all parts specified in the latest xl.meta.
-//
-// - slice of errors about the state of data files on disk - can have
-// a not-found error or a hash-mismatch error.
-func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetadata []FileInfo,
+// checkObjectWithAllParts resets partsMetadata and onlineDisks entries when xl.meta is nonexistent, corrupted, or outdated.
+// It also checks the status of each part (corrupted, missing, ok) on each drive.
+func checkObjectWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetadata []FileInfo,
errs []error, latestMeta FileInfo, filterByETag bool, bucket, object string,
scanMode madmin.HealScanMode,
-) (availableDisks []StorageAPI, dataErrsByDisk map[int][]int, dataErrsByPart map[int][]int) {
-availableDisks = make([]StorageAPI, len(onlineDisks))
-
+) (dataErrsByDisk map[int][]int, dataErrsByPart map[int][]int) {
dataErrsByDisk = make(map[int][]int, len(onlineDisks))
for i := range onlineDisks {
dataErrsByDisk[i] = make([]int, len(latestMeta.Parts))
@@ -334,12 +332,12 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad

metaErrs := make([]error, len(errs))

-for i, onlineDisk := range onlineDisks {
+for i := range onlineDisks {
if errs[i] != nil {
metaErrs[i] = errs[i]
continue
}
-if onlineDisk == OfflineDisk {
+if onlineDisks[i] == OfflineDisk {
metaErrs[i] = errDiskNotFound
continue
}
@@ -355,13 +353,15 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
if corrupted {
metaErrs[i] = errFileCorrupt
partsMetadata[i] = FileInfo{}
+onlineDisks[i] = nil
continue
}

if erasureDistributionReliable {
if !meta.IsValid() {
partsMetadata[i] = FileInfo{}
metaErrs[i] = errFileCorrupt
+onlineDisks[i] = nil
continue
}

@@ -372,6 +372,7 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
// might have the right erasure distribution.
partsMetadata[i] = FileInfo{}
metaErrs[i] = errFileCorrupt
+onlineDisks[i] = nil
continue
}
}
@@ -440,20 +441,5 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
dataErrsByDisk[disk][part] = dataErrsByPart[part][disk]
}
}

-for i, onlineDisk := range onlineDisks {
-if metaErrs[i] == nil {
-meta := partsMetadata[i]
-if meta.Deleted || meta.IsRemote() || !hasPartErr(dataErrsByDisk[i]) {
-// All parts verified, mark it as all data available.
-availableDisks[i] = onlineDisk
-continue
-}
-}
-
-// upon errors just make that disk's fileinfo invalid
-partsMetadata[i] = FileInfo{}
-}

return
}
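The behavioral core of the change above: a drive whose xl.meta is corrupted or invalid is now dropped from onlineDisks in place (and its partsMetadata entry cleared), instead of being carried along and filtered into a separate availableDisks slice at the end. A rough sketch of that pattern, with simplified stand-ins for minio's FileInfo and StorageAPI types:

```go
package main

import (
	"errors"
	"fmt"
)

var errFileCorrupt = errors.New("file is corrupted")

// FileInfo is a simplified stand-in for minio's xl.meta record.
type FileInfo struct{ valid bool }

func (f FileInfo) IsValid() bool { return f.valid }

// StorageAPI is a simplified stand-in for minio's drive interface.
type StorageAPI interface{ String() string }

type drive string

func (d drive) String() string { return string(d) }

func main() {
	onlineDisks := []StorageAPI{drive("d0"), drive("d1"), drive("d2")}
	partsMetadata := []FileInfo{{valid: true}, {valid: false}, {valid: true}}
	metaErrs := make([]error, len(onlineDisks))

	for i := range onlineDisks {
		if !partsMetadata[i].IsValid() {
			// Drop the drive in place: later stages iterate onlineDisks
			// and will simply never read parts from a nil entry.
			partsMetadata[i] = FileInfo{}
			metaErrs[i] = errFileCorrupt
			onlineDisks[i] = nil
		}
	}
	fmt.Println(onlineDisks, metaErrs) // [d0 <nil> d2] [<nil> file is corrupted <nil>]
}
```

Because every later stage ranges over onlineDisks, a nil entry can never be read from again, which is what keeps drives with missing or outdated xl.meta from polluting the part checks.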
49 changes: 16 additions & 33 deletions cmd/erasure-healing-common_test.go
@@ -313,11 +313,11 @@ func TestListOnlineDisks(t *testing.T) {
t.Fatalf("Expected modTime to be equal to %v but was found to be %v",
test.expectedTime, modTime)
}
-availableDisks, _, _ := disksWithAllParts(ctx, onlineDisks, partsMetadata,
+_, _ = checkObjectWithAllParts(ctx, onlineDisks, partsMetadata,
test.errs, fi, false, bucket, object, madmin.HealDeepScan)

if test._tamperBackend != noTamper {
-if tamperedIndex != -1 && availableDisks[tamperedIndex] != nil {
+if tamperedIndex != -1 && onlineDisks[tamperedIndex] != nil {
t.Fatalf("Drive (%v) with part.1 missing is not a drive with available data",
erasureDisks[tamperedIndex])
}
@@ -495,11 +495,11 @@ func TestListOnlineDisksSmallObjects(t *testing.T) {
test.expectedTime, modTime)
}

-availableDisks, _, _ := disksWithAllParts(ctx, onlineDisks, partsMetadata,
+_, _ = checkObjectWithAllParts(ctx, onlineDisks, partsMetadata,
test.errs, fi, false, bucket, object, madmin.HealDeepScan)

if test._tamperBackend != noTamper {
-if tamperedIndex != -1 && availableDisks[tamperedIndex] != nil {
+if tamperedIndex != -1 && onlineDisks[tamperedIndex] != nil {
t.Fatalf("Drive (%v) with part.1 missing is not a drive with available data",
erasureDisks[tamperedIndex])
}
@@ -545,6 +545,7 @@ func TestDisksWithAllParts(t *testing.T) {

// Test 1: Test that all disks are returned without any failures with
// unmodified meta data
+erasureDisks = s.getDisks()
partsMetadata, errs := readAllFileInfo(ctx, erasureDisks, "", bucket, object, "", false, true)
if err != nil {
t.Fatalf("Failed to read xl meta data %v", err)
@@ -557,14 +558,10 @@ func TestDisksWithAllParts(t *testing.T) {

erasureDisks, _, _ = listOnlineDisks(erasureDisks, partsMetadata, errs, readQuorum)

-filteredDisks, _, dataErrsPerDisk := disksWithAllParts(ctx, erasureDisks, partsMetadata,
+dataErrsPerDisk, _ := checkObjectWithAllParts(ctx, erasureDisks, partsMetadata,
errs, fi, false, bucket, object, madmin.HealDeepScan)

-if len(filteredDisks) != len(erasureDisks) {
-t.Errorf("Unexpected number of drives: %d", len(filteredDisks))
-}
-
-for diskIndex, disk := range filteredDisks {
+for diskIndex, disk := range erasureDisks {
if partNeedsHealing(dataErrsPerDisk[diskIndex]) {
t.Errorf("Unexpected error: %v", dataErrsPerDisk[diskIndex])
}
@@ -575,17 +572,15 @@ func TestDisksWithAllParts(t *testing.T) {
}

// Test 2: Not synchronized modtime
+erasureDisks = s.getDisks()
partsMetadataBackup := partsMetadata[0]
partsMetadata[0].ModTime = partsMetadata[0].ModTime.Add(-1 * time.Hour)

errs = make([]error, len(erasureDisks))
-filteredDisks, _, _ = disksWithAllParts(ctx, erasureDisks, partsMetadata,
+_, _ = checkObjectWithAllParts(ctx, erasureDisks, partsMetadata,
errs, fi, false, bucket, object, madmin.HealDeepScan)

-if len(filteredDisks) != len(erasureDisks) {
-t.Errorf("Unexpected number of drives: %d", len(filteredDisks))
-}
-for diskIndex, disk := range filteredDisks {
+for diskIndex, disk := range erasureDisks {
if diskIndex == 0 && disk != nil {
t.Errorf("Drive not filtered as expected, drive: %d", diskIndex)
}
@@ -596,17 +591,15 @@ func TestDisksWithAllParts(t *testing.T) {
partsMetadata[0] = partsMetadataBackup // Revert before going to the next test

// Test 3: Not synchronized DataDir
+erasureDisks = s.getDisks()
partsMetadataBackup = partsMetadata[1]
partsMetadata[1].DataDir = "foo-random"

errs = make([]error, len(erasureDisks))
-filteredDisks, _, _ = disksWithAllParts(ctx, erasureDisks, partsMetadata,
+_, _ = checkObjectWithAllParts(ctx, erasureDisks, partsMetadata,
errs, fi, false, bucket, object, madmin.HealDeepScan)

-if len(filteredDisks) != len(erasureDisks) {
-t.Errorf("Unexpected number of drives: %d", len(filteredDisks))
-}
-for diskIndex, disk := range filteredDisks {
+for diskIndex, disk := range erasureDisks {
if diskIndex == 1 && disk != nil {
t.Errorf("Drive not filtered as expected, drive: %d", diskIndex)
}
@@ -617,6 +610,7 @@ func TestDisksWithAllParts(t *testing.T) {
partsMetadata[1] = partsMetadataBackup // Revert before going to the next test

// Test 4: key = disk index, value = part name with hash mismatch
+erasureDisks = s.getDisks()
diskFailures := make(map[int]string)
diskFailures[0] = "part.1"
diskFailures[3] = "part.1"
@@ -637,29 +631,18 @@ func TestDisksWithAllParts(t *testing.T) {
}

errs = make([]error, len(erasureDisks))
-filteredDisks, dataErrsPerDisk, _ = disksWithAllParts(ctx, erasureDisks, partsMetadata,
+dataErrsPerDisk, _ = checkObjectWithAllParts(ctx, erasureDisks, partsMetadata,
errs, fi, false, bucket, object, madmin.HealDeepScan)

-if len(filteredDisks) != len(erasureDisks) {
-t.Errorf("Unexpected number of drives: %d", len(filteredDisks))
-}
-
-for diskIndex, disk := range filteredDisks {
+for diskIndex := range erasureDisks {
if _, ok := diskFailures[diskIndex]; ok {
-if disk != nil {
-t.Errorf("Drive not filtered as expected, drive: %d", diskIndex)
-}
if !partNeedsHealing(dataErrsPerDisk[diskIndex]) {
t.Errorf("Disk expected to be healed, driveIndex: %d", diskIndex)
}
} else {
-if disk == nil {
-t.Errorf("Drive erroneously filtered, driveIndex: %d", diskIndex)
-}
if partNeedsHealing(dataErrsPerDisk[diskIndex]) {
t.Errorf("Disk not expected to be healed, driveIndex: %d", diskIndex)
}

}
}
}
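Since the function no longer returns a filtered disk slice, the tests above assert directly against erasureDisks and the returned dataErrsPerDisk. Below is a condensed, runnable sketch of the Test-4 assertion pattern; partNeedsHealing is copied from the diff above, while the result codes and the tampered-drive setup are hypothetical:

```go
package main

import (
	"fmt"
	"slices"
)

// Illustrative stand-ins for minio's checkPart* result codes.
const (
	checkPartSuccess = iota
	checkPartUnknown
	checkPartFileNotFound
)

// partNeedsHealing mirrors the helper used by the tests above.
func partNeedsHealing(partErrs []int) bool {
	return slices.IndexFunc(partErrs, func(i int) bool {
		return i != checkPartSuccess && i != checkPartUnknown
	}) > -1
}

func main() {
	// Hypothetical setup: part.1 was tampered with on drives 0 and 3.
	diskFailures := map[int]string{0: "part.1", 3: "part.1"}

	// Per-drive check results for the object's single part.
	dataErrsPerDisk := [][]int{
		{checkPartFileNotFound},
		{checkPartSuccess},
		{checkPartSuccess},
		{checkPartFileNotFound},
	}

	for diskIndex := range dataErrsPerDisk {
		_, tampered := diskFailures[diskIndex]
		if tampered != partNeedsHealing(dataErrsPerDisk[diskIndex]) {
			fmt.Printf("unexpected heal state for drive %d\n", diskIndex)
		}
	}
	fmt.Println("all drives flagged as expected")
}
```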
73 changes: 35 additions & 38 deletions cmd/erasure-healing.go
@@ -161,33 +161,33 @@ var (

// Only heal on disks where we are sure that healing is needed. We can expand
// this list as and when we figure out more errors can be added to this list safely.
-func shouldHealObjectOnDisk(erErr error, partsErrs []int, meta FileInfo, latestMeta FileInfo) (bool, error) {
+func shouldHealObjectOnDisk(erErr error, partsErrs []int, meta FileInfo, latestMeta FileInfo) (bool, bool, error) {
if errors.Is(erErr, errFileNotFound) || errors.Is(erErr, errFileVersionNotFound) || errors.Is(erErr, errFileCorrupt) {
-return true, erErr
+return true, true, erErr
}
if erErr == nil {
if meta.XLV1 {
// Legacy means heal always
// always check first.
-return true, errLegacyXLMeta
+return true, true, errLegacyXLMeta
}
if !latestMeta.Equals(meta) {
-return true, errOutdatedXLMeta
+return true, true, errOutdatedXLMeta
}
if !meta.Deleted && !meta.IsRemote() {
// If xl.meta was read fine but there may be problem with the part.N files.
for _, partErr := range partsErrs {
if partErr == checkPartFileNotFound {
-return true, errPartMissing
+return true, false, errPartMissing
}
if partErr == checkPartFileCorrupt {
-return true, errPartCorrupt
+return true, false, errPartCorrupt
}
}
}
-return false, nil
+return false, false, nil
}
-return false, erErr
+return false, false, erErr
}

const (
@@ -363,16 +363,7 @@ func (er *erasureObjects) healObject(ctx context.Context, bucket string, object
// No modtime quorum
filterDisksByETag := quorumETag != ""

-// List of disks having all parts as per latest metadata.
-// NOTE: do not pass in latestDisks to diskWithAllParts since
-// the diskWithAllParts needs to reach the drive to ensure
-// validity of the metadata content, we should make sure that
-// we pass in disks as is for it to be verified. Once verified
-// the disksWithAllParts() returns the actual disks that can be
-// used here for reconstruction. This is done to ensure that
-// we do not skip drives that have inconsistent metadata to be
-// skipped from purging when they are stale.
-availableDisks, dataErrsByDisk, dataErrsByPart := disksWithAllParts(ctx, onlineDisks, partsMetadata,
+dataErrsByDisk, dataErrsByPart := checkObjectWithAllParts(ctx, onlineDisks, partsMetadata,
errs, latestMeta, filterDisksByETag, bucket, object, scanMode)

var erasure Erasure
@@ -394,12 +385,15 @@ func (er *erasureObjects) healObject(ctx context.Context, bucket string, object
// data state and a list of outdated disks on which data needs
// to be healed.
outDatedDisks := make([]StorageAPI, len(storageDisks))
-disksToHealCount := 0
-for i := range availableDisks {
-yes, reason := shouldHealObjectOnDisk(errs[i], dataErrsByDisk[i], partsMetadata[i], latestMeta)
+disksToHealCount, xlMetaToHealCount := 0, 0
+for i := range onlineDisks {
+yes, isMeta, reason := shouldHealObjectOnDisk(errs[i], dataErrsByDisk[i], partsMetadata[i], latestMeta)
if yes {
outDatedDisks[i] = storageDisks[i]
disksToHealCount++
+if isMeta {
+xlMetaToHealCount++
+}
}

driveState := objectErrToDriveState(reason)
@@ -416,16 +410,6 @@ func (er *erasureObjects) healObject(ctx context.Context, bucket string, object
})
}

-if isAllNotFound(errs) {
-// File is fully gone, fileInfo is empty.
-err := errFileNotFound
-if versionID != "" {
-err = errFileVersionNotFound
-}
-return er.defaultHealResult(FileInfo{}, storageDisks, storageEndpoints, errs,
-bucket, object, versionID), err
-}

if disksToHealCount == 0 {
// Nothing to heal!
return result, nil
@@ -437,13 +421,23 @@ func (er *erasureObjects) healObject(ctx context.Context, bucket string, object
return result, nil
}

-cannotHeal := !latestMeta.XLV1 && !latestMeta.Deleted && disksToHealCount > latestMeta.Erasure.ParityBlocks
+cannotHeal := !latestMeta.XLV1 && !latestMeta.Deleted && xlMetaToHealCount > latestMeta.Erasure.ParityBlocks
if cannotHeal && quorumETag != "" {
// This is an object that is supposed to be removed by the dangling code
// but we noticed that ETag is the same for all objects, let's give it a shot
cannotHeal = false
}

+if !latestMeta.Deleted && !latestMeta.IsRemote() {
+// check if there is a part that lost its quorum
+for _, partErrs := range dataErrsByPart {
+if countPartNotSuccess(partErrs) > latestMeta.Erasure.ParityBlocks {
+cannotHeal = true
+break
+}
+}
+}

if cannotHeal {
// Allow for dangling deletes, on versions that have DataDir missing etc.
// this would end up restoring the correct readable versions.
@@ -482,16 +476,15 @@ func (er *erasureObjects) healObject(ctx context.Context, bucket string, object
tmpID := mustGetUUID()
migrateDataDir := mustGetUUID()

-// Reorder so that we have data disks first and parity disks next.
-if !latestMeta.Deleted && len(latestMeta.Erasure.Distribution) != len(availableDisks) {
-err := fmt.Errorf("unexpected file distribution (%v) from available disks (%v), looks like backend disks have been manually modified refusing to heal %s/%s(%s)",
-latestMeta.Erasure.Distribution, availableDisks, bucket, object, versionID)
-healingLogOnceIf(ctx, err, "heal-object-available-disks")
+if !latestMeta.Deleted && len(latestMeta.Erasure.Distribution) != len(onlineDisks) {
+err := fmt.Errorf("unexpected file distribution (%v) from online disks (%v), looks like backend disks have been manually modified refusing to heal %s/%s(%s)",
+latestMeta.Erasure.Distribution, onlineDisks, bucket, object, versionID)
+healingLogOnceIf(ctx, err, "heal-object-online-disks")
return er.defaultHealResult(latestMeta, storageDisks, storageEndpoints, errs,
bucket, object, versionID), err
}

-latestDisks := shuffleDisks(availableDisks, latestMeta.Erasure.Distribution)
+latestDisks := shuffleDisks(onlineDisks, latestMeta.Erasure.Distribution)

if !latestMeta.Deleted && len(latestMeta.Erasure.Distribution) != len(outDatedDisks) {
err := fmt.Errorf("unexpected file distribution (%v) from outdated disks (%v), looks like backend disks have been manually modified refusing to heal %s/%s(%s)",
@@ -562,6 +555,10 @@ func (er *erasureObjects) healObject(ctx context.Context, bucket string, object
if disk == OfflineDisk {
continue
}
+thisPartErrs := shuffleCheckParts(dataErrsByPart[partIndex], latestMeta.Erasure.Distribution)
+if thisPartErrs[i] != checkPartSuccess {
+continue
+}
checksumInfo := copyPartsMetadata[i].Erasure.GetChecksumInfo(partNumber)
partPath := pathJoin(object, srcDataDir, fmt.Sprintf("part.%d", partNumber))
readers[i] = newBitrotReader(disk, copyPartsMetadata[i].Data, bucket, partPath, tillOffset, checksumAlgo,
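Pulling the pieces of this file together: shouldHealObjectOnDisk now reports separately whether a drive needs its xl.meta healed or merely lost some part files, and only xl.meta-level failures count toward the cannot-heal (dangling) threshold. The sketch below shows that tally with illustrative values in place of minio's internals:

```go
package main

import (
	"errors"
	"fmt"
)

var (
	errFileNotFound = errors.New("file not found")
	errPartMissing  = errors.New("part missing")
)

// healDecision is a simplified stand-in for shouldHealObjectOnDisk's new
// (heal, healingMeta, reason) triple: healingMeta distinguishes a missing
// or outdated xl.meta from a drive that only lost some part files.
type healDecision struct {
	heal        bool
	healingMeta bool
	reason      error
}

func main() {
	parityBlocks := 2 // assumed EC:2 layout for illustration

	decisions := []healDecision{
		{true, true, errFileNotFound}, // xl.meta gone on this drive
		{true, false, errPartMissing}, // xl.meta fine, one part file missing
		{false, false, nil},
		{false, false, nil},
	}

	disksToHealCount, xlMetaToHealCount := 0, 0
	for _, d := range decisions {
		if d.heal {
			disksToHealCount++
			if d.healingMeta {
				xlMetaToHealCount++
			}
		}
	}

	// Only drives missing xl.meta itself count toward the dangling check;
	// a drive that merely lost a part file is still readable and healable.
	cannotHeal := xlMetaToHealCount > parityBlocks
	fmt.Printf("disksToHeal=%d xlMetaToHeal=%d cannotHeal=%v\n",
		disksToHealCount, xlMetaToHealCount, cannotHeal)
}
```

The final hunk applies the same per-part results during reconstruction: shards whose part check did not succeed are skipped instead of being handed to the bitrot reader.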
