Skip to content

Commit

Permalink
feat: introduce list_quorum="auto" to prefer quorum drives (minio#18084)
Browse files Browse the repository at this point in the history
NOTE: This feature is not retroactive; it does not apply to previous transactions
on existing setups.

To enable this feature, please set the `_MINIO_DRIVE_QUORUM=on` environment
variable as part of the systemd service or k8s configmap.

Once this has been enabled, you need to also set `list_quorum`. 

```
~ mc admin config set alias/ api list_quorum=auto
```

A new debugging tool is available to check for any missing counters.
  • Loading branch information
harshavardhana authored Dec 29, 2023
1 parent 5b2ced0 commit a50ea92
Show file tree
Hide file tree
Showing 30 changed files with 1,288 additions and 251 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,4 @@ docs/debugging/inspect/inspect
docs/debugging/pprofgoparser/pprofgoparser
docs/debugging/reorder-disks/reorder-disks
docs/debugging/populate-hard-links/populate-hardlinks
docs/debugging/xattr/xattr
1 change: 0 additions & 1 deletion cmd/background-newdisks-heal-ops.go
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,6 @@ func initAutoHeal(ctx context.Context, objAPI ObjectLayer) {

if env.Get("_MINIO_AUTO_DRIVE_HEALING", config.EnableOn) == config.EnableOn || env.Get("_MINIO_AUTO_DISK_HEALING", config.EnableOn) == config.EnableOn {
globalBackgroundHealState.pushHealLocalDisks(getLocalDisksToHeal()...)

go monitorLocalDisksAndHeal(ctx, z)
}
}
Expand Down
9 changes: 4 additions & 5 deletions cmd/erasure-healing.go
Original file line number Diff line number Diff line change
Expand Up @@ -370,10 +370,8 @@ func (fi *FileInfo) SetHealing() {
// Healing returns true if object is being healed (i.e fi is being passed down
// from healObject)
func (fi FileInfo) Healing() bool {
if _, ok := fi.Metadata[xMinIOHealing]; ok {
return true
}
return false
_, ok := fi.Metadata[xMinIOHealing]
return ok
}

// Heals an object by re-writing corrupt/missing erasure blocks.
Expand Down Expand Up @@ -760,7 +758,8 @@ func (er *erasureObjects) healObject(ctx context.Context, bucket string, object

// Attempt a rename now from healed data to final location.
partsMetadata[i].SetHealing()
if _, err = disk.RenameData(ctx, minioMetaTmpBucket, tmpID, partsMetadata[i], bucket, object); err != nil {

if _, err = disk.RenameData(ctx, minioMetaTmpBucket, tmpID, partsMetadata[i], bucket, object, RenameOptions{}); err != nil {
return result, err
}

Expand Down
10 changes: 5 additions & 5 deletions cmd/erasure-object.go
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,7 @@ func (er erasureObjects) deleteIfDangling(ctx context.Context, bucket, object st
if disks[index] == nil {
return errDiskNotFound
}
return disks[index].DeleteVersion(ctx, bucket, object, fi, false)
return disks[index].DeleteVersion(ctx, bucket, object, fi, false, DeleteOptions{})
}, index)
}

Expand Down Expand Up @@ -1032,7 +1032,7 @@ func renameData(ctx context.Context, disks []StorageAPI, srcBucket, srcEntry str
if !fi.IsValid() {
return errFileCorrupt
}
sign, err := disks[index].RenameData(ctx, srcBucket, srcEntry, fi, dstBucket, dstEntry)
sign, err := disks[index].RenameData(ctx, srcBucket, srcEntry, fi, dstBucket, dstEntry, RenameOptions{})
if err != nil {
return err
}
Expand All @@ -1059,7 +1059,7 @@ func renameData(ctx context.Context, disks []StorageAPI, srcBucket, srcEntry str
// caller this dangling object will be now scheduled to be removed
// via active healing.
dg.Go(func() error {
return disks[index].DeleteVersion(context.Background(), dstBucket, dstEntry, metadata[index], false)
return disks[index].DeleteVersion(context.Background(), dstBucket, dstEntry, metadata[index], false, DeleteOptions{UndoWrite: true})
}, index)
}
dg.Wait()
Expand Down Expand Up @@ -1610,7 +1610,7 @@ func (er erasureObjects) deleteObjectVersion(ctx context.Context, bucket, object
if disks[index] == nil {
return errDiskNotFound
}
return disks[index].DeleteVersion(ctx, bucket, object, fi, forceDelMarker)
return disks[index].DeleteVersion(ctx, bucket, object, fi, forceDelMarker, DeleteOptions{})
}, index)
}
// return errors if any during deletion
Expand Down Expand Up @@ -1723,7 +1723,7 @@ func (er erasureObjects) DeleteObjects(ctx context.Context, bucket string, objec
}
return
}
errs := disk.DeleteVersions(ctx, bucket, dedupVersions)
errs := disk.DeleteVersions(ctx, bucket, dedupVersions, DeleteOptions{})
for i, err := range errs {
if err == nil {
continue
Expand Down
14 changes: 12 additions & 2 deletions cmd/erasure-server-pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -1985,7 +1985,7 @@ func (z *erasureServerPools) Walk(ctx context.Context, bucket, prefix string, re
go func() {
defer wg.Done()

disks, _ := set.getOnlineDisksWithHealing(true)
disks, infos, _ := set.getOnlineDisksWithHealingAndInfo(true)
if len(disks) == 0 {
cancel()
return
Expand All @@ -2002,7 +2002,17 @@ func (z *erasureServerPools) Walk(ctx context.Context, bucket, prefix string, re

askDisks := getListQuorum(opts.AskDisks, set.setDriveCount)
if askDisks == -1 {
askDisks = getListQuorum("strict", set.setDriveCount)
newDisks := getQuorumDisks(disks, infos, (len(disks)+1)/2)
if newDisks != nil {
// If we found disks whose signature is in quorum, we proceed to list
// from a single drive; shuffling of the drives happens subsequently.
disks = newDisks
askDisks = 1
} else {
// If we did not find suitable disks, perform strict quorum listing
// as no disk agrees on quorum anymore.
askDisks = getListQuorum("strict", set.setDriveCount)
}
}

// Special case: ask all disks if the drive count is 4
Expand Down
26 changes: 22 additions & 4 deletions cmd/erasure-sets.go
Original file line number Diff line number Diff line change
Expand Up @@ -1110,7 +1110,7 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H
formatOpID := mustGetUUID()

// Initialize a new set of set formats which will be written to disk.
newFormatSets := newHealFormatSets(refFormat, s.setCount, s.setDriveCount, formats, sErrs)
newFormatSets, currentDisksInfo := newHealFormatSets(refFormat, s.setCount, s.setDriveCount, formats, sErrs)

if !dryRun {
tmpNewFormats := make([]*formatErasureV3, s.setCount*s.setDriveCount)
Expand Down Expand Up @@ -1153,9 +1153,27 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H
s.erasureDisks[m][n].Close()
}

if storageDisks[index] != nil {
storageDisks[index].SetDiskLoc(s.poolIndex, m, n)
s.erasureDisks[m][n] = storageDisks[index]
if disk := storageDisks[index]; disk != nil {
disk.SetDiskLoc(s.poolIndex, m, n)

if disk.IsLocal() && driveQuorum {
commonWrites, commonDeletes := calcCommonWritesDeletes(currentDisksInfo[m], (s.setDriveCount+1)/2)
xldisk, ok := disk.(*xlStorageDiskIDCheck)
if ok {
xldisk.totalWrites.Add(commonWrites)
xldisk.totalDeletes.Add(commonDeletes)
xldisk.storage.setWriteAttribute(commonWrites)
xldisk.storage.setDeleteAttribute(commonDeletes)
}
}

s.erasureDisks[m][n] = disk

if disk.IsLocal() && globalIsDistErasure {
globalLocalDrivesMu.Lock()
globalLocalSetDrives[s.poolIndex][m][n] = disk
globalLocalDrivesMu.Unlock()
}
}
}

Expand Down
35 changes: 19 additions & 16 deletions cmd/erasure.go
Original file line number Diff line number Diff line change
Expand Up @@ -266,12 +266,12 @@ func (er erasureObjects) LocalStorageInfo(ctx context.Context, metrics bool) Sto
return getStorageInfo(localDisks, localEndpoints, metrics)
}

// getOnlineDisksWithHealing - returns online disks and overall healing status.
// getOnlineDisksWithHealingAndInfo - returns online disks, their disk info and overall healing status.
// Disks are randomly ordered, but in the following groups:
// - Non-scanning disks
// - Non-healing disks
// - Healing disks (if inclHealing is true)
func (er erasureObjects) getOnlineDisksWithHealing(inclHealing bool) (newDisks []StorageAPI, healing bool) {
func (er erasureObjects) getOnlineDisksWithHealingAndInfo(inclHealing bool) (newDisks []StorageAPI, newInfos []DiskInfo, healing bool) {
var wg sync.WaitGroup
disks := er.getDisks()
infos := make([]DiskInfo, len(disks))
Expand All @@ -284,32 +284,24 @@ func (er erasureObjects) getOnlineDisksWithHealing(inclHealing bool) (newDisks [

disk := disks[i]
if disk == nil {
infos[i].Error = "offline drive"
infos[i].Error = errDiskNotFound.Error()
return
}

di, err := disk.DiskInfo(context.Background(), false)
infos[i] = di
if err != nil {
// - Do not consume disks which are not reachable
// unformatted or simply not accessible for some reason.
//
//
// - Future: skip busy disks
if err != nil {
infos[i].Error = err.Error()
}
return
}
if !inclHealing && di.Healing {
return
infos[i].Error = err.Error()
}

infos[i] = di
}()
}
wg.Wait()

var scanningDisks, healingDisks []StorageAPI
var scanningInfos, healingInfos []DiskInfo

for i, info := range infos {
// Check if one of the drives in the set is being healed.
// this information is used by scanner to skip healing
Expand All @@ -321,23 +313,34 @@ func (er erasureObjects) getOnlineDisksWithHealing(inclHealing bool) (newDisks [
healing = true
if inclHealing {
healingDisks = append(healingDisks, disks[i])
healingInfos = append(healingInfos, infos[i])
}
continue
}

if !info.Scanning {
newDisks = append(newDisks, disks[i])
newInfos = append(newInfos, infos[i])
} else {
scanningDisks = append(scanningDisks, disks[i])
scanningInfos = append(scanningInfos, infos[i])
}
}

// Prefer non-scanning disks over disks which are currently being scanned.
newDisks = append(newDisks, scanningDisks...)
newInfos = append(newInfos, scanningInfos...)

/// Then add healing disks.
newDisks = append(newDisks, healingDisks...)
newInfos = append(newInfos, healingInfos...)

return newDisks, healing
return newDisks, newInfos, healing
}

// getOnlineDisksWithHealing - returns online disks and overall healing status.
// It delegates to getOnlineDisksWithHealingAndInfo and discards the per-disk
// DiskInfo slice for callers that only need the disks themselves.
func (er erasureObjects) getOnlineDisksWithHealing(inclHealing bool) (newDisks []StorageAPI, healing bool) {
	onlineDisks, _, anyHealing := er.getOnlineDisksWithHealingAndInfo(inclHealing)
	return onlineDisks, anyHealing
}

// Clean-up previously deleted objects from .minio.sys/tmp/.trash/
Expand Down
17 changes: 15 additions & 2 deletions cmd/format-erasure.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ type formatErasureV3 struct {
// to pick the right set index for an object.
DistributionAlgo string `json:"distributionAlgo"`
} `json:"xl"`
Info DiskInfo `json:"-"`
}

func (f *formatErasureV3) Drives() (drives int) {
Expand Down Expand Up @@ -328,6 +329,11 @@ func loadFormatErasureAll(storageDisks []StorageAPI, heal bool) ([]*formatErasur
if err != nil {
return err
}
info, err := storageDisks[index].DiskInfo(context.Background(), false)
if err != nil {
return err
}
format.Info = info
formats[index] = format
if !heal {
// If no healing required, make the disks valid and
Expand Down Expand Up @@ -824,11 +830,15 @@ func makeFormatErasureMetaVolumes(disk StorageAPI) error {
}

// Initialize a new set of set formats which will be written to all disks.
func newHealFormatSets(refFormat *formatErasureV3, setCount, setDriveCount int, formats []*formatErasureV3, errs []error) [][]*formatErasureV3 {
func newHealFormatSets(refFormat *formatErasureV3, setCount, setDriveCount int, formats []*formatErasureV3, errs []error) ([][]*formatErasureV3, [][]DiskInfo) {
newFormats := make([][]*formatErasureV3, setCount)
for i := range refFormat.Erasure.Sets {
newFormats[i] = make([]*formatErasureV3, setDriveCount)
}
currentDisksInfo := make([][]DiskInfo, setCount)
for i := range refFormat.Erasure.Sets {
currentDisksInfo[i] = make([]DiskInfo, setDriveCount)
}
for i := range refFormat.Erasure.Sets {
for j := range refFormat.Erasure.Sets[i] {
if errors.Is(errs[i*setDriveCount+j], errUnformattedDisk) {
Expand All @@ -841,7 +851,10 @@ func newHealFormatSets(refFormat *formatErasureV3, setCount, setDriveCount int,
newFormats[i][j].Erasure.Version = refFormat.Erasure.Version
newFormats[i][j].Erasure.DistributionAlgo = refFormat.Erasure.DistributionAlgo
}
if format := formats[i*setDriveCount+j]; format != nil && (errs[i*setDriveCount+j] == nil) {
currentDisksInfo[i][j] = format.Info
}
}
}
return newFormats
return newFormats, currentDisksInfo
}
2 changes: 1 addition & 1 deletion cmd/format-erasure_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -513,7 +513,7 @@ func TestNewFormatSets(t *testing.T) {
// 16th disk is unformatted.
errs[15] = errUnformattedDisk

newFormats := newHealFormatSets(quorumFormat, setCount, setDriveCount, formats, errs)
newFormats, _ := newHealFormatSets(quorumFormat, setCount, setDriveCount, formats, errs)
if newFormats == nil {
t.Fatal("Unexpected failure")
}
Expand Down
Loading

0 comments on commit a50ea92

Please sign in to comment.