Skip to content

Commit

Permalink
Add lockfile at snapshot activation to avoid race condition
Browse files Browse the repository at this point in the history
This creates an ignite lock file at /tmp/ignite-snapshot.lock
when an overlay snapshot is created. The locking is handled via
a PID file using the github.com/nightlyone/lockfile package. This
helps avoid the race condition that occurs when multiple ignite
processes try to create a loop device and use the device mapper for
an overlay snapshot at the same time. While one process holds the
lock, other processes keep retrying until they obtain it. Once
the snapshot is activated, the lock is released.
  • Loading branch information
darkowlzz authored and stealthybox committed Feb 24, 2020
1 parent edfdea7 commit 9a99293
Showing 1 changed file with 57 additions and 0 deletions.
57 changes: 57 additions & 0 deletions pkg/dmlegacy/snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ import (
"fmt"
"os"
"path"
"path/filepath"
"time"

"github.com/nightlyone/lockfile"

api "github.com/weaveworks/ignite/pkg/apis/ignite"
"github.com/weaveworks/ignite/pkg/constants"
Expand All @@ -12,6 +15,8 @@ import (
"github.com/weaveworks/ignite/pkg/util"
)

const snapshotLockFileName = "ignite-snapshot.lock"

// ActivateSnapshot sets up the snapshot with devicemapper so that it is active and can be used
func ActivateSnapshot(vm *api.VM) error {
device := util.NewPrefixer().Prefix(vm.GetUID())
Expand All @@ -28,6 +33,28 @@ func ActivateSnapshot(vm *api.VM) error {
return err
}

// NOTE: Multiple ignite processes trying to create loop devices at the
// same time results in race condition. When multiple processes request for
// a free loop device at the same time, they may get the same device ID and
// try to create the same device multiple times.
// Serialize this operation by creating a global lock file when creating a
// loop device and release the lock after setting up device mapper using the
// loop device.

// Global lock path.
glpath := filepath.Join(os.TempDir(), snapshotLockFileName)

// Create a lockfile and obtain a lock.
lock, err := lockfile.New(glpath)
if err != nil {
return fmt.Errorf("failed to create lock: %v", err)
}
if err := obtainLock(lock); err != nil {
return err
}
// Release the lock at the end.
defer lock.Unlock()

// Setup loop device for the image
imageLoop, err := newLoopDev(path.Join(constants.IMAGE_DIR, imageUID.String(), constants.IMAGE_FS), true)
if err != nil {
Expand Down Expand Up @@ -101,3 +128,33 @@ func ActivateSnapshot(vm *api.VM) error {

return overlayLoop.Detach()
}

// obtainLock acquires the given lockfile, retrying for as long as the failure
// is reported as temporary (for example, when the lock is owned by another
// process), until the lock is obtained.
//
// It returns an error if the lock is already owned by the current process, or
// if TryLock fails with an error that is not temporary.
func obtainLock(lock lockfile.Lockfile) error {
	// Pause between attempts so that a waiting process does not spin at full
	// speed on the lock file while another process holds the lock.
	const retryInterval = 10 * time.Millisecond

	// Check if the lock has any owner. A nil error means an owner was found;
	// refuse to continue if that owner is the current process itself.
	// If the owner is some other process, or no owner could be determined,
	// continue to obtain the lock, in case the lock owner no longer exists.
	if owner, err := lock.GetOwner(); err == nil && owner.Pid == os.Getpid() {
		return fmt.Errorf("lockfile %q already locked by this process", lock)
	}

	// Obtain a lock. Retry if the lock can't be obtained.
	for {
		err := lock.TryLock()
		if err == nil {
			return nil
		}

		// Only retry errors that explicitly report themselves as temporary.
		// Implementing Temporary() is not enough: the method must be called,
		// otherwise an error answering false would be retried forever.
		if te, ok := err.(interface{ Temporary() bool }); !ok || !te.Temporary() {
			return fmt.Errorf("unable to lock %q: %v", lock, err)
		}

		time.Sleep(retryInterval)
	}
}

0 comments on commit 9a99293

Please sign in to comment.