Skip to content

Commit

Permalink
Merge pull request mendersoftware#823 from kacf/MEN-4830
Browse files Browse the repository at this point in the history
Fix bug which caused rollback after commit if status report fails.
  • Loading branch information
Kristian Amlie authored Jul 7, 2021
2 parents ea2e511 + 608dc31 commit 77b3003
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 121 deletions.
82 changes: 30 additions & 52 deletions app/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,24 @@ func (i *initState) getNextState(ctx *StateContext, sd *datastore.StateData,
case datastore.MenderStateUpdateCleanup:
return NewUpdateCleanupState(&sd.UpdateInfo, client.StatusFailure), false

// Status reports should be retried. It is possible that the original
// status report had a different status than Failure, but worst case
// this is simply a wrong report, the device will be fine, and the logs
// will reveal what happened.
case datastore.MenderStateUpdateStatusReport,
datastore.MenderStateStatusReportRetry:

return NewUpdateStatusReportState(&sd.UpdateInfo, client.StatusFailure), false

// Historical state. This state is not used anymore in current
// clients. In the past it was used at the very end of the update
// process if there were errors during reporting. But the handling was
// wrong and hence this state was removed. Should we encounter it (which
// is unlikely, but possible), we should be at the very end of an
// update, and should just go back to Idle.
case datastore.MenderStateReportStatusError:
return States.Idle, false

// All other states go to either error or rollback state, depending on
// what's supported.
default:
Expand Down Expand Up @@ -801,7 +819,7 @@ func (u *updateStoreState) Handle(ctx *StateContext, c Controller) (State, bool)
installer, err := c.ReadArtifactHeaders(u.imagein)
if err != nil {
log.Errorf("Fetching Artifact headers failed: %s", err)
return NewUpdateCleanupState(&u.update, client.StatusFailure), false
return NewUpdateStatusReportState(&u.update, client.StatusFailure), false
}

installers := c.GetInstallers()
Expand Down Expand Up @@ -1410,8 +1428,9 @@ func (usr *updateStatusReportState) Handle(ctx *StateContext, c Controller) (Sta

log.Errorf("Failed to send status to server: %v", err)
if err.IsFatal() {
// there is no point in retrying
return NewReportErrorState(usr.Update(), usr.status), false
// There is no point in retrying, and there is nothing
// we can do.
return States.Idle, false
}
return NewUpdateStatusReportRetryState(usr, usr.Update(), usr.status,
usr.triesSendingReport), false
Expand All @@ -1424,8 +1443,9 @@ func (usr *updateStatusReportState) Handle(ctx *StateContext, c Controller) (Sta

log.Errorf("Failed to send deployment logs to server: %v", err)
if err.IsFatal() {
// there is no point in retrying
return NewReportErrorState(usr.Update(), usr.status), false
// There is no point in retrying, and there is nothing
// we can do.
return States.Idle, false
}
return NewUpdateStatusReportRetryState(usr, usr.Update(), usr.status,
usr.triesSendingLogs), false
Expand All @@ -1452,10 +1472,10 @@ func NewUpdateStatusReportRetryState(reportState State,
update *datastore.UpdateInfo, status string, tries int) State {
return &updateStatusReportRetryState{
baseState: baseState{
id: datastore.MenderStatusReportRetryState,
id: datastore.MenderStateStatusReportRetry,
t: ToNone,
},
WaitState: NewWaitState(datastore.MenderStatusReportRetryState, ToNone),
WaitState: NewWaitState(datastore.MenderStateStatusReportRetry, ToNone),
reportState: reportState,
update: *update,
status: status,
Expand Down Expand Up @@ -1504,7 +1524,9 @@ func (usr *updateStatusReportRetryState) Handle(ctx *StateContext, c Controller)
if usr.triesSending < maxTrySending {
return usr.Wait(usr.reportState, usr, c.GetRetryPollInterval(), ctx.WakeupChan)
}
return NewReportErrorState(&usr.update, usr.status), false
// If we have exhausted every attempt, there is nothing more we can
// do. The update is over.
return States.Idle, false
}

func (usr *updateStatusReportRetryState) Update() *datastore.UpdateInfo {
Expand All @@ -1517,50 +1539,6 @@ func (usr *updateStatusReportRetryState) PermitLooping() bool {
return true
}

type reportErrorState struct {
*updateState
updateStatus string
}

func NewReportErrorState(update *datastore.UpdateInfo, status string) State {
return &reportErrorState{
updateState: NewUpdateState(datastore.MenderStateReportStatusError,
ToArtifactFailure, update),
updateStatus: status,
}
}

func (res *reportErrorState) Handle(ctx *StateContext, c Controller) (State, bool) {
// start deployment logging; no error checking
// we can do nothing here; either we will have the logs or not...
DeploymentLogger.Enable(res.Update().ID)

log.Errorf("Handling report error state with status: %v", res.updateStatus)

switch res.updateStatus {
case client.StatusSuccess:
// error while reporting success; rollback
return NewUpdateRollbackState(res.Update()), false
case client.StatusFailure:
// error while reporting failure;
// start from scratch as previous update was broken
log.Errorf("Error while performing update: %v (%v)", res.updateStatus, *res.Update())
return States.Idle, false
case client.StatusAlreadyInstalled:
// we've failed to report already-installed status, not a big
// deal, start from scratch
return States.Idle, false
default:
// should not end up here
return States.Final, false
}
}

func (res *reportErrorState) HandleError(ctx *StateContext, c Controller, merr menderError) (State, bool) {
log.Errorf("Reached final error state: %s", merr.Error())
return States.Idle, false
}

type updateRebootState struct {
*updateState
}
Expand Down
72 changes: 6 additions & 66 deletions app/state_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -359,11 +359,11 @@ func TestStateUpdateReportStatus(t *testing.T) {
}
assert.WithinDuration(t, now, time.Now(), time.Duration(int64(shouldTry)*int64(retry))+time.Millisecond*10)

// next attempt should return an error
// next attempt should return an error, and therefore go back to idle.
s, _ = s.Handle(&ctx, sc)
assert.IsType(t, &updateStatusReportRetryState{}, s)
s, c = s.Handle(&ctx, sc)
assert.IsType(t, &reportErrorState{}, s)
assert.IsType(t, States.Idle, s)
assert.False(t, c)

// error sending logs
Expand All @@ -389,7 +389,7 @@ func TestStateUpdateReportStatus(t *testing.T) {
s, _ = s.Handle(&ctx, sc)
assert.IsType(t, &updateStatusReportRetryState{}, s)
s, c = s.Handle(&ctx, sc)
assert.IsType(t, s, &reportErrorState{})
assert.IsType(t, s, States.Idle)
assert.False(t, c)

// pretend update was aborted at the backend, but was applied
Expand All @@ -399,15 +399,15 @@ func TestStateUpdateReportStatus(t *testing.T) {
reportError: NewFatalError(client.ErrDeploymentAborted),
}
s, _ = usr.Handle(&ctx, sc)
assert.IsType(t, &reportErrorState{}, s)
assert.IsType(t, States.Idle, s)

// pretend update was aborted at the backend, along with local failure
usr = NewUpdateStatusReportState(update, client.StatusFailure)
sc = &stateTestController{
reportError: NewFatalError(client.ErrDeploymentAborted),
}
s, _ = usr.Handle(&ctx, sc)
assert.IsType(t, &reportErrorState{}, s)
assert.IsType(t, States.Idle, s)
}

func TestStateIdle(t *testing.T) {
Expand Down Expand Up @@ -746,7 +746,7 @@ func TestStateUpdateFetch(t *testing.T) {
uis := s.(*updateStoreState)
assert.Equal(t, stream, uis.imagein)
s, c = transitionState(s, &ctx, sc)
assert.IsType(t, &updateCleanupState{}, s)
assert.IsType(t, &updateStatusReportState{}, s)
assert.False(t, c)

ud, err := datastore.LoadStateData(ms)
Expand Down Expand Up @@ -1091,66 +1091,6 @@ func TestStateData(t *testing.T) {
assert.True(t, os.IsNotExist(err))
}

func TestStateReportError(t *testing.T) {
// create directory for storing deployments logs
tempDir, _ := ioutil.TempDir("", "logs")
DeploymentLogger = NewDeploymentLogManager(tempDir)
defer func() {
DeploymentLogger = nil
os.RemoveAll(tempDir)
}()

update := &datastore.UpdateInfo{
ID: "foobar",
}

ms := store.NewMemStore()
ctx := &StateContext{
Store: ms,
}
sc := &stateTestController{}

// update succeeded, but we failed to report the status to the server,
// rollback happens next
res := NewReportErrorState(update, client.StatusSuccess)
s, c := res.Handle(ctx, sc)
assert.IsType(t, &updateRollbackState{}, s)
assert.False(t, c)

// store some state data, failing to report status with a failed update
// will just clean that up and
datastore.StoreStateData(ms, datastore.StateData{
Name: datastore.MenderStateReportStatusError,
UpdateInfo: *update,
}, true)
// update failed and we failed to report that status to the server,
// state data should be removed and we should go back to init
res = NewReportErrorState(update, client.StatusFailure)
s, c = res.Handle(ctx, sc)
assert.IsType(t, &idleState{}, s)
assert.False(t, c)

_, err := datastore.LoadStateData(ms)
assert.Equal(t, err, nil)

// store some state data, failing to report status with an update that
// is already installed will also clean it up
datastore.StoreStateData(ms, datastore.StateData{
Name: datastore.MenderStateReportStatusError,
UpdateInfo: *update,
}, true)
// update is already installed and we failed to report that status to
// the server, state data should be removed and we should go back to
// init
res = NewReportErrorState(update, client.StatusAlreadyInstalled)
s, c = res.Handle(ctx, sc)
assert.IsType(t, &idleState{}, s)
assert.False(t, c)

_, err = datastore.LoadStateData(ms)
assert.Equal(t, err, nil)
}

func TestMaxSendingAttempts(t *testing.T) {
assert.Equal(t, minReportSendRetries,
maxSendingAttempts(time.Second, 0*time.Second, minReportSendRetries))
Expand Down
13 changes: 10 additions & 3 deletions datastore/statedata.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ const (
// status report
MenderStateUpdateStatusReport
// wait before retrying sending either report or deployment logs
MenderStatusReportRetryState
MenderStateStatusReportRetry
// error reporting status
MenderStateReportStatusError
// reboot
Expand Down Expand Up @@ -117,6 +117,9 @@ const (
)

var (
// These are values that are stored in the client database during an
// upgrade from one client to the next. Backwards compatibility is
// paramount here, so be careful when changing these.
stateNames = map[MenderState]string{
MenderStateInit: "init",
MenderStateIdle: "idle",
Expand All @@ -136,8 +139,7 @@ var (
MenderStateUpdateAfterFirstCommit: "update-after-first-commit",
MenderStateUpdateAfterCommit: "update-after-commit",
MenderStateUpdateStatusReport: "update-status-report",
MenderStatusReportRetryState: "update-retry-report",
MenderStateReportStatusError: "status-report-error",
MenderStateStatusReportRetry: "update-retry-report",
MenderStateReboot: "reboot",
MenderStateVerifyReboot: "verify-reboot",
MenderStateAfterReboot: "after-reboot",
Expand All @@ -153,6 +155,11 @@ var (
MenderStateUpdateControlPause: "mender-update-control-pause",
MenderStateFetchUpdateControl: "mender-update-control-refresh-maps",
MenderStateFetchRetryUpdateControl: "mender-update-control-retry-refresh-maps",

// No longer used. Since this used to be at the very end of an
// update, if we encounter it in the database during startup, we
// just go back to Idle.
MenderStateReportStatusError: "status-report-error",
}
)

Expand Down

0 comments on commit 77b3003

Please sign in to comment.