Skip to content

Commit

Permalink
bugfix: fix noderesource-controller, reporter reconcile on node delet…
Browse files Browse the repository at this point in the history
…ion (koordinator-sh#309)

Signed-off-by: saintube <[email protected]>
  • Loading branch information
saintube authored Jun 24, 2022
1 parent d331a10 commit 4bf748b
Show file tree
Hide file tree
Showing 4 changed files with 251 additions and 19 deletions.
2 changes: 1 addition & 1 deletion pkg/koordlet/reporter/reporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ func (r *reporter) sync() {
nodeMetric, err := r.nodeMetricLister.Get(r.nodeName)
if errors.IsNotFound(err) {
klog.Warningf("nodeMetric %v not found, skip", r.nodeName)
return err
return nil
} else if err != nil {
klog.Warningf("failed to get %s nodeMetric: %v", r.nodeName, err)
return err
Expand Down
241 changes: 240 additions & 1 deletion pkg/koordlet/reporter/reporter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,28 +17,42 @@ limitations under the License.
package reporter

import (
"context"
"testing"
"time"

"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/utils/pointer"

"github.com/golang/mock/gomock"
slov1alpha1 "github.com/koordinator-sh/koordinator/apis/slo/v1alpha1"
clientbeta1 "github.com/koordinator-sh/koordinator/pkg/client/clientset/versioned/typed/slo/v1alpha1"
fakeclientslov1alpha1 "github.com/koordinator-sh/koordinator/pkg/client/clientset/versioned/typed/slo/v1alpha1/fake"
listerbeta1 "github.com/koordinator-sh/koordinator/pkg/client/listers/slo/v1alpha1"
"github.com/koordinator-sh/koordinator/pkg/koordlet/metriccache"
mock_metriccache "github.com/koordinator-sh/koordinator/pkg/koordlet/metriccache/mockmetriccache"
"github.com/koordinator-sh/koordinator/pkg/koordlet/statesinformer"
mock_statesinformer "github.com/koordinator-sh/koordinator/pkg/koordlet/statesinformer/mockstatesinformer"
"github.com/stretchr/testify/assert"
)

var _ listerbeta1.NodeMetricLister = &fakeNodeMetricLister{}

type fakeNodeMetricLister struct {
nodeMetrics *slov1alpha1.NodeMetric
getErr error
}

func (f *fakeNodeMetricLister) List(selector labels.Selector) (ret []*slov1alpha1.NodeMetric, err error) {
return []*slov1alpha1.NodeMetric{f.nodeMetrics}, nil
}

func (f *fakeNodeMetricLister) Get(name string) (*slov1alpha1.NodeMetric, error) {
return f.nodeMetrics, nil
return f.nodeMetrics, f.getErr
}

func Test_reporter_isNodeMetricInited(t *testing.T) {
Expand Down Expand Up @@ -90,3 +104,228 @@ func Test_getNodeMetricReportInterval(t *testing.T) {
t.Errorf("expect reportInterval %d but got %d", expectReportInterval, reportInterval)
}
}

type fakeNodeMetricClient struct {
fakeclientslov1alpha1.FakeNodeMetrics
nodeMetrics map[string]*slov1alpha1.NodeMetric
}

func (c *fakeNodeMetricClient) Get(ctx context.Context, name string, opts metav1.GetOptions) (*slov1alpha1.NodeMetric, error) {
nodeMetric, ok := c.nodeMetrics[name]
if !ok {
return &slov1alpha1.NodeMetric{}, errors.NewNotFound(schema.GroupResource{Group: "slo.koordinator.sh", Resource: "nodemetrics"}, name)
}
return nodeMetric, nil
}

func (c *fakeNodeMetricClient) UpdateStatus(ctx context.Context, nodeMetric *slov1alpha1.NodeMetric, opts metav1.UpdateOptions) (*slov1alpha1.NodeMetric, error) {
currentNodeMetric, ok := c.nodeMetrics[nodeMetric.Name]
if !ok {
return &slov1alpha1.NodeMetric{}, errors.NewNotFound(schema.GroupResource{Group: "slo.koordinator.sh", Resource: "nodemetrics"}, nodeMetric.Name)
}
currentNodeMetric.Status = nodeMetric.Status
c.nodeMetrics[nodeMetric.Name] = currentNodeMetric
return currentNodeMetric, nil
}

func Test_reporter_sync(t *testing.T) {
type fields struct {
nodeName string
nodeMetric *slov1alpha1.NodeMetric
metricCache func(ctrl *gomock.Controller) metriccache.MetricCache
statesInformer func(ctrl *gomock.Controller) statesinformer.StatesInformer
nodeMetricLister listerbeta1.NodeMetricLister
nodeMetricClient clientbeta1.NodeMetricInterface
}
tests := []struct {
name string
fields fields
want *slov1alpha1.NodeMetric
wantErr bool
}{
{
name: "nodeMetric not initialized",
fields: fields{
nodeName: "test",
nodeMetric: nil,
metricCache: func(ctrl *gomock.Controller) metriccache.MetricCache {
return nil
},
statesInformer: func(ctrl *gomock.Controller) statesinformer.StatesInformer {
return nil
},
nodeMetricLister: nil,
nodeMetricClient: &fakeNodeMetricClient{},
},
want: &slov1alpha1.NodeMetric{},
wantErr: true,
},
{
name: "collect nil nodeMetric",
fields: fields{
nodeName: "test",
nodeMetric: &slov1alpha1.NodeMetric{
ObjectMeta: metav1.ObjectMeta{
Name: "test",
},
},
metricCache: func(ctrl *gomock.Controller) metriccache.MetricCache {
c := mock_metriccache.NewMockMetricCache(ctrl)
c.EXPECT().GetNodeResourceMetric(gomock.Any()).Return(metriccache.NodeResourceQueryResult{}).Times(1)
return c
},
statesInformer: func(ctrl *gomock.Controller) statesinformer.StatesInformer {
i := mock_statesinformer.NewMockStatesInformer(ctrl)
i.EXPECT().GetAllPods().Return(nil).Times(1)
return i
},
nodeMetricLister: nil,
nodeMetricClient: &fakeNodeMetricClient{},
},
want: &slov1alpha1.NodeMetric{},
wantErr: true,
},
{
name: "successfully report nodeMetric",
fields: fields{
nodeName: "test",
nodeMetric: &slov1alpha1.NodeMetric{
ObjectMeta: metav1.ObjectMeta{
Name: "test",
},
},
metricCache: func(ctrl *gomock.Controller) metriccache.MetricCache {
c := mock_metriccache.NewMockMetricCache(ctrl)
c.EXPECT().GetNodeResourceMetric(gomock.Any()).Return(metriccache.NodeResourceQueryResult{
Metric: &metriccache.NodeResourceMetric{
CPUUsed: metriccache.CPUMetric{
CPUUsed: resource.MustParse("1000"),
},
MemoryUsed: metriccache.MemoryMetric{
MemoryWithoutCache: resource.MustParse("1Gi"),
},
},
}).Times(1)
return c
},
statesInformer: func(ctrl *gomock.Controller) statesinformer.StatesInformer {
i := mock_statesinformer.NewMockStatesInformer(ctrl)
i.EXPECT().GetAllPods().Return(nil).Times(1)
return i
},
nodeMetricLister: &fakeNodeMetricLister{
nodeMetrics: &slov1alpha1.NodeMetric{
ObjectMeta: metav1.ObjectMeta{
Name: "test",
},
},
},
nodeMetricClient: &fakeNodeMetricClient{
nodeMetrics: map[string]*slov1alpha1.NodeMetric{
"test": {
ObjectMeta: metav1.ObjectMeta{
Name: "test",
},
},
},
},
},
want: &slov1alpha1.NodeMetric{
ObjectMeta: metav1.ObjectMeta{
Name: "test",
},
Status: slov1alpha1.NodeMetricStatus{
NodeMetric: &slov1alpha1.NodeMetricInfo{
NodeUsage: *convertNodeMetricToResourceMap(&metriccache.NodeResourceMetric{
CPUUsed: metriccache.CPUMetric{
CPUUsed: resource.MustParse("1000"),
},
MemoryUsed: metriccache.MemoryMetric{
MemoryWithoutCache: resource.MustParse("1Gi"),
},
}),
},
PodsMetric: []*slov1alpha1.PodMetricInfo{},
},
},
wantErr: false,
},
{
name: "skip for nodeMetric not found",
fields: fields{
nodeName: "test",
nodeMetric: &slov1alpha1.NodeMetric{
ObjectMeta: metav1.ObjectMeta{
Name: "test",
},
},
metricCache: func(ctrl *gomock.Controller) metriccache.MetricCache {
c := mock_metriccache.NewMockMetricCache(ctrl)
c.EXPECT().GetNodeResourceMetric(gomock.Any()).Return(metriccache.NodeResourceQueryResult{
Metric: &metriccache.NodeResourceMetric{
CPUUsed: metriccache.CPUMetric{
CPUUsed: resource.MustParse("1000"),
},
MemoryUsed: metriccache.MemoryMetric{
MemoryWithoutCache: resource.MustParse("1Gi"),
},
},
}).Times(1)
return c
},
statesInformer: func(ctrl *gomock.Controller) statesinformer.StatesInformer {
i := mock_statesinformer.NewMockStatesInformer(ctrl)
i.EXPECT().GetAllPods().Return(nil).Times(1)
return i
},
nodeMetricLister: &fakeNodeMetricLister{
nodeMetrics: &slov1alpha1.NodeMetric{
ObjectMeta: metav1.ObjectMeta{
Name: "test",
},
},
getErr: errors.NewNotFound(schema.GroupResource{Group: "slo.koordinator.sh", Resource: "nodemetrics"}, "test"),
},
nodeMetricClient: &fakeNodeMetricClient{
nodeMetrics: map[string]*slov1alpha1.NodeMetric{
"test": {
ObjectMeta: metav1.ObjectMeta{
Name: "test",
},
},
},
},
},
want: &slov1alpha1.NodeMetric{
ObjectMeta: metav1.ObjectMeta{
Name: "test",
},
},
wantErr: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
r := &reporter{
nodeMetric: tt.fields.nodeMetric,
metricCache: tt.fields.metricCache(ctrl),
statesInformer: tt.fields.statesInformer(ctrl),
nodeMetricLister: tt.fields.nodeMetricLister,
statusUpdater: newStatusUpdater(tt.fields.nodeMetricClient),
}
r.sync()

nodeMetric, err := r.statusUpdater.nodeMetricClient.Get(context.TODO(), tt.fields.nodeName, metav1.GetOptions{})
assert.Equal(t, tt.wantErr, err != nil)
if tt.wantErr {
assert.Equal(t, tt.want, nodeMetric)
} else {
assert.NotNil(t, nodeMetric)
assert.Equal(t, tt.want.Status.NodeMetric, nodeMetric.Status.NodeMetric)
assert.Equal(t, tt.want.Status.PodsMetric, nodeMetric.Status.PodsMetric)
}
})
}
}
10 changes: 6 additions & 4 deletions pkg/slo-controller/noderesource/noderesource_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,9 @@ func (r *NodeResourceReconciler) Reconcile(ctx context.Context, req ctrl.Request
node := &corev1.Node{}
if err := r.Client.Get(context.TODO(), req.NamespacedName, node); err != nil {
if errors.IsNotFound(err) {
klog.V(3).Infof("node %v not found: %v", req.Name)
return ctrl.Result{Requeue: false}, err
// skip non-existing node and return no error to forget the request
klog.V(3).Infof("skip for node %v not found", req.Name)
return ctrl.Result{Requeue: false}, nil
} else {
klog.Errorf("failed to get node %v, error: %v", req.Name, err)
return ctrl.Result{Requeue: true}, err
Expand All @@ -75,8 +76,9 @@ func (r *NodeResourceReconciler) Reconcile(ctx context.Context, req ctrl.Request
nodeMetric := &slov1alpha1.NodeMetric{}
if err := r.Client.Get(context.TODO(), req.NamespacedName, nodeMetric); err != nil {
if errors.IsNotFound(err) {
klog.V(3).Infof("nodemetric %v not found: %v", req.Name)
return ctrl.Result{Requeue: false}, err
// skip non-existing node metric and return no error to forget the request
klog.V(3).Infof("skip for nodemetric %v not found", req.Name)
return ctrl.Result{Requeue: false}, nil
} else {
klog.Errorf("failed to get nodemetric %v, error: %v", req.Name, err)
return ctrl.Result{Requeue: true}, err
Expand Down
17 changes: 4 additions & 13 deletions pkg/slo-controller/noderesource/noderesource_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ import (
"time"

corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
Expand Down Expand Up @@ -78,12 +77,8 @@ func Test_NodeResourceController_NodeNotFound(t *testing.T) {
nodeReq := ctrl.Request{NamespacedName: key}

result, err := r.Reconcile(ctx, nodeReq)
if !errors.IsNotFound(err) {
t.Fatal(err)
}
if result.Requeue != false {
t.Errorf("failed to reconcile")
}
assert.NoError(t, err)
assert.Equal(t, false, result.Requeue)
}

func Test_NodeResourceController_NodeMetricNotExist(t *testing.T) {
Expand Down Expand Up @@ -122,12 +117,8 @@ func Test_NodeResourceController_NodeMetricNotExist(t *testing.T) {
nodeReq := ctrl.Request{NamespacedName: key}

result, err := r.Reconcile(ctx, nodeReq)
if !errors.IsNotFound(err) {
t.Fatal(err)
}
if result.Requeue != false {
t.Errorf("failed to reconcile")
}
assert.NoError(t, err)
assert.Equal(t, false, result.Requeue)
}

func Test_NodeResourceController_ColocationEnabled(t *testing.T) {
Expand Down

0 comments on commit 4bf748b

Please sign in to comment.