Skip to content

Commit cfab669

Browse files
committed
fix: init NNC from reconciler instead of directly
1 parent 3cb77fe commit cfab669

File tree

3 files changed

+41
-66
lines changed

3 files changed

+41
-66
lines changed

cns/kubecontroller/nodenetworkconfig/reconciler.go

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,10 @@ type cnsClient interface {
2727
MustEnsureNoStaleNCs(validNCIDs []string)
2828
}
2929

30+
type nodenetworkconfigSink func(*v1alpha.NodeNetworkConfig) error
31+
3032
type nodeNetworkConfigListener interface {
31-
Update(*v1alpha.NodeNetworkConfig) error
33+
Update(*v1alpha.NodeNetworkConfig) error // phasing this out in favor of the sink
3234
}
3335

3436
type nncGetter interface {
@@ -38,31 +40,33 @@ type nncGetter interface {
3840
// Reconciler watches for CRD status changes
3941
type Reconciler struct {
4042
cnscli cnsClient
41-
ipampoolmonitorcli nodeNetworkConfigListener
43+
ipampoolmonitorcli nodenetworkconfigSink
4244
nnccli nncGetter
4345
once sync.Once
44-
started chan interface{}
46+
started chan any
4547
nodeIP string
4648
isSwiftV2 bool
49+
initializer nodenetworkconfigSink
4750
}
4851

4952
// NewReconciler creates a NodeNetworkConfig Reconciler which will get updates from the Kubernetes
5053
// apiserver for NNC events.
5154
// Provided nncListeners are passed the NNC after the Reconcile preprocesses it. Note: order matters! The
5255
// passed Listeners are notified in the order provided.
53-
func NewReconciler(cnscli cnsClient, ipampoolmonitorcli nodeNetworkConfigListener, nodeIP string, isSwiftV2 bool) *Reconciler {
56+
func NewReconciler(cnscli cnsClient, initializer nodenetworkconfigSink, ipampoolmonitorcli nodeNetworkConfigListener, nodeIP string, isSwiftV2 bool) *Reconciler {
5457
return &Reconciler{
5558
cnscli: cnscli,
56-
ipampoolmonitorcli: ipampoolmonitorcli,
57-
started: make(chan interface{}),
59+
ipampoolmonitorcli: ipampoolmonitorcli.Update,
60+
started: make(chan any),
5861
nodeIP: nodeIP,
5962
isSwiftV2: isSwiftV2,
63+
initializer: initializer,
6064
}
6165
}
6266

6367
// Reconcile is called on CRD status changes
6468
func (r *Reconciler) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) {
65-
listenersToNotify := []nodeNetworkConfigListener{}
69+
listenersToNotify := []nodenetworkconfigSink{}
6670
nnc, err := r.nnccli.Get(ctx, req.NamespacedName)
6771
if err != nil {
6872
if apierrors.IsNotFound(err) {
@@ -89,6 +93,12 @@ func (r *Reconciler) Reconcile(ctx context.Context, req reconcile.Request) (reco
8993
}
9094
r.cnscli.MustEnsureNoStaleNCs(validNCIDs)
9195

96+
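// call the initializer on every reconcile, before processing NCs; it must be safe to call repeatedly (the CNS initFunc no-ops once it has succeeded)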
97+
if err := r.initializer(nnc); err != nil {
98+
logger.Errorf("[cns-rc] initializer failed during reconcile: %v", err)
99+
return reconcile.Result{}, errors.Wrap(err, "initializer failed during reconcile")
100+
}
101+
92102
// for each NC, parse it in to a CreateNCRequest and forward it to the appropriate Listener
93103
for i := range nnc.Status.NetworkContainers {
94104
// check if this NC matches the Node IP if we have one to check against
@@ -134,7 +144,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req reconcile.Request) (reco
134144

135145
// push the NNC to the registered NNC listeners.
136146
for _, l := range listenersToNotify {
137-
if err := l.Update(nnc); err != nil {
147+
if err := l(nnc); err != nil {
138148
return reconcile.Result{}, errors.Wrap(err, "nnc listener return error during update")
139149
}
140150
}

cns/kubecontroller/nodenetworkconfig/reconciler_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ func TestReconcile(t *testing.T) {
192192
}
193193

194194
t.Run(tt.name, func(t *testing.T) {
195-
r := NewReconciler(&tt.cnsClient, &tt.cnsClient, tt.nodeIP, false)
195+
r := NewReconciler(&tt.cnsClient, func(*v1alpha.NodeNetworkConfig) error { return nil }, &tt.cnsClient, tt.nodeIP, false)
196196
r.nnccli = &tt.ncGetter
197197
got, err := r.Reconcile(context.Background(), tt.in)
198198
if tt.wantErr {
@@ -249,7 +249,7 @@ func TestReconcileStaleNCs(t *testing.T) {
249249
return &nncLog[len(nncLog)-1], nil
250250
}
251251

252-
r := NewReconciler(&cnsClient, &cnsClient, nodeIP, false)
252+
r := NewReconciler(&cnsClient, func(*v1alpha.NodeNetworkConfig) error { return nil }, &cnsClient, nodeIP, false)
253253
r.nnccli = &mockNCGetter{get: nncIterator}
254254

255255
_, err := r.Reconcile(context.Background(), reconcile.Request{})

cns/service/main.go

Lines changed: 21 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@ import (
5252
cnstypes "github.com/Azure/azure-container-networking/cns/types"
5353
"github.com/Azure/azure-container-networking/cns/wireserver"
5454
acn "github.com/Azure/azure-container-networking/common"
55-
"github.com/Azure/azure-container-networking/crd"
5655
"github.com/Azure/azure-container-networking/crd/clustersubnetstate"
5756
cssv1alpha1 "github.com/Azure/azure-container-networking/crd/clustersubnetstate/api/v1alpha1"
5857
"github.com/Azure/azure-container-networking/crd/multitenancy"
@@ -74,7 +73,6 @@ import (
7473
"go.uber.org/zap"
7574
"golang.org/x/time/rate"
7675
corev1 "k8s.io/api/core/v1"
77-
apierrors "k8s.io/apimachinery/pkg/api/errors"
7876
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
7977
"k8s.io/apimachinery/pkg/fields"
8078
kuberuntime "k8s.io/apimachinery/pkg/runtime"
@@ -1307,37 +1305,17 @@ func InitializeMultiTenantController(ctx context.Context, httpRestService cns.HT
13071305
return nil
13081306
}
13091307

1310-
type nodeNetworkConfigGetter interface {
1311-
Get(context.Context) (*v1alpha.NodeNetworkConfig, error)
1312-
}
1313-
13141308
type ipamStateReconciler interface {
13151309
ReconcileIPAMStateForSwift(ncRequests []*cns.CreateNetworkContainerRequest, podInfoByIP map[string]cns.PodInfo, nnc *v1alpha.NodeNetworkConfig) cnstypes.ResponseCode
13161310
}
13171311

13181312
// TODO(rbtr) where should this live??
13191313
// reconcileInitialCNSState initializes cns by passing pods and a CreateNetworkContainerRequest
1320-
func reconcileInitialCNSState(ctx context.Context, cli nodeNetworkConfigGetter, ipamReconciler ipamStateReconciler, podInfoByIPProvider cns.PodInfoByIPProvider, isSwiftV2 bool) error {
1321-
// Get nnc using direct client
1322-
nnc, err := cli.Get(ctx)
1323-
if err != nil {
1324-
if crd.IsNotDefined(err) {
1325-
return errors.Wrap(err, "failed to init CNS state: NNC CRD is not defined")
1326-
}
1327-
if apierrors.IsNotFound(err) {
1328-
return errors.Wrap(err, "failed to init CNS state: NNC not found")
1329-
}
1330-
return errors.Wrap(err, "failed to init CNS state: failed to get NNC CRD")
1331-
}
1332-
1333-
logger.Printf("Retrieved NNC: %+v", nnc)
1334-
if !nnc.DeletionTimestamp.IsZero() {
1335-
return errors.New("failed to init CNS state: NNC is being deleted")
1336-
}
1337-
1338-
// If there are no NCs, we can't initialize our state and we should fail out.
1339-
if len(nnc.Status.NetworkContainers) == 0 {
1340-
return errors.New("failed to init CNS state: no NCs found in NNC CRD")
1314+
func reconcileInitialCNSState(nnc *v1alpha.NodeNetworkConfig, ipamReconciler ipamStateReconciler, podInfoByIPProvider cns.PodInfoByIPProvider, isSwiftV2 bool) error {
1315+
// if there are no NCs we cannot initialize CNS state, so fail out with an error
1316+
ncCount := len(nnc.Status.NetworkContainers)
1317+
if ncCount == 0 {
1318+
return errors.New("no network containers found in NNC status")
13411319
}
13421320

13431321
// Get previous PodInfo state from podInfoByIPProvider
@@ -1444,35 +1422,22 @@ func InitializeCRDState(ctx context.Context, z *zap.Logger, httpRestService cns.
14441422
return errors.Wrap(err, "failed to initialize ip state")
14451423
}
14461424

1447-
// create scoped kube clients.
1448-
directcli, err := client.New(kubeConfig, client.Options{Scheme: nodenetworkconfig.Scheme})
1449-
if err != nil {
1450-
return errors.Wrap(err, "failed to create ctrl client")
1451-
}
1452-
directnnccli := nodenetworkconfig.NewClient(directcli)
1453-
if err != nil {
1454-
return errors.Wrap(err, "failed to create NNC client")
1455-
}
1456-
// TODO(rbtr): nodename and namespace should be in the cns config
1457-
directscopedcli := nncctrl.NewScopedClient(directnnccli, types.NamespacedName{Namespace: "kube-system", Name: nodeName})
1458-
1459-
logger.Printf("Reconciling initial CNS state")
1460-
// apiserver nnc might not be registered or api server might be down and crashloop backof puts us outside of 5-10 minutes we have for
1461-
// aks addons to come up so retry a bit more aggresively here.
1462-
// will retry 10 times maxing out at a minute taking about 8 minutes before it gives up.
1463-
attempt := 0
1464-
_ = retry.Do(func() error {
1465-
attempt++
1466-
logger.Printf("reconciling initial CNS state attempt: %d", attempt)
1467-
err = reconcileInitialCNSState(ctx, directscopedcli, httpRestServiceImplementation, podInfoByIPProvider, cnsconfig.EnableSwiftV2)
1468-
if err != nil {
1469-
logger.Errorf("failed to reconcile initial CNS state, attempt: %d err: %v", attempt, err)
1470-
nncInitFailure.Inc()
1425+
1426+
// hasInitialized records whether the initial CNS state reconcile below has
// already completed successfully.
// NOTE(review): this is a plain bool captured by the closure; it assumes the
// NNC reconciler never runs Reconcile concurrently - confirm.
hasInitialized := false

// initFunc is passed to the NNC reconciler as its initializer and is called
// with the NNC from Reconcile. It reconciles the initial CNS state from that
// NNC. After the first success it is a no-op that returns nil. If
// reconcileInitialCNSState fails, the error is returned unchanged and
// hasInitialized stays false, so the next call tries again.
initFunc := func(nnc *v1alpha.NodeNetworkConfig) error {
	if hasInitialized {
		return nil
	}
	logger.Printf("Reconciling initial CNS state")
	if err := reconcileInitialCNSState(nnc, httpRestServiceImplementation, podInfoByIPProvider, cnsconfig.EnableSwiftV2); err != nil {
		return err
	}
	// The gauge is a 0/1 "has initialized" flag: the direct-init code this
	// replaces set it to 1 on success, so keep reporting 1 here.
	hasNNCInitialized.Set(1)
	hasInitialized = true
	return nil
}
1439+
1440+
14761441
scheme := kuberuntime.NewScheme()
14771442
if err := corev1.AddToScheme(scheme); err != nil { //nolint:govet // intentional shadow
14781443
return errors.Wrap(err, "failed to add corev1 to scheme")
@@ -1561,7 +1526,7 @@ func InitializeCRDState(ctx context.Context, z *zap.Logger, httpRestService cns.
15611526

15621527
// get CNS Node IP to compare NC Node IP with this Node IP to ensure NCs were created for this node
15631528
nodeIP := configuration.NodeIP()
1564-
nncReconciler := nncctrl.NewReconciler(httpRestServiceImplementation, poolMonitor, nodeIP, cnsconfig.EnableSwiftV2)
1529+
nncReconciler := nncctrl.NewReconciler(httpRestServiceImplementation, initFunc, poolMonitor, nodeIP, cnsconfig.EnableSwiftV2)
15651530
// pass Node to the Reconciler for Controller xref
15661531
// IPAMv1 - reconcile only status changes (where generation doesn't change).
15671532
// IPAMv2 - reconcile all updates.

0 commit comments

Comments
 (0)