@@ -122,7 +122,8 @@ func (r *Resize) RunInfra(ctx context.Context) error {
122122 nodes := & corev1.NodeList {}
123123
124124 if err := r .client .List (ctx , nodes , & client.ListOptions {LabelSelector : selector }); err != nil {
125- return false , err
125+ log .Printf ("error retrieving nodes list, continuing to wait: %s" , err )
126+ return false , nil
126127 }
127128
128129 readyNodes := 0
@@ -193,7 +194,8 @@ func (r *Resize) RunInfra(ctx context.Context) error {
193194 if apierrors .IsNotFound (err ) {
194195 return true , nil
195196 }
196- return false , err
197+ log .Printf ("error retrieving machines list, continuing to wait: %s" , err )
198+ return false , nil
197199 }
198200
199201 log .Printf ("original machinepool %s/%s still exists, continuing to wait" , originalMp .Namespace , originalMp .Name )
@@ -205,7 +207,7 @@ func (r *Resize) RunInfra(ctx context.Context) error {
205207 // Wait for original nodes to delete
206208 if err := wait .PollImmediate (twentySecondIncrement , twentyMinuteTimeout , func () (bool , error ) {
207209 // Re-check for originalNodes to see if they have been deleted
208- return r .nodesMatchExpectedCount (ctx , originalNodeSelector , 0 )
210+ return skipError ( wrapResult ( r .nodesMatchExpectedCount (ctx , originalNodeSelector , 0 )), "error matching expected count" )
209211 }); err != nil {
210212 switch {
211213 case errors .Is (err , wait .ErrWaitTimeout ):
@@ -219,7 +221,7 @@ func (r *Resize) RunInfra(ctx context.Context) error {
219221
220222 if err := wait .PollImmediate (twentySecondIncrement , twentyMinuteTimeout , func () (bool , error ) {
221223 log .Printf ("waiting for nodes to terminate" )
222- return r .nodesMatchExpectedCount (ctx , originalNodeSelector , 0 )
224+ return skipError ( wrapResult ( r .nodesMatchExpectedCount (ctx , originalNodeSelector , 0 )), "error matching expected count" )
223225 }); err != nil {
224226 if errors .Is (err , wait .ErrWaitTimeout ) {
225227 log .Printf ("timed out waiting for nodes to terminate: %v." , err .Error ())
@@ -242,11 +244,13 @@ func (r *Resize) RunInfra(ctx context.Context) error {
242244 nodes := & corev1.NodeList {}
243245 selector , err := labels .Parse ("node-role.kubernetes.io/infra=" )
244246 if err != nil {
247+ // This should never happen, so we do not have to skip this error
245248 return false , err
246249 }
247250
248251 if err := r .client .List (ctx , nodes , & client.ListOptions {LabelSelector : selector }); err != nil {
249- return false , err
252+ log .Printf ("error retrieving nodes list, continuing to wait: %s" , err )
253+ return false , nil
250254 }
251255
252256 readyNodes := 0
@@ -292,7 +296,8 @@ func (r *Resize) RunInfra(ctx context.Context) error {
292296 if apierrors .IsNotFound (err ) {
293297 return true , nil
294298 }
295- return false , err
299+ log .Printf ("error retrieving old machine details, continuing to wait: %s" , err )
300+ return false , nil
296301 }
297302
298303 log .Printf ("temporary machinepool %s/%s still exists, continuing to wait" , tempMp .Namespace , tempMp .Name )
@@ -307,11 +312,13 @@ func (r *Resize) RunInfra(ctx context.Context) error {
307312 nodes := & corev1.NodeList {}
308313 selector , err := labels .Parse ("node-role.kubernetes.io/infra=" )
309314 if err != nil {
315+ // This should never happen, so we do not have to skip this error
310316 return false , err
311317 }
312318
313319 if err := r .client .List (ctx , nodes , & client.ListOptions {LabelSelector : selector }); err != nil {
314- return false , err
320+ log .Printf ("error retrieving nodes list, continuing to wait: %s" , err )
321+ return false , nil
315322 }
316323
317324 switch len (nodes .Items ) {
@@ -334,7 +341,7 @@ func (r *Resize) RunInfra(ctx context.Context) error {
334341
335342 if err := wait .PollImmediate (twentySecondIncrement , twentyMinuteTimeout , func () (bool , error ) {
336343 log .Printf ("waiting for nodes to terminate" )
337- return r .nodesMatchExpectedCount (ctx , tempNodeSelector , 0 )
344+ return skipError ( wrapResult ( r .nodesMatchExpectedCount (ctx , tempNodeSelector , 0 )), "error matching expected count" )
338345 }); err != nil {
339346 if errors .Is (err , wait .ErrWaitTimeout ) {
340347 log .Printf ("timed out waiting for nodes to terminate: %v." , err .Error ())
@@ -536,3 +543,30 @@ func (r *Resize) nodesMatchExpectedCount(ctx context.Context, labelSelector labe
536543
537544 return false , nil
538545}
546+
// An error raised inside a retry loop should not abort the loop: it is only
// logged and the loop keeps polling. For a function f returning
// (bool, error), replace
//
//	return f(...)
//
// by
//
//	return skipError(wrapResult(f(...)), "message giving context to the error")
//
// The returned error is then always nil; when f failed, a "continuing to
// wait" message is logged and the condition is reported as not met.

// result bundles the (bool, error) pair returned by a polling condition so
// that it can be forwarded to skipError as a single argument.
type result struct {
	condition bool
	err       error
}

// wrapResult packs the two return values of a condition function into a
// result, which allows writing skipError(wrapResult(f(...)), msg).
func wrapResult(condition bool, err error) result {
	return result{condition: condition, err: err}
}

// skipError turns a failed condition check into "not done yet".
//
// When res.err is nil, res.condition is returned unchanged. When res.err is
// non-nil, the error is logged, prefixed by msg, and false is returned. The
// returned error is always nil.
func skipError(res result, msg string) (bool, error) {
	if res.err != nil {
		log.Printf("%s, continuing to wait: %s", msg, res.err)
		// The condition value carries no meaning when the check itself
		// failed, so never report success alongside an error.
		return false, nil
	}
	return res.condition, nil
}
0 commit comments