Skip to content

Commit 7fd2aa6

Browse files
committed
Debounce the degraded status for flaky conditions
1 parent af37b11 commit 7fd2aa6

File tree

4 files changed

+69
-19
lines changed

4 files changed

+69
-19
lines changed

pkg/operator/credentialsrequest/credentialsrequest_controller.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -594,9 +594,10 @@ var _ reconcile.Reconciler = &ReconcileCredentialsRequest{}
594594
// ReconcileCredentialsRequest reconciles a CredentialsRequest object
595595
type ReconcileCredentialsRequest struct {
596596
client.Client
597-
AdminClient client.Client
598-
Actuator actuator.Actuator
599-
platformType configv1.PlatformType
597+
AdminClient client.Client
598+
Actuator actuator.Actuator
599+
platformType configv1.PlatformType
600+
degradedDebouncer utils.DegradedDebouncer
600601
}
601602

602603
// Reconcile reads that state of the cluster for a CredentialsRequest object and

pkg/operator/credentialsrequest/status.go

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ func (r *ReconcileCredentialsRequest) GetConditions(logger log.FieldLogger) ([]c
3939
mode,
4040
credRequests,
4141
r.platformType,
42+
&r.degradedDebouncer,
4243
logger), nil
4344
}
4445

@@ -92,6 +93,7 @@ func computeStatusConditions(
9293
mode operatorv1.CloudCredentialsMode,
9394
credRequests []minterv1.CredentialsRequest,
9495
clusterCloudPlatform configv1.PlatformType,
96+
degradedDebouncer *utils.DegradedDebouncer,
9597
logger log.FieldLogger) []configv1.ClusterOperatorStatusCondition {
9698
operatorIsDisabled := mode == operatorv1.CloudCredentialsModeManual
9799

@@ -142,14 +144,26 @@ func computeStatusConditions(
142144
}
143145

144146
if failingCredRequests > 0 {
145-
var degradedCondition configv1.ClusterOperatorStatusCondition
146-
degradedCondition.Type = configv1.OperatorDegraded
147-
degradedCondition.Status = configv1.ConditionTrue
148-
degradedCondition.Reason = reasonCredentialsFailing
149-
degradedCondition.Message = fmt.Sprintf(
150-
"%d of %d credentials requests are failing to sync.",
151-
failingCredRequests, len(validCredRequests))
152-
conditions = append(conditions, degradedCondition)
147+
degradedDebouncer.MarkDegraded()
148+
if degradedDebouncer.ShouldReport() {
149+
var degradedCondition configv1.ClusterOperatorStatusCondition
150+
degradedCondition.Type = configv1.OperatorDegraded
151+
degradedCondition.Status = configv1.ConditionTrue
152+
degradedCondition.Reason = reasonCredentialsFailing
153+
degradedCondition.Message = fmt.Sprintf(
154+
"%d of %d credentials requests are failing to sync.",
155+
failingCredRequests, len(validCredRequests))
156+
conditions = append(conditions, degradedCondition)
157+
} else {
158+
logger.Debugf(
159+
"%d of %d credentials are failing to sync, but only for %s",
160+
failingCredRequests,
161+
len(validCredRequests),
162+
degradedDebouncer.DegradedFor(),
163+
)
164+
}
165+
} else {
166+
degradedDebouncer.MarkNotDegraded()
153167
}
154168

155169
// Progressing should be true if the operator is making changes to the operand. In this case

pkg/operator/podidentity/podidentitywebhook_controller.go

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,7 @@ type staticResourceReconciler struct {
285285
conditions []configv1.ClusterOperatorStatusCondition
286286
cache resourceapply.ResourceCache
287287
podIdentityType PodIdentityManifestSource
288+
degradedDebouncer utils.DegradedDebouncer
288289
}
289290

290291
var _ reconcile.Reconciler = &staticResourceReconciler{}
@@ -293,17 +294,21 @@ func (r *staticResourceReconciler) Reconcile(ctx context.Context, request reconc
293294
r.logger.Debugf("reconciling after watch event %#v", request)
294295
err := r.ReconcileResources(ctx)
295296
if err != nil {
296-
r.logger.Errorf("reconciliation failed, retrying in %s", retryInterval.String())
297-
r.conditions = []configv1.ClusterOperatorStatusCondition{
298-
{
299-
Type: configv1.OperatorDegraded,
300-
Status: configv1.ConditionTrue,
301-
Reason: reasonStaticResourceReconcileFailed,
302-
Message: fmt.Sprintf("static resource reconciliation failed: %v", err),
303-
},
297+
r.degradedDebouncer.MarkDegraded()
298+
r.logger.Errorf("reconciliation failed, retrying in %s (failing for %s)", retryInterval.String(), r.degradedDebouncer.DegradedFor())
299+
if r.degradedDebouncer.ShouldReport() {
300+
r.conditions = []configv1.ClusterOperatorStatusCondition{
301+
{
302+
Type: configv1.OperatorDegraded,
303+
Status: configv1.ConditionTrue,
304+
Reason: reasonStaticResourceReconcileFailed,
305+
Message: fmt.Sprintf("static resource reconciliation failed: %v", err),
306+
},
307+
}
304308
}
305309
return reconcile.Result{RequeueAfter: retryInterval}, err
306310
}
311+
r.degradedDebouncer.MarkNotDegraded()
307312
r.conditions = []configv1.ClusterOperatorStatusCondition{}
308313
return reconcile.Result{}, nil
309314
}

pkg/operator/utils/utils.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"fmt"
66
"strconv"
77
"strings"
8+
"time"
89

910
"golang.org/x/mod/semver"
1011
corev1 "k8s.io/api/core/v1"
@@ -492,3 +493,32 @@ func UpdateStatus(client client.Client, origCR, newCR *minterv1.CredentialsReque
492493

493494
return nil
494495
}
496+
497+
// degradedDebounceTime is the default amount of time a degraded state has to
// persist before DegradedDebouncer.ShouldReport returns true.
const degradedDebounceTime = 5 * time.Minute

// DegradedDebouncer tracks how long a component has been continuously
// degraded, so that callers can hold back a Degraded condition until the
// failure has lasted longer than a debounce window.
//
// The zero value is ready to use: it starts out not degraded and applies the
// default window (degradedDebounceTime). The type does no locking of its own.
type DegradedDebouncer struct {
	// Threshold is how long the degraded state must have lasted before
	// ShouldReport returns true. Zero or a negative value selects
	// degradedDebounceTime.
	Threshold time.Duration

	// currentlyDegraded is true between a MarkDegraded call and the next
	// MarkNotDegraded call.
	currentlyDegraded bool
	// degradedAt is the time of the MarkDegraded call that started the
	// current degraded period. It is only meaningful while currentlyDegraded
	// is true.
	degradedAt time.Time
}

// MarkDegraded records that the component is degraded. Only the first call
// after a healthy period starts the timer; repeated calls while already
// degraded leave the start time untouched.
func (d *DegradedDebouncer) MarkDegraded() {
	if d.currentlyDegraded {
		return
	}
	d.currentlyDegraded = true
	d.degradedAt = time.Now()
}

// MarkNotDegraded records that the component is healthy again, which ends the
// current degraded period.
func (d *DegradedDebouncer) MarkNotDegraded() {
	d.currentlyDegraded = false
}

// DegradedFor returns how long the current degraded period has lasted, or
// zero when the component is not currently marked degraded.
func (d *DegradedDebouncer) DegradedFor() time.Duration {
	if !d.currentlyDegraded {
		return 0
	}
	return time.Since(d.degradedAt)
}

// ShouldReport reports whether the degraded state has lasted strictly longer
// than the debounce window and should therefore be surfaced.
func (d *DegradedDebouncer) ShouldReport() bool {
	threshold := d.Threshold
	if threshold <= 0 {
		// Unset (or nonsensical) threshold: fall back to the package default
		// so zero-value debouncers keep the original five-minute behavior.
		threshold = degradedDebounceTime
	}
	return d.DegradedFor() > threshold
}

0 commit comments

Comments
 (0)