Skip to content

Commit a89cace

Browse files
committed
Debounce the degraded status for flaky conditions
1 parent af37b11 commit a89cace

File tree

3 files changed

+79
-9
lines changed

3 files changed

+79
-9
lines changed

pkg/operator/credentialsrequest/credentialsrequest_controller_test.go

Lines changed: 61 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -311,7 +311,7 @@ func TestCredentialsRequestReconcile(t *testing.T) {
311311
},
312312
{
313313
// This indicates an error state.
314-
name: "new credential no root creds available",
314+
name: "new credential no root creds available (initially)",
315315
existing: []runtime.Object{
316316
testOperatorConfig(""),
317317
createTestNamespace(testNamespace),
@@ -337,6 +337,55 @@ func TestCredentialsRequestReconcile(t *testing.T) {
337337
assert.False(t, cr.Status.Provisioned)
338338
},
339339
expectErr: true,
340+
expectedConditions: []ExpectedCondition{
341+
{
342+
conditionType: minterv1.CredentialsProvisionFailure,
343+
reason: "CredentialsProvisionFailure",
344+
status: corev1.ConditionTrue,
345+
},
346+
},
347+
expectedCOConditions: []ExpectedCOCondition{
348+
{
349+
conditionType: configv1.OperatorProgressing,
350+
status: corev1.ConditionTrue,
351+
},
352+
},
353+
},
354+
{
355+
// This indicates an error state.
356+
name: "new credential no root creds available (after waiting period)",
357+
existing: []runtime.Object{
358+
testOperatorConfig(""),
359+
createTestNamespace(testNamespace),
360+
createTestNamespace(testSecretNamespace),
361+
testFailedCredentialsRequest(t, 10*time.Minute),
362+
testAWSCredsSecret("openshift-cloud-credential-operator", "cloud-credential-operator-iam-ro-creds", testReadAWSAccessKeyID, testReadAWSSecretAccessKey),
363+
testClusterVersion(),
364+
testInfrastructure(testInfraName),
365+
},
366+
existingAdmin: []runtime.Object{},
367+
mockRootAWSClient: func(mockCtrl *gomock.Controller) *mockaws.MockClient {
368+
mockAWSClient := mockaws.NewMockClient(mockCtrl)
369+
return mockAWSClient
370+
},
371+
mockReadAWSClient: func(mockCtrl *gomock.Controller) *mockaws.MockClient {
372+
mockAWSClient := mockaws.NewMockClient(mockCtrl)
373+
return mockAWSClient
374+
},
375+
validate: func(c client.Client, t *testing.T) {
376+
targetSecret := getSecret(c)
377+
assert.Nil(t, targetSecret)
378+
cr := getCR(c)
379+
assert.False(t, cr.Status.Provisioned)
380+
},
381+
expectErr: true,
382+
expectedConditions: []ExpectedCondition{
383+
{
384+
conditionType: minterv1.CredentialsProvisionFailure,
385+
reason: "CredentialsProvisionFailure",
386+
status: corev1.ConditionTrue,
387+
},
388+
},
340389
expectedCOConditions: []ExpectedCOCondition{
341390
{
342391
conditionType: configv1.OperatorProgressing,
@@ -1683,6 +1732,17 @@ func testProvisionedCredentialsRequest(t *testing.T) *minterv1.CredentialsReques
16831732
return cr
16841733
}
16851734

1735+
// testFailedCredentialsRequest returns the CredentialsRequest produced by
// testCredentialsRequest(t) with one extra status condition appended: a
// CredentialsProvisionFailure condition (reason "CredentialsProvisionFailure",
// status True) whose LastTransitionTime is set to howLong before the current
// time, so the failure appears to have been in effect for that duration.
func testFailedCredentialsRequest(t *testing.T, howLong time.Duration) *minterv1.CredentialsRequest {
1736+
cr := testCredentialsRequest(t)
1737+
cr.Status.Conditions = append(cr.Status.Conditions, minterv1.CredentialsRequestCondition{
1738+
Type: minterv1.CredentialsProvisionFailure,
1739+
Reason: "CredentialsProvisionFailure",
1740+
Status: corev1.ConditionTrue,
1741+
LastTransitionTime: metav1.NewTime(time.Now().Add(-howLong)),
1742+
})
1743+
return cr
1744+
}
1745+
16861746
func createTestNamespace(namespace string) *corev1.Namespace {
16871747
return &corev1.Namespace{
16881748
ObjectMeta: metav1.ObjectMeta{

pkg/operator/credentialsrequest/status.go

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@ package credentialsrequest
33
import (
44
"context"
55
"fmt"
6+
"time"
67

78
log "github.com/sirupsen/logrus"
89

@@ -131,7 +132,9 @@ func computeStatusConditions(
131132
for _, t := range minterv1.FailureConditionTypes {
132133
failureCond := utils.FindCredentialsRequestCondition(cr.Status.Conditions, t)
133134
if failureCond != nil && failureCond.Status == corev1.ConditionTrue {
134-
foundFailure = true
135+
if time.Since(failureCond.LastTransitionTime.Time) > 5*time.Minute {
136+
foundFailure = true
137+
}
135138
break
136139
}
137140
}

pkg/operator/podidentity/podidentitywebhook_controller.go

Lines changed: 14 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -285,6 +285,7 @@ type staticResourceReconciler struct {
285285
conditions []configv1.ClusterOperatorStatusCondition
286286
cache resourceapply.ResourceCache
287287
podIdentityType PodIdentityManifestSource
288+
degradedSince time.Time
288289
}
289290

290291
var _ reconcile.Reconciler = &staticResourceReconciler{}
@@ -294,16 +295,22 @@ func (r *staticResourceReconciler) Reconcile(ctx context.Context, request reconc
294295
err := r.ReconcileResources(ctx)
295296
if err != nil {
296297
r.logger.Errorf("reconciliation failed, retrying in %s", retryInterval.String())
297-
r.conditions = []configv1.ClusterOperatorStatusCondition{
298-
{
299-
Type: configv1.OperatorDegraded,
300-
Status: configv1.ConditionTrue,
301-
Reason: reasonStaticResourceReconcileFailed,
302-
Message: fmt.Sprintf("static resource reconciliation failed: %v", err),
303-
},
298+
if r.degradedSince.IsZero() {
299+
r.degradedSince = time.Now()
300+
} else if time.Since(r.degradedSince) > 5*time.Minute {
301+
r.conditions = []configv1.ClusterOperatorStatusCondition{
302+
{
303+
Type: configv1.OperatorDegraded,
304+
Status: configv1.ConditionTrue,
305+
Reason: reasonStaticResourceReconcileFailed,
306+
Message: fmt.Sprintf("static resource reconciliation failed: %v", err),
307+
},
308+
}
304309
}
305310
return reconcile.Result{RequeueAfter: retryInterval}, err
306311
}
312+
313+
r.degradedSince = time.Time{}
307314
r.conditions = []configv1.ClusterOperatorStatusCondition{}
308315
return reconcile.Result{}, nil
309316
}

0 commit comments

Comments
 (0)