Skip to content

Commit 5246c26

Browse files
committed
Add Drifted condition
This commit introduces a new Kubernetes condition type "Drifted" to improve observability of Helm release drift detection. Signed-off-by: Yasin Özel <yozel@nebius.com>
1 parent 865b5a6 commit 5246c26

File tree

6 files changed

+290
-14
lines changed

6 files changed

+290
-14
lines changed

.claude/settings.local.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"permissions": {
3+
"allow": [
4+
"Bash(git show:*)",
5+
"Bash(go test:*)"
6+
]
7+
}
8+
}

api/v2/condition_types.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ const (
2929
// (uninstall/rollback) due to a failure of the last release attempt against the
3030
// latest desired state.
3131
RemediatedCondition string = "Remediated"
32+
33+
// DriftedCondition represents the status of the Helm release drift detection,
34+
// indicating that the deployed release has drifted from the desired state.
35+
DriftedCondition string = "Drifted"
3236
)
3337

3438
const (
@@ -79,4 +83,12 @@ const (
7983
// DependencyNotReadyReason represents the fact that
8084
// one of the dependencies is not ready.
8185
DependencyNotReadyReason string = "DependencyNotReady"
86+
87+
// DriftDetectedReason represents the fact that drift has been detected in the
88+
// Helm release compared to the expected state.
89+
DriftDetectedReason string = "DriftDetected"
90+
91+
// NoDriftDetectedReason represents the fact that no drift has been detected in
92+
// the Helm release compared to the expected state.
93+
NoDriftDetectedReason string = "NoDriftDetected"
8294
)

docs/spec/v2/helmreleases.md

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1886,8 +1886,9 @@ A HelmRelease enters various states during its lifecycle, reflected as
18861886
[Kubernetes Conditions](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#typical-status-properties).
18871887
It can be [reconciling](#reconciling-helmrelease) when it is being processed by
18881888
the controller, it can be [ready](#ready-helmrelease) when the Helm release is
1889-
installed and up-to-date, or it can [fail](#failed-helmrelease) during
1890-
reconciliation.
1889+
installed and up-to-date, it can [fail](#failed-helmrelease) during
1890+
reconciliation, or it can be [drifted](#drifted-helmrelease) if the
1891+
drift detection mode is set to enabled/warn and there is a drift.
18911892

18921893
The HelmRelease API is compatible with the [kstatus specification](https://github.com/kubernetes-sigs/cli-utils/tree/master/pkg/kstatus),
18931894
and reports `Reconciling` and `Stalled` conditions where applicable to provide
@@ -1972,6 +1973,29 @@ HelmRelease's `.status.conditions`:
19721973
The `TestSuccess` Condition will retain a status value of `"True"` until the
19731974
next Helm install or upgrade occurs, or the Helm tests are disabled.
19741975

1976+
#### Drifted HelmRelease
1977+
1978+
The helm-controller marks the HelmRelease as _drifted_ when it has the following
1979+
characteristics:
1980+
1981+
- The HelmRelease have drift detection mode set to enabled or warn.
1982+
- There is a drift detected against the cluster state.
1983+
1984+
When the HelmRelease is "drifted", the controller sets a Condition with the
1985+
following attributes in the HelmRelease's `.status.conditions`:
1986+
1987+
- `type: Drifted`
1988+
- `status: "True"`
1989+
- `reason: DriftDetected`
1990+
1991+
When the HelmRelease have drift detection mode set to enabled or warn there
1992+
and there is no drift, the controller sets a Condition with the following
1993+
attributes in the HelmRelease's `.status.conditions`:
1994+
1995+
- `type: Drifted`
1996+
- `status: "False"`
1997+
- `reason: NoDriftDetected`
1998+
19751999
#### Failed HelmRelease
19762000

19772001
The helm-controller may get stuck trying to determine state or produce a Helm

internal/reconcile/atomic_release.go

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,11 @@ func (r *AtomicRelease) Reconcile(ctx context.Context, req *Request) error {
188188
}
189189
return fmt.Errorf("atomic release canceled: %w", ctx.Err())
190190
default:
191+
// Remove the drifted condition if drift detection is disabled.
192+
if conditions.Has(req.Object, v2.DriftedCondition) && !req.Object.GetDriftDetection().MustDetectChanges() {
193+
conditions.Delete(req.Object, v2.DriftedCondition)
194+
}
195+
191196
// Determine the current state of the Helm release.
192197
log.V(logger.DebugLevel).Info("determining current state of Helm release")
193198
state, err := DetermineReleaseState(ctx, r.configFactory, req, r.disallowedFieldManagers)
@@ -385,6 +390,10 @@ func (r *AtomicRelease) actionForState(ctx context.Context, req *Request, state
385390
conditions.MarkTrue(req.Object, v2.ReleasedCondition, v2.UpgradeSucceededReason, "%s", msg)
386391
}
387392

393+
if req.Object.GetDriftDetection().MustDetectChanges() {
394+
conditions.MarkFalse(req.Object, v2.DriftedCondition, v2.NoDriftDetectedReason, "No drift detected against the cluster state")
395+
}
396+
388397
return nil, nil
389398
case ReleaseStatusLocked:
390399
log.Info(msgWithReason("release locked", state.Reason))
@@ -440,10 +449,10 @@ func (r *AtomicRelease) actionForState(ctx context.Context, req *Request, state
440449
}
441450
}
442451

443-
r.eventRecorder.Eventf(req.Object, corev1.EventTypeWarning, "DriftDetected",
444-
"Cluster state of release %s has drifted from the desired state:\n%s",
445-
req.Object.Status.History.Latest().FullReleaseName(), diff.SummarizeDiffSet(state.Diff),
446-
)
452+
msg := fmt.Sprintf("Cluster state of release %s has drifted from the desired state:\n%s",
453+
req.Object.Status.History.Latest().FullReleaseName(), diff.SummarizeDiffSet(state.Diff))
454+
r.eventRecorder.Eventf(req.Object, corev1.EventTypeWarning, v2.DriftDetectedReason, msg)
455+
conditions.MarkTrue(req.Object, v2.DriftedCondition, v2.DriftDetectedReason, "%s", msg)
447456

448457
if req.Object.GetDriftDetection().GetMode() == v2.DriftDetectionEnabled {
449458
return NewCorrectClusterDrift(r.configFactory, r.eventRecorder, state.Diff, kube.ManagedFieldsManager), nil

internal/reconcile/atomic_release_test.go

Lines changed: 184 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -214,14 +214,15 @@ func TestAtomicRelease_Reconcile(t *testing.T) {
214214

215215
func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) {
216216
tests := []struct {
217-
name string
218-
releases func(namespace string) []*helmrelease.Release
219-
spec func(spec *v2.HelmReleaseSpec)
220-
status func(namespace string, releases []*helmrelease.Release) v2.HelmReleaseStatus
221-
chart *helmchart.Chart
222-
values map[string]interface{}
223-
expectHistory func(releases []*helmrelease.Release) v2.Snapshots
224-
wantErr error
217+
name string
218+
releases func(namespace string) []*helmrelease.Release
219+
spec func(spec *v2.HelmReleaseSpec)
220+
status func(namespace string, releases []*helmrelease.Release) v2.HelmReleaseStatus
221+
chart *helmchart.Chart
222+
values map[string]interface{}
223+
expectHistory func(releases []*helmrelease.Release) v2.Snapshots
224+
expectConditions []metav1.Condition
225+
wantErr error
225226
}{
226227
{
227228
name: "release is in-sync",
@@ -1161,6 +1162,42 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) {
11611162
},
11621163
chart: testutil.BuildChart(),
11631164
},
1165+
{
1166+
name: "removes Drifted condition when drift detection is disabled",
1167+
releases: func(namespace string) []*helmrelease.Release {
1168+
return []*helmrelease.Release{
1169+
testutil.BuildRelease(&helmrelease.MockReleaseOptions{
1170+
Name: mockReleaseName,
1171+
Namespace: namespace,
1172+
Version: 1,
1173+
Chart: testutil.BuildChart(),
1174+
Status: helmrelease.StatusDeployed,
1175+
}, testutil.ReleaseWithConfig(nil)),
1176+
}
1177+
},
1178+
spec: func(spec *v2.HelmReleaseSpec) {
1179+
spec.DriftDetection = &v2.DriftDetection{
1180+
Mode: v2.DriftDetectionDisabled,
1181+
}
1182+
},
1183+
status: func(namespace string, releases []*helmrelease.Release) v2.HelmReleaseStatus {
1184+
return v2.HelmReleaseStatus{
1185+
History: v2.Snapshots{
1186+
release.ObservedToSnapshot(release.ObserveRelease(releases[0])),
1187+
},
1188+
Conditions: []metav1.Condition{
1189+
*conditions.TrueCondition(v2.DriftedCondition, v2.DriftDetectedReason, "drift detected"),
1190+
},
1191+
}
1192+
},
1193+
chart: testutil.BuildChart(),
1194+
expectHistory: func(releases []*helmrelease.Release) v2.Snapshots {
1195+
return v2.Snapshots{
1196+
release.ObservedToSnapshot(release.ObserveRelease(releases[0])),
1197+
}
1198+
},
1199+
expectConditions: []metav1.Condition{},
1200+
},
11641201
}
11651202
for _, tt := range tests {
11661203
t.Run(tt.name, func(t *testing.T) {
@@ -1242,6 +1279,10 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) {
12421279

12431280
g.Expect(req.Object.Status.History).To(testutil.Equal(tt.expectHistory(history)))
12441281
}
1282+
1283+
if tt.expectConditions != nil {
1284+
g.Expect(req.Object.Status.Conditions).To(conditions.MatchConditions(tt.expectConditions))
1285+
}
12451286
})
12461287
}
12471288
}
@@ -1552,6 +1593,63 @@ func TestAtomicRelease_actionForState(t *testing.T) {
15521593
*conditions.FalseCondition(meta.ReadyCondition, v2.UpgradeFailedReason, "upgrade failed"),
15531594
},
15541595
},
1596+
{
1597+
name: "in-sync release with drift detection enabled sets Drifted condition to false",
1598+
spec: func(spec *v2.HelmReleaseSpec) {
1599+
spec.DriftDetection = &v2.DriftDetection{
1600+
Mode: v2.DriftDetectionEnabled,
1601+
}
1602+
},
1603+
status: func(releases []*helmrelease.Release) v2.HelmReleaseStatus {
1604+
return v2.HelmReleaseStatus{
1605+
History: v2.Snapshots{
1606+
{Version: 1},
1607+
},
1608+
}
1609+
},
1610+
state: ReleaseState{Status: ReleaseStatusInSync},
1611+
want: nil,
1612+
assertConditions: []metav1.Condition{
1613+
*conditions.FalseCondition(v2.DriftedCondition, v2.NoDriftDetectedReason, "No drift detected against the cluster state"),
1614+
},
1615+
},
1616+
{
1617+
name: "in-sync release with drift detection warn sets Drifted condition to false",
1618+
spec: func(spec *v2.HelmReleaseSpec) {
1619+
spec.DriftDetection = &v2.DriftDetection{
1620+
Mode: v2.DriftDetectionWarn,
1621+
}
1622+
},
1623+
status: func(releases []*helmrelease.Release) v2.HelmReleaseStatus {
1624+
return v2.HelmReleaseStatus{
1625+
History: v2.Snapshots{
1626+
{Version: 1},
1627+
},
1628+
}
1629+
},
1630+
state: ReleaseState{Status: ReleaseStatusInSync},
1631+
want: nil,
1632+
assertConditions: []metav1.Condition{
1633+
*conditions.FalseCondition(v2.DriftedCondition, v2.NoDriftDetectedReason, "No drift detected against the cluster state"),
1634+
},
1635+
},
1636+
{
1637+
name: "in-sync release with drift detection disabled does not set Drifted condition",
1638+
spec: func(spec *v2.HelmReleaseSpec) {
1639+
spec.DriftDetection = &v2.DriftDetection{
1640+
Mode: v2.DriftDetectionDisabled,
1641+
}
1642+
},
1643+
status: func(releases []*helmrelease.Release) v2.HelmReleaseStatus {
1644+
return v2.HelmReleaseStatus{
1645+
History: v2.Snapshots{
1646+
{Version: 1},
1647+
},
1648+
}
1649+
},
1650+
state: ReleaseState{Status: ReleaseStatusInSync},
1651+
want: nil,
1652+
},
15551653
{
15561654
name: "locked release triggers unlock action",
15571655
state: ReleaseState{Status: ReleaseStatusLocked},
@@ -1634,6 +1732,9 @@ func TestAtomicRelease_actionForState(t *testing.T) {
16341732
"Deployment/something/mock removed",
16351733
),
16361734
},
1735+
assertConditions: []metav1.Condition{
1736+
*conditions.TrueCondition(v2.DriftedCondition, v2.DriftDetectedReason, "Cluster state of release mock-ns/mock-release.v1 has drifted from the desired state:\nDeployment/something/mock removed"),
1737+
},
16371738
},
16381739
{
16391740
name: "drifted release only triggers event if mode is warn",
@@ -1703,6 +1804,81 @@ func TestAtomicRelease_actionForState(t *testing.T) {
17031804
"Deployment/something/mock changed (0 additions, 1 changes, 0 removals)",
17041805
),
17051806
},
1807+
assertConditions: []metav1.Condition{
1808+
*conditions.TrueCondition(v2.DriftedCondition, v2.DriftDetectedReason, "Cluster state of release mock-ns/mock-release.v1 has drifted from the desired state:\nDeployment/something/mock changed (0 additions, 1 changes, 0 removals)"),
1809+
},
1810+
},
1811+
{
1812+
name: "drifted release sets Drifted condition if mode is warn",
1813+
spec: func(spec *v2.HelmReleaseSpec) {
1814+
spec.DriftDetection = &v2.DriftDetection{
1815+
Mode: v2.DriftDetectionWarn,
1816+
}
1817+
},
1818+
status: func(releases []*helmrelease.Release) v2.HelmReleaseStatus {
1819+
return v2.HelmReleaseStatus{
1820+
History: v2.Snapshots{
1821+
{
1822+
Name: mockReleaseName,
1823+
Namespace: mockReleaseNamespace,
1824+
Version: 1,
1825+
},
1826+
},
1827+
}
1828+
},
1829+
state: ReleaseState{Status: ReleaseStatusDrifted, Diff: jsondiff.DiffSet{
1830+
{
1831+
Type: jsondiff.DiffTypeUpdate,
1832+
DesiredObject: &unstructured.Unstructured{
1833+
Object: map[string]interface{}{
1834+
"apiVersion": "apps/v1",
1835+
"kind": "Deployment",
1836+
"metadata": map[string]interface{}{
1837+
"name": "mock",
1838+
"namespace": "something",
1839+
},
1840+
"spec": map[string]interface{}{
1841+
"replicas": 2,
1842+
},
1843+
},
1844+
},
1845+
ClusterObject: &unstructured.Unstructured{
1846+
Object: map[string]interface{}{
1847+
"apiVersion": "apps/v1",
1848+
"kind": "Deployment",
1849+
"metadata": map[string]interface{}{
1850+
"name": "mock",
1851+
"namespace": "something",
1852+
},
1853+
"spec": map[string]interface{}{
1854+
"replicas": 1,
1855+
},
1856+
},
1857+
},
1858+
Patch: extjsondiff.Patch{
1859+
{
1860+
Type: extjsondiff.OperationReplace,
1861+
Path: "/spec/replicas",
1862+
OldValue: 1,
1863+
Value: 2,
1864+
},
1865+
},
1866+
},
1867+
}},
1868+
want: nil,
1869+
wantErr: nil,
1870+
wantEvent: &corev1.Event{
1871+
Reason: "DriftDetected",
1872+
Type: corev1.EventTypeWarning,
1873+
Message: fmt.Sprintf(
1874+
"Cluster state of release %s has drifted from the desired state:\n%s",
1875+
mockReleaseNamespace+"/"+mockReleaseName+".v1",
1876+
"Deployment/something/mock changed (0 additions, 1 changes, 0 removals)",
1877+
),
1878+
},
1879+
assertConditions: []metav1.Condition{
1880+
*conditions.TrueCondition(v2.DriftedCondition, v2.DriftDetectedReason, "Cluster state of release mock-ns/mock-release.v1 has drifted from the desired state:\nDeployment/something/mock changed (0 additions, 1 changes, 0 removals)"),
1881+
},
17061882
},
17071883
{
17081884
name: "out-of-sync release triggers upgrade",

wrong-test.md

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Bug Report: Test Name Doesn't Match Mode Setting
2+
3+
## Location
4+
File: `internal/reconcile/atomic_release_test.go`
5+
Line: 1642
6+
7+
## Description
8+
9+
The test case named `"drifted release only triggers event if mode is warn"` sets the drift detection mode to `DriftDetectionDisabled` instead of `DriftDetectionWarn`.
10+
11+
## Current Code
12+
13+
```go
14+
{
15+
name: "drifted release only triggers event if mode is warn",
16+
spec: func(spec *v2.HelmReleaseSpec) {
17+
spec.DriftDetection = &v2.DriftDetection{
18+
Mode: v2.DriftDetectionDisabled, // BUG: Should be DriftDetectionWarn
19+
}
20+
},
21+
// ...
22+
}
23+
```
24+
25+
## Expected Code
26+
27+
```go
28+
{
29+
name: "drifted release only triggers event if mode is warn",
30+
spec: func(spec *v2.HelmReleaseSpec) {
31+
spec.DriftDetection = &v2.DriftDetection{
32+
Mode: v2.DriftDetectionWarn, // FIXED
33+
}
34+
},
35+
// ...
36+
}
37+
```
38+
39+
## Impact
40+
41+
The test name indicates it's testing "warn" mode behavior, but it's actually testing "disabled" mode. This means:
42+
1. The warn mode + drift scenario is not properly tested
43+
2. The test results may be misleading
44+
45+
## Recommended Fix
46+
47+
Change `v2.DriftDetectionDisabled` to `v2.DriftDetectionWarn` on line 1645.

0 commit comments

Comments
 (0)