Skip to content

Commit 7ed8d21

Browse files
(fix) Helm to Boxcutter migration during OLM upgrade
When upgrading OLM from standard (Helm runtime) to experimental (Boxcutter runtime), the BoxcutterStorageMigrator creates a ClusterExtensionRevision from the existing Helm release. However, the migrated revision was created without status conditions, causing a race condition where it wasn't recognized as "Installed". Assisted-by: CLAUDE
1 parent 4d4f894 commit 7ed8d21

File tree

4 files changed

+453
-13
lines changed

4 files changed

+453
-13
lines changed

internal/operator-controller/applier/boxcutter.go

Lines changed: 127 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
"helm.sh/helm/v3/pkg/release"
1616
"helm.sh/helm/v3/pkg/storage/driver"
1717
apierrors "k8s.io/apimachinery/pkg/api/errors"
18+
"k8s.io/apimachinery/pkg/api/meta"
1819
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1920
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
2021
"k8s.io/apimachinery/pkg/runtime"
@@ -244,8 +245,7 @@ func (m *BoxcutterStorageMigrator) Migrate(ctx context.Context, ext *ocv1.Cluste
244245
return fmt.Errorf("listing ClusterExtensionRevisions before attempting migration: %w", err)
245246
}
246247
if len(existingRevisionList.Items) != 0 {
247-
// No migration needed.
248-
return nil
248+
return m.ensureMigratedRevisionStatus(ctx, existingRevisionList.Items)
249249
}
250250

251251
ac, err := m.ActionClientGetter.ActionClientFor(ctx, ext)
@@ -262,11 +262,36 @@ func (m *BoxcutterStorageMigrator) Migrate(ctx context.Context, ext *ocv1.Cluste
262262
return err
263263
}
264264

265+
// Only migrate from a Helm release that represents a deployed, working installation.
266+
// If the latest revision is not deployed (e.g. FAILED), look through the history and
267+
// select the most-recent deployed release instead.
268+
if helmRelease == nil || helmRelease.Info == nil || helmRelease.Info.Status != release.StatusDeployed {
269+
var err error
270+
helmRelease, err = m.findLatestDeployedRelease(ac, ext.GetName())
271+
if err != nil {
272+
return err
273+
}
274+
if helmRelease == nil {
275+
// No deployed release found in history - skip migration. The ClusterExtension
276+
// controller will handle this via normal rollout.
277+
return nil
278+
}
279+
}
280+
265281
rev, err := m.RevisionGenerator.GenerateRevisionFromHelmRelease(ctx, helmRelease, ext, objectLabels)
266282
if err != nil {
267283
return err
268284
}
269285

286+
// Mark this revision as migrated from Helm so we can distinguish it from
287+
// normal Boxcutter revisions. This label is critical for ensuring we only
288+
// set Succeeded=True status on actually-migrated revisions, not on revision 1
289+
// created during normal Boxcutter operation.
290+
if rev.Labels == nil {
291+
rev.Labels = make(map[string]string)
292+
}
293+
rev.Labels[labels.MigratedFromHelmKey] = "true"
294+
270295
// Set ownerReference for proper garbage collection when the ClusterExtension is deleted.
271296
if err := controllerutil.SetControllerReference(ext, rev, m.Scheme); err != nil {
272297
return fmt.Errorf("set ownerref: %w", err)
@@ -276,9 +301,107 @@ func (m *BoxcutterStorageMigrator) Migrate(ctx context.Context, ext *ocv1.Cluste
276301
return err
277302
}
278303

279-
// Re-fetch to get server-managed fields like Generation
304+
// Set initial status on the migrated revision to mark it as succeeded.
305+
//
306+
// The revision must have a Succeeded=True status condition immediately after creation.
307+
//
308+
// A revision is only considered "Installed" (vs "RollingOut") when it has this condition.
309+
// Without it, the system cannot determine what version is currently installed, which breaks:
310+
// - Version resolution (can't compute upgrade paths from unknown starting point)
311+
// - Status reporting (installed bundle appears as nil)
312+
// - Subsequent upgrades (resolution fails without knowing current version)
313+
//
314+
// While the ClusterExtensionRevision controller would eventually reconcile and set this status,
315+
// that creates a timing gap where the ClusterExtension reconciliation happens before the status
316+
// is set, causing failures during the OLM upgrade window.
317+
//
318+
// Since we're creating this revision from a successfully deployed Helm release, we know it
319+
// represents a working installation and can safely mark it as succeeded immediately.
320+
return m.ensureRevisionStatus(ctx, rev)
321+
}
322+
323+
// ensureMigratedRevisionStatus checks if revision 1 exists and needs its status set.
324+
// This handles the case where revision creation succeeded but status update failed.
325+
// Returns nil if no action is needed.
326+
func (m *BoxcutterStorageMigrator) ensureMigratedRevisionStatus(ctx context.Context, revisions []ocv1.ClusterExtensionRevision) error {
327+
for i := range revisions {
328+
if revisions[i].Spec.Revision != 1 {
329+
continue
330+
}
331+
// Only process migrated revisions - skip normal Boxcutter revision 1 to avoid extra API calls.
332+
if revisions[i].Labels[labels.MigratedFromHelmKey] != "true" {
333+
return nil
334+
}
335+
// Skip if already succeeded - status is already set correctly.
336+
if meta.IsStatusConditionTrue(revisions[i].Status.Conditions, ocv1.ClusterExtensionRevisionTypeSucceeded) {
337+
return nil
338+
}
339+
return m.ensureRevisionStatus(ctx, &revisions[i])
340+
}
341+
// No revision 1 found - migration not applicable (revisions created by normal operation).
342+
return nil
343+
}
344+
345+
// findLatestDeployedRelease searches the Helm release history for the most recent deployed release.
346+
// Returns nil if no deployed release is found.
347+
func (m *BoxcutterStorageMigrator) findLatestDeployedRelease(ac helmclient.ActionInterface, name string) (*release.Release, error) {
348+
history, err := ac.History(name)
349+
if errors.Is(err, driver.ErrReleaseNotFound) {
350+
// no Helm Release history -> no prior installation.
351+
return nil, nil
352+
}
353+
if err != nil {
354+
return nil, err
355+
}
356+
357+
var latestDeployed *release.Release
358+
for _, rel := range history {
359+
if rel == nil || rel.Info == nil {
360+
continue
361+
}
362+
if rel.Info.Status != release.StatusDeployed {
363+
continue
364+
}
365+
if latestDeployed == nil || rel.Version > latestDeployed.Version {
366+
latestDeployed = rel
367+
}
368+
}
369+
370+
return latestDeployed, nil
371+
}
372+
373+
// ensureRevisionStatus ensures the revision has the Succeeded status condition set.
374+
// Returns nil if the status is already set or after successfully setting it.
375+
// Only sets status on revisions that were actually migrated from Helm (marked with MigratedFromHelmKey label).
376+
func (m *BoxcutterStorageMigrator) ensureRevisionStatus(ctx context.Context, rev *ocv1.ClusterExtensionRevision) error {
377+
// Re-fetch to get latest version before checking status
280378
if err := m.Client.Get(ctx, client.ObjectKeyFromObject(rev), rev); err != nil {
281-
return fmt.Errorf("getting created revision: %w", err)
379+
return fmt.Errorf("getting existing revision for status check: %w", err)
380+
}
381+
382+
// Only set status if this revision was actually migrated from Helm.
383+
// This prevents us from incorrectly marking normal Boxcutter revision 1 as succeeded
384+
// when it's still in progress.
385+
if rev.Labels[labels.MigratedFromHelmKey] != "true" {
386+
return nil
387+
}
388+
389+
// Check if status is already set
390+
if meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeSucceeded) != nil {
391+
return nil
392+
}
393+
394+
// Set the Succeeded status condition
395+
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
396+
Type: ocv1.ClusterExtensionRevisionTypeSucceeded,
397+
Status: metav1.ConditionTrue,
398+
Reason: ocv1.ReasonSucceeded,
399+
Message: "Revision succeeded - migrated from Helm release",
400+
ObservedGeneration: rev.GetGeneration(),
401+
})
402+
403+
if err := m.Client.Status().Update(ctx, rev); err != nil {
404+
return fmt.Errorf("updating migrated revision status: %w", err)
282405
}
283406

284407
return nil

0 commit comments

Comments
 (0)