From 13b927d4504d55340925e81008dbd02b802423ad Mon Sep 17 00:00:00 2001 From: Lawton Mizell Date: Fri, 6 Feb 2026 11:18:17 -0500 Subject: [PATCH] SREP-413: Prevent false UpgradeConfigValidationFailedSRE alert after upgrade completes --- .../upgradeconfig/upgradeconfig_controller.go | 59 ++++++++---- .../upgradeconfig_controller_test.go | 89 ++++++++++++++----- 2 files changed, 108 insertions(+), 40 deletions(-) diff --git a/controllers/upgradeconfig/upgradeconfig_controller.go b/controllers/upgradeconfig/upgradeconfig_controller.go index 7ce24683..b2694cba 100644 --- a/controllers/upgradeconfig/upgradeconfig_controller.go +++ b/controllers/upgradeconfig/upgradeconfig_controller.go @@ -11,6 +11,7 @@ import ( "github.com/go-logr/logr" "github.com/hashicorp/go-multierror" + configv1 "github.com/openshift/api/config/v1" upgradev1alpha1 "github.com/openshift/managed-upgrade-operator/api/v1alpha1" muocfg "github.com/openshift/managed-upgrade-operator/config" "github.com/openshift/managed-upgrade-operator/pkg/clusterversion" @@ -31,9 +32,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" ) -var ( - log = logf.Log.WithName("controller_upgradeconfig") -) +var log = logf.Log.WithName("controller_upgradeconfig") // blank assignment to verify that ReconcileUpgradeConfig implements reconcile.Reconciler var _ reconcile.Reconciler = &ReconcileUpgradeConfig{} @@ -101,23 +100,50 @@ func (r *ReconcileUpgradeConfig) Reconcile(ctx context.Context, request reconcil if history == nil { precedingVersion := clusterversion.GetPrecedingVersion(clusterVersion, instance) - upgraded, err := cvClient.HasUpgradeCommenced(instance) - if err != nil { - return reconcile.Result{}, fmt.Errorf("could not tell if cluster was upgrading: %v", err) - } - if upgraded { - // If CVO is currently set to the version of the UC, then we need to be in an Upgrading phase, at minimum. - // We also won't know the actual start time, so picking now will just have to do + // Check if the upgrade has already completed in CVO. This handles the case where + // the UpgradeConfig was recreated (e.g., by UpgradeConfigManager due to spec changes) + // after an upgrade finished, causing the status/history to be lost. + if cvClient.HasUpgradeCompleted(clusterVersion, instance) { + reqLogger.Info("Upgrade already completed for this version, setting phase to Upgraded") + // Find the completion and start times from CVO's history + var startTime, completeTime *metav1.Time + for _, h := range clusterVersion.Status.History { + if h.Version == instance.Spec.Desired.Version && h.State == configv1.CompletedUpdate { + startTime = &metav1.Time{Time: h.StartedTime.Time} + if h.CompletionTime != nil { + completeTime = &metav1.Time{Time: h.CompletionTime.Time} + } + break + } + } history = &upgradev1alpha1.UpgradeHistory{ PrecedingVersion: precedingVersion, Version: instance.Spec.Desired.Version, - Phase: upgradev1alpha1.UpgradePhaseUpgrading, - StartTime: &metav1.Time{Time: time.Now()}} + Phase: upgradev1alpha1.UpgradePhaseUpgraded, + StartTime: startTime, + CompleteTime: completeTime, + } } else { - history = &upgradev1alpha1.UpgradeHistory{ - PrecedingVersion: precedingVersion, - Version: instance.Spec.Desired.Version, - Phase: upgradev1alpha1.UpgradePhaseNew} + upgraded, err := cvClient.HasUpgradeCommenced(instance) + if err != nil { + return reconcile.Result{}, fmt.Errorf("could not tell if cluster was upgrading: %v", err) + } + if upgraded { + // If CVO is currently set to the version of the UC, then we need to be in an Upgrading phase, at minimum. + // We also won't know the actual start time, so picking now will just have to do + history = &upgradev1alpha1.UpgradeHistory{ + PrecedingVersion: precedingVersion, + Version: instance.Spec.Desired.Version, + Phase: upgradev1alpha1.UpgradePhaseUpgrading, + StartTime: &metav1.Time{Time: time.Now()}, + } + } else { + history = &upgradev1alpha1.UpgradeHistory{ + PrecedingVersion: precedingVersion, + Version: instance.Spec.Desired.Version, + Phase: upgradev1alpha1.UpgradePhaseNew, + } + } } history.Conditions = upgradev1alpha1.NewConditions() instance.Status.History = append([]upgradev1alpha1.UpgradeHistory{*history}, instance.Status.History...) @@ -370,7 +396,6 @@ func isManagedUpgrade(name string) bool { // SetupWithManager sets up the controller with the Manager. func (r *ReconcileUpgradeConfig) SetupWithManager(mgr ctrl.Manager) error { - return ctrl.NewControllerManagedBy(mgr). For(&upgradev1alpha1.UpgradeConfig{}). WithEventFilter(StatusChangedPredicate()). diff --git a/controllers/upgradeconfig/upgradeconfig_controller_test.go b/controllers/upgradeconfig/upgradeconfig_controller_test.go index dfd97e2d..bcc846ab 100644 --- a/controllers/upgradeconfig/upgradeconfig_controller_test.go +++ b/controllers/upgradeconfig/upgradeconfig_controller_test.go @@ -143,7 +143,6 @@ var _ = Describe("UpgradeConfigController", func() { }) Context("Reconcile", func() { - BeforeEach(func() { gomock.InOrder( mockMetricsBuilder.EXPECT().NewClient(gomock.Any()).Return(mockMetricsClient, nil), @@ -180,8 +179,8 @@ var _ = Describe("UpgradeConfigController", func() { }) Context("When attempting to fetch the configmap", func() { - var version = "a version" - var fakeError = fmt.Errorf("configmap not found") + version := "a version" + fakeError := fmt.Errorf("configmap not found") BeforeEach(func() { upgradeConfig.Status.History = []upgradev1alpha1.UpgradeHistory{{}} cfg = config{ @@ -223,6 +222,50 @@ var _ = Describe("UpgradeConfigController", func() { Context("When an UpgradeConfig exists", func() { Context("and there is no existing history", func() { + Context("and the cluster has already completed the upgrade to that version", func() { + var upgradeStartTime, upgradeCompleteTime metav1.Time + BeforeEach(func() { + // CVO shows the desired version as completed with timestamps + upgradeStartTime = metav1.Now() + upgradeCompleteTime = metav1.Now() + testClusterVersion.Status.History = []configv1.UpdateHistory{ + { + State: configv1.CompletedUpdate, + Version: upgradeConfig.Spec.Desired.Version, + StartedTime: upgradeStartTime, + CompletionTime: &upgradeCompleteTime, + }, + {State: configv1.CompletedUpdate, Version: "PreviousVersion"}, + } + testClusterVersion.Status.Desired.Version = upgradeConfig.Spec.Desired.Version + }) + It("sets the history phase to Upgraded and skips validation", func() { + matcher := testStructs.NewUpgradeConfigMatcher() + gomock.InOrder( + mockEMBuilder.EXPECT().NewManager(gomock.Any()).Return(mockEMClient, nil), + mockKubeClient.EXPECT().Get(gomock.Any(), upgradeConfigName, gomock.Any()).SetArg(2, *upgradeConfig), + mockCVClientBuilder.EXPECT().New(gomock.Any()).Return(mockCVClient), + mockCVClient.EXPECT().GetClusterVersion().Return(testClusterVersion, nil), + mockCVClient.EXPECT().HasUpgradeCompleted(testClusterVersion, upgradeConfig).Return(true), + mockKubeClient.EXPECT().Status().Return(mockUpdater), + mockUpdater.EXPECT().Update(gomock.Any(), matcher), + mockConfigManagerBuilder.EXPECT().New(gomock.Any(), gomock.Any()).Return(mockConfigManager), + mockConfigManager.EXPECT().Into(gomock.Any()).SetArg(0, cfg), + mockClusterUpgraderBuilder.EXPECT().NewClient(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), upgradeConfig.Spec.Type).Return(mockClusterUpgrader, nil), + // When phase is Upgraded, the controller reports metrics + mockMetricsClient.EXPECT().AlertsFromUpgrade(gomock.Any(), gomock.Any()), + mockMetricsClient.EXPECT().UpdateMetricUpgradeResult(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()), + ) + result, err := reconciler.Reconcile(context.TODO(), reconcile.Request{NamespacedName: upgradeConfigName}) + Expect(err).NotTo(HaveOccurred()) + Expect(result.RequeueAfter).To(BeZero()) + Expect(matcher.ActualUpgradeConfig.Status.History).To(ContainElement( + gstruct.MatchFields(gstruct.IgnoreExtras, gstruct.Fields{ + "Version": Equal(upgradeConfig.Spec.Desired.Version), + "Phase": Equal(upgradev1alpha1.UpgradePhaseUpgraded), + }))) + }) + }) Context("and the cluster is already upgrading to that version", func() { BeforeEach(func() { // set CVO's version to that of the UC's @@ -236,6 +279,7 @@ var _ = Describe("UpgradeConfigController", func() { mockKubeClient.EXPECT().Get(gomock.Any(), upgradeConfigName, gomock.Any()).SetArg(2, *upgradeConfig), mockCVClientBuilder.EXPECT().New(gomock.Any()).Return(mockCVClient), mockCVClient.EXPECT().GetClusterVersion().Return(testClusterVersion, nil), + mockCVClient.EXPECT().HasUpgradeCompleted(testClusterVersion, upgradeConfig).Return(false), mockCVClient.EXPECT().HasUpgradeCommenced(gomock.Any()).Return(true, nil), mockKubeClient.EXPECT().Status().Return(mockUpdater), mockUpdater.EXPECT().Update(gomock.Any(), matcher), @@ -249,15 +293,17 @@ var _ = Describe("UpgradeConfigController", func() { _, err := reconciler.Reconcile(context.TODO(), reconcile.Request{NamespacedName: upgradeConfigName}) Expect(err).NotTo(HaveOccurred()) Expect(matcher.ActualUpgradeConfig.Status.History).To(ContainElement( - gstruct.MatchFields(gstruct.IgnoreExtras, gstruct.Fields{"PrecedingVersion": Equal("PreviousVersion"), "Version": Equal(upgradeConfig.Spec.Desired.Version), - "Phase": Equal(upgradev1alpha1.UpgradePhaseUpgrading)}))) + gstruct.MatchFields(gstruct.IgnoreExtras, gstruct.Fields{ + "PrecedingVersion": Equal("PreviousVersion"), "Version": Equal(upgradeConfig.Spec.Desired.Version), + "Phase": Equal(upgradev1alpha1.UpgradePhaseUpgrading), + }))) }) }) }) Context("When the desired version isn't in the UpgradeConfig's history", func() { - var desiredVersion = "a new version" - var existingVersion = "not the same as desired version" + desiredVersion := "a new version" + existingVersion := "not the same as desired version" BeforeEach(func() { upgradeConfig.Spec.Desired.Version = desiredVersion upgradeConfig.Status.History = []upgradev1alpha1.UpgradeHistory{ @@ -274,6 +320,7 @@ var _ = Describe("UpgradeConfigController", func() { mockKubeClient.EXPECT().Get(gomock.Any(), upgradeConfigName, gomock.Any()).SetArg(2, *upgradeConfig), mockCVClientBuilder.EXPECT().New(gomock.Any()).Return(mockCVClient), mockCVClient.EXPECT().GetClusterVersion().Return(testClusterVersion, nil), + mockCVClient.EXPECT().HasUpgradeCompleted(testClusterVersion, upgradeConfig).Return(false), mockCVClient.EXPECT().HasUpgradeCommenced(gomock.Any()).Return(true, nil), mockKubeClient.EXPECT().Status().Return(mockUpdater), mockUpdater.EXPECT().Update(gomock.Any(), gomock.Any()).Return(fakeError), @@ -292,6 +339,7 @@ var _ = Describe("UpgradeConfigController", func() { mockKubeClient.EXPECT().Get(gomock.Any(), upgradeConfigName, gomock.Any()).SetArg(2, *upgradeConfig), mockCVClientBuilder.EXPECT().New(gomock.Any()).Return(mockCVClient), mockCVClient.EXPECT().GetClusterVersion().Return(testClusterVersion, nil), + mockCVClient.EXPECT().HasUpgradeCompleted(testClusterVersion, upgradeConfig).Return(false), mockCVClient.EXPECT().HasUpgradeCommenced(gomock.Any()).Return(false, nil), mockKubeClient.EXPECT().Status().Return(mockUpdater), mockUpdater.EXPECT().Update(gomock.Any(), matcher), @@ -312,7 +360,7 @@ var _ = Describe("UpgradeConfigController", func() { }) Context("When inspecting the UpgradeConfig's phase", func() { - var version = "a version" + version := "a version" BeforeEach(func() { upgradeConfig.Status.History = []upgradev1alpha1.UpgradeHistory{{}} }) @@ -355,7 +403,7 @@ var _ = Describe("UpgradeConfigController", func() { Expect(result.RequeueAfter).To(Equal(time.Minute * 1)) }) - var fakeError = fmt.Errorf("a healthcheck error") + fakeError := fmt.Errorf("a healthcheck error") It("Should move to pending phase if the HealthCheck fails", func() { gomock.InOrder( mockEMBuilder.EXPECT().NewManager(gomock.Any()).Return(mockEMClient, nil), @@ -454,7 +502,7 @@ var _ = Describe("UpgradeConfigController", func() { }) }) Context("When a cluster upgrade client can't be built", func() { - var fakeError = fmt.Errorf("an upgrader builder error") + fakeError := fmt.Errorf("an upgrader builder error") It("does not proceed with upgrading the cluster", func() { gomock.InOrder( mockEMBuilder.EXPECT().NewManager(gomock.Any()).Return(mockEMClient, nil), @@ -501,7 +549,7 @@ var _ = Describe("UpgradeConfigController", func() { BeforeEach(func() { upgradeConfig.Status.History[0].Phase = upgradev1alpha1.UpgradePhaseNew }) - var fakeError = fmt.Errorf("a status update error") + fakeError := fmt.Errorf("a status update error") sr := scheduler.SchedulerResult{ IsReady: false, IsBreached: false, @@ -524,7 +572,6 @@ var _ = Describe("UpgradeConfigController", func() { Expect(err).To(HaveOccurred()) }) }) - }) Context("When the UpgradePhase is Pending", func() { @@ -533,7 +580,7 @@ var _ = Describe("UpgradeConfigController", func() { }) Context("When building the validator client fails", func() { - var fakeError = fmt.Errorf("an validator builder error") + fakeError := fmt.Errorf("an validator builder error") It("reconcile should fail", func() { gomock.InOrder( mockEMBuilder.EXPECT().NewManager(gomock.Any()).Return(mockEMClient, nil), @@ -589,7 +636,7 @@ var _ = Describe("UpgradeConfigController", func() { }) Context("When the cluster is ready to upgrade", func() { - var fakeError = fmt.Errorf("fake upgradeconfig manager builder error") + fakeError := fmt.Errorf("fake upgradeconfig manager builder error") It("Should fail if cannot create upgradeconfig manager builder", func() { gomock.InOrder( mockEMBuilder.EXPECT().NewManager(gomock.Any()).Return(mockEMClient, nil), @@ -681,7 +728,7 @@ var _ = Describe("UpgradeConfigController", func() { Expect(upgradeConfig.Status.History.GetHistory("a version").Phase == upgradev1alpha1.UpgradePhaseNew).To(BeTrue()) }) - var fakeError = fmt.Errorf("fake remote config error") + fakeError := fmt.Errorf("fake remote config error") It("should reconcile with failure if error is other than remote config manager not configured", func() { gomock.InOrder( mockEMBuilder.EXPECT().NewManager(gomock.Any()).Return(mockEMClient, nil), @@ -784,7 +831,7 @@ var _ = Describe("UpgradeConfigController", func() { }) Context("When invoking the upgrader fails", func() { - var fakeError = fmt.Errorf("the upgrader failed") + fakeError := fmt.Errorf("the upgrader failed") It("reacts accordingly", func() { gomock.InOrder( mockEMBuilder.EXPECT().NewManager(gomock.Any()).Return(mockEMClient, nil), @@ -814,7 +861,6 @@ var _ = Describe("UpgradeConfigController", func() { }) Context("When the cluster is not ready to upgrade", func() { - It("Should update phase status to be pending phase", func() { gomock.InOrder( mockEMBuilder.EXPECT().NewManager(gomock.Any()).Return(mockEMClient, nil), @@ -837,7 +883,7 @@ var _ = Describe("UpgradeConfigController", func() { }) Context("When the status update fails to set to pending phase", func() { - var statusError = fmt.Errorf("a status update error") + statusError := fmt.Errorf("a status update error") It("Should reconcile error", func() { gomock.InOrder( mockEMBuilder.EXPECT().NewManager(gomock.Any()).Return(mockEMClient, nil), @@ -898,7 +944,7 @@ var _ = Describe("UpgradeConfigController", func() { }) Context("When a cluster upgrade client can't be built", func() { - var fakeError = fmt.Errorf("a maintenance builder error") + fakeError := fmt.Errorf("a maintenance builder error") It("does not proceed with upgrading the cluster", func() { gomock.InOrder( mockEMBuilder.EXPECT().NewManager(gomock.Any()).Return(mockEMClient, nil), @@ -937,7 +983,7 @@ var _ = Describe("UpgradeConfigController", func() { }) Context("When invoking the upgrader fails", func() { - var fakeError = fmt.Errorf("the upgrader failed") + fakeError := fmt.Errorf("the upgrader failed") It("reacts accordingly", func() { gomock.InOrder( mockEMBuilder.EXPECT().NewManager(gomock.Any()).Return(mockEMClient, nil), @@ -963,7 +1009,6 @@ var _ = Describe("UpgradeConfigController", func() { upgradeConfig.Status.History[0].Phase = upgradev1alpha1.UpgradePhaseUpgraded upgradeConfig.Status.History[0].StartTime = &metav1.Time{Time: time.Now()} upgradeConfig.Status.History[0].CompleteTime = &metav1.Time{Time: time.Now()} - }) It("reports metrics", func() { gomock.InOrder( @@ -1002,7 +1047,6 @@ var _ = Describe("UpgradeConfigController", func() { Expect(err).NotTo(HaveOccurred()) Expect(result.RequeueAfter).To(BeZero()) - }) }) @@ -1027,7 +1071,6 @@ var _ = Describe("UpgradeConfigController", func() { Expect(err).NotTo(HaveOccurred()) Expect(result.RequeueAfter).To(BeZero()) - }) }) })