Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions pkg/controller/chi/worker-reconciler-chi.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ func (w *worker) reconcileCR(ctx context.Context, old, new *api.ClickHouseInstal

new = w.buildCR(ctx, new)

// Check for StatefulSets stuck at 0 replicas when they should have more.
// This must run before ActionPlan check to handle stuck state.
common.FixStuckStatefulSets(ctx, w.a, w.c.kube.STS(), new)

switch {
case new.Spec.Suspend.Value():
// if CR is suspended, should skip reconciliation
Expand Down
4 changes: 4 additions & 0 deletions pkg/controller/chk/worker-reconciler-chk.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ func (w *worker) reconcileCR(ctx context.Context, old, new *apiChk.ClickHouseKee

new = w.buildCR(ctx, new)

// Check for StatefulSets stuck at 0 replicas when they should have more.
// This must run before ActionPlan check to handle stuck state.
common.FixStuckStatefulSets(ctx, w.a, w.c.kube.STS(), new)

switch {
case new.Spec.Suspend.Value():
// if CR is suspended, should skip reconciliation
Expand Down
25 changes: 25 additions & 0 deletions pkg/controller/common/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,15 @@
package common

import (
"context"

"gopkg.in/d4l3k/messagediff.v1"
apps "k8s.io/api/apps/v1"

can "github.com/altinity/clickhouse-operator/pkg/controller/common/announcer"
log "github.com/altinity/clickhouse-operator/pkg/announcer"
api "github.com/altinity/clickhouse-operator/pkg/apis/clickhouse.altinity.com/v1"
"github.com/altinity/clickhouse-operator/pkg/interfaces"
"github.com/altinity/clickhouse-operator/pkg/util"
)

Expand Down Expand Up @@ -68,3 +72,24 @@ func DumpStatefulSetDiff(host *api.Host, cur, new *apps.StatefulSet) {
}
}
}

// FixStuckStatefulSets checks all hosts in a CR for StatefulSets stuck at 0 replicas
// and forces them to scale up to the desired replica count.
//
// For every host visited by cr.WalkHosts it inspects host.Runtime.CurStatefulSet:
//   - hosts without a cached StatefulSet are skipped;
//   - a StatefulSet counts as stuck when its Spec.Replicas is set to 0 while
//     host.GetStatefulSetReplicasNum(false) reports a value greater than 0
//     (NOTE(review): the `false` argument presumably means "not shutting down" — confirm);
//   - a stuck StatefulSet is updated through sts.Update with the desired replica count.
//
// The update is sent on a deep copy, and the cached StatefulSet only receives the new
// replica count once the update has succeeded. A failed update therefore leaves the
// cached object untouched, so the stuck state stays visible to later checks.
//
// Failures are reported through announcer and never propagated: the walk callback
// always returns nil and the result of cr.WalkHosts is intentionally discarded.
func FixStuckStatefulSets(ctx context.Context, announcer can.Announcer, sts interfaces.IKubeSTS, cr api.ICustomResource) {
	cr.WalkHosts(func(host *api.Host) error {
		// Once the context is cancelled no update can succeed; skip the remaining
		// hosts quietly instead of logging one error per host.
		if ctx != nil && ctx.Err() != nil {
			return nil
		}

		cur := host.Runtime.CurStatefulSet
		if cur == nil {
			return nil
		}

		desired := host.GetStatefulSetReplicasNum(false)
		stuckAtZero := cur.Spec.Replicas != nil && *cur.Spec.Replicas == 0
		wantsReplicas := desired != nil && *desired > 0
		if !stuckAtZero || !wantsReplicas {
			return nil
		}

		announcer.V(1).M(host).Info("StatefulSet stuck at 0 replicas, forcing scale to %d", *desired)

		// Work on a copy so a failed update does not leave the cached object
		// claiming a replica count the cluster never accepted.
		patched := cur.DeepCopy()
		patched.Spec.Replicas = desired
		if _, err := sts.Update(ctx, patched); err != nil {
			announcer.V(1).M(host).Error("Failed to scale up stuck StatefulSet: %v", err)
			return nil
		}

		// Update accepted: reflect the new replica count in the cached StatefulSet.
		cur.Spec.Replicas = desired
		return nil
	})
}