-
Notifications
You must be signed in to change notification settings - Fork 66
Expand file tree
/
Copy pathmustgather_controller.go
More file actions
702 lines (610 loc) · 26.3 KB
/
mustgather_controller.go
File metadata and controls
702 lines (610 loc) · 26.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
/*
Copyright 2022.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package mustgather
import (
"context"
goerror "errors"
"fmt"
"os"
"time"
"github.com/go-logr/logr"
configv1 "github.com/openshift/api/config/v1"
imagev1 "github.com/openshift/api/image/v1"
mustgatherv1alpha1 "github.com/openshift/must-gather-operator/api/v1alpha1"
"github.com/openshift/must-gather-operator/pkg/localmetrics"
"github.com/redhat-cop/operator-utils/pkg/util"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
apimeta "k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
logf "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
)
const (
	// ControllerName is the name used for the controller and its logger.
	ControllerName = "mustgather-controller"

	// default namespace is always present
	DefaultMustGatherNamespace = "default"
)

// log is the package-level logger scoped to this controller.
var log = logf.Log.WithName(ControllerName)

// blank assignment to verify that MustGatherReconciler implements reconcile.Reconciler
var _ reconcile.Reconciler = &MustGatherReconciler{}

// MustGatherReconciler reconciles a MustGather object
type MustGatherReconciler struct {
	// This client, initialized using mgr.Client() above, is a split client
	// that reads objects from the cache and writes to the apiserver
	util.ReconcilerBase
	// TrustedCAConfigMap is the name of the ConfigMap containing the trusted CA certificate bundle
	TrustedCAConfigMap string
	// OperatorNamespace is the namespace where the operator is running
	OperatorNamespace string
	// DefaultMustGatherImage is the default must-gather image
	DefaultMustGatherImage string
}

// mustGatherFinalizer guards MustGather deletion so owned resources can be
// cleaned up before the CR is removed.
const mustGatherFinalizer = "finalizer.mustgathers.operator.openshift.io"

//+kubebuilder:rbac:groups=operator.openshift.io,resources=mustgathers,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=operator.openshift.io,resources=mustgathers/status,verbs=get;update;patch
//+kubebuilder:rbac:groups=operator.openshift.io,resources=mustgathers/finalizers,verbs=update
//+kubebuilder:rbac:groups=config.openshift.io,resources=clusterversions,verbs=get;list;watch
//+kubebuilder:rbac:groups=batch,resources=jobs;jobs/finalizers,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=monitoring.coreos.com,resources=servicemonitors,verbs=get;create
//+kubebuilder:rbac:groups=apps,resources=deployments;daemonsets;replicasets;statefulsets,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=apps,resources=deployments/finalizers,verbs=update
//+kubebuilder:rbac:groups=image.openshift.io,resources=imagestreams,verbs=get;list;watch
//+kubebuilder:rbac:groups="",resources=pods;services;services/finalizers;endpoints;persistentvolumeclaims;events;configmaps;secrets,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups="",resources=serviceaccounts,verbs=get;list;watch
// ServiceAccount read access needed for pre-flight validation before Job creation
// Reconcile is part of the main kubernetes reconciliation loop which aims to
// move the current state of the cluster closer to the desired state.
// TODO(user): Modify the Reconcile function to compare the state specified by
// the MustGather object against the actual cluster state, and then
// perform operations to make the cluster state reflect the state specified by
// the user.
//
// For more details, check Reconcile and its Result here:
// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.11.2/pkg/reconcile
func (r *MustGatherReconciler) Reconcile(ctx context.Context, request reconcile.Request) (reconcile.Result, error) {
	reqLogger := log.WithValues("Request.Namespace", request.Namespace, "Request.Name", request.Name)
	reqLogger.Info("Reconciling MustGather")

	// Fetch the MustGather instance
	instance := &mustgatherv1alpha1.MustGather{}
	err := r.GetClient().Get(ctx, request.NamespacedName, instance)
	if err != nil {
		if errors.IsNotFound(err) {
			// Request object not found, could have been deleted after reconcile request.
			// Owned objects are automatically garbage collected. For additional cleanup logic use finalizers.
			// Return and don't requeue
			return reconcile.Result{}, nil
		}
		// Error reading the object - requeue the request.
		return reconcile.Result{}, err
	}

	// Check if the MustGather instance is marked to be deleted, which is
	// indicated by the deletion timestamp being set.
	isMustGatherMarkedToBeDeleted := instance.GetDeletionTimestamp() != nil
	if isMustGatherMarkedToBeDeleted {
		reqLogger.Info("mustgather instance is marked for deletion")
		if contains(instance.GetFinalizers(), mustGatherFinalizer) {
			// Run finalization logic for mustGatherFinalizer. If the
			// finalization logic fails, don't remove the finalizer so
			// that we can retry during the next reconciliation.
			// Clean up resources if RetainResourcesOnCompletion is false (default behavior)
			if instance.Spec.RetainResourcesOnCompletion == nil || !*instance.Spec.RetainResourcesOnCompletion {
				reqLogger.V(4).Info("running finalization logic for mustGatherFinalizer")
				err := r.cleanupMustGatherResources(ctx, reqLogger, instance)
				if err != nil {
					reqLogger.Error(err, "failed to cleanup MustGather resources during deletion")
					return reconcile.Result{}, err
				}
			}
			// Remove mustGatherFinalizer. Once all finalizers have been
			// removed, the object will be deleted.
			instance.SetFinalizers(remove(instance.GetFinalizers(), mustGatherFinalizer))
			err := r.GetClient().Update(ctx, instance)
			if err != nil {
				return r.ManageError(ctx, instance, err)
			}
		}
		return reconcile.Result{}, nil
	}

	// Add finalizer for this CR
	if !contains(instance.GetFinalizers(), mustGatherFinalizer) {
		return reconcile.Result{}, r.addFinalizer(ctx, reqLogger, instance)
	}

	// perform CA config map copy, iff set in caller
	if r.TrustedCAConfigMap != "" {
		if err := r.ensureTrustedCAConfigMap(ctx, reqLogger, instance); err != nil {
			log.Error(err, "failed to ensure trustedCA ConfigMap exists")
			return r.ManageError(ctx, instance, err)
		}
	}

	// Build the desired Job from the CR spec (image, operator image, CA, filters).
	job, err := r.getJobFromInstance(ctx, instance)
	if err != nil {
		log.Error(err, "unable to get job from", "instance", instance)
		return r.ManageError(ctx, instance, err)
	}

	// Look up the live Job; a NotFound here means we still need to create it.
	job1 := &batchv1.Job{}
	err = r.GetClient().Get(ctx, types.NamespacedName{
		Name:      job.GetName(),
		Namespace: job.GetNamespace(),
	}, job1)
	if err != nil {
		if !errors.IsNotFound(err) {
			// Error reading the object - requeue the request.
			log.Error(err, "unable to look up", "job", types.NamespacedName{
				Name:      job.GetName(),
				Namespace: job.GetNamespace(),
			})
			return r.ManageError(ctx, instance, err)
		}
		// Validate that the ServiceAccount exists before creating the Job.
		// This prevents the Job from being stuck in pending state due to a missing ServiceAccount.
		// If no ServiceAccount is specified, default to "default" which should exist in all namespaces.
		// Note: If the "default" SA has been deleted, this validation will catch it and report an error.
		saName := instance.Spec.ServiceAccountName
		if saName == "" {
			saName = "default"
			log.Info("no serviceAccountName specified, defaulting to 'default'", "namespace", instance.Namespace)
		}
		serviceAccount := &corev1.ServiceAccount{}
		err = r.GetClient().Get(ctx, types.NamespacedName{
			Namespace: instance.Namespace,
			Name:      saName,
		}, serviceAccount)
		if err != nil {
			if errors.IsNotFound(err) {
				// Missing SA is a terminal validation failure recorded on the CR status.
				log.Error(err, "service account not found", "name", saName, "namespace", instance.Namespace)
				return r.setValidationFailureStatus(ctx, reqLogger, instance, ValidationServiceAccount, err)
			}
			log.Error(err, "failed to get service account (transient error, will retry)", "name", saName, "namespace", instance.Namespace)
			return reconcile.Result{Requeue: true}, err
		}
		// look up user secret
		if instance.Spec.UploadTarget != nil && instance.Spec.UploadTarget.SFTP != nil && instance.Spec.UploadTarget.SFTP.CaseManagementAccountSecretRef.Name != "" {
			secretName := instance.Spec.UploadTarget.SFTP.CaseManagementAccountSecretRef.Name
			userSecret := &corev1.Secret{}
			err = r.GetClient().Get(ctx, types.NamespacedName{
				Namespace: instance.Namespace,
				Name:      secretName,
			}, userSecret)
			if err != nil {
				if errors.IsNotFound(err) {
					log.Error(err, "secret not found", "secret", secretName, "namespace", instance.Namespace)
					return r.ManageError(ctx, instance, fmt.Errorf("secret %s not found in namespace %s: Please create the secret referenced by caseManagementAccountSecretRef", secretName, instance.Namespace))
				}
				log.Error(err, "error getting secret", "secret", secretName)
				return reconcile.Result{Requeue: true}, err
			}
			// Validate and extract required credentials
			username, usernameExists := userSecret.Data["username"]
			password, passwordExists := userSecret.Data["password"]
			if !usernameExists || len(username) == 0 {
				validationErr := fmt.Errorf("sftp credentials secret %q is missing required field 'username'", secretName)
				reqLogger.Error(validationErr, "sftp credential validation failed")
				return r.setValidationFailureStatus(ctx, reqLogger, instance, ValidationSFTPCredentials, validationErr)
			}
			if !passwordExists || len(password) == 0 {
				validationErr := fmt.Errorf("sftp credentials secret %q is missing required field 'password'", secretName)
				reqLogger.Error(validationErr, "sftp credential validation failed")
				return r.setValidationFailureStatus(ctx, reqLogger, instance, ValidationSFTPCredentials, validationErr)
			}
			// Validate SFTP credentials before creating the job
			reqLogger.Info("Validating SFTP credentials before creating must-gather job")
			validationErr := validateSFTPWithRetry(
				ctx,
				reqLogger,
				string(username),
				string(password),
				instance.Spec.UploadTarget.SFTP.Host,
			)
			if validationErr != nil {
				reqLogger.Error(validationErr, "SFTP credential validation failed")
				return r.setValidationFailureStatus(ctx, reqLogger, instance, ProtocolSFTP, validationErr)
			}
			reqLogger.Info("SFTP credentials validated successfully")
		}
		// job is not there, create it.
		err = r.CreateResourceIfNotExists(ctx, instance, instance.Namespace, job)
		if err != nil {
			log.Error(err, "unable to create", "job", job)
			return r.ManageError(ctx, instance, err)
		}
		// Increment prometheus metrics for must gather total
		localmetrics.MetricMustGatherTotal.Inc()
		return r.ManageSuccess(ctx, instance)
	}

	// Check status of job and update any metric counts
	if job1.Status.Active > 0 {
		reqLogger.Info("mustgather Job pods are still running")
	} else {
		// if the job has been marked as Succeeded or Failed but instance has no DeletionTimestamp,
		// requeue instance to handle resource clean-up (delete secret, job, and MustGather)
		if job1.Status.Succeeded > 0 {
			reqLogger.Info("mustgather Job pods succeeded")
			// Update the MustGather CR status to indicate success
			instance.Status.Status = "Completed"
			instance.Status.Completed = true
			instance.Status.Reason = "MustGather Job pods succeeded"
			err := r.GetClient().Status().Update(ctx, instance)
			if err != nil {
				log.Error(err, "unable to update instance", "instance", instance)
				return r.ManageError(ctx, instance, err)
			}
			// Clean up resources if RetainResourcesOnCompletion is false (default behavior)
			if instance.Spec.RetainResourcesOnCompletion == nil || !*instance.Spec.RetainResourcesOnCompletion {
				err := r.cleanupMustGatherResources(ctx, reqLogger, instance)
				if err != nil {
					reqLogger.Error(err, "failed to cleanup MustGather resources")
					return r.ManageError(ctx, instance, err)
				}
			}
			return reconcile.Result{}, nil
		}
		// A Job is considered failed only once its failure count exceeds its
		// backoff limit (nil limit is treated as 0 retries).
		backoffLimit := int32(0)
		if job1.Spec.BackoffLimit != nil {
			backoffLimit = *job1.Spec.BackoffLimit
		}
		if job1.Status.Failed > backoffLimit {
			reqLogger.Info("MustGather Job pods failed")
			// Increment prometheus metrics for must gather errors
			localmetrics.MetricMustGatherErrors.Inc()
			// Update the MustGather CR status to indicate failure
			instance.Status.Status = "Failed"
			instance.Status.Completed = true
			instance.Status.Reason = "MustGather Job pods failed"
			err := r.GetClient().Status().Update(ctx, instance)
			if err != nil {
				log.Error(err, "unable to update instance", "instance", instance)
				return r.ManageError(ctx, instance, err)
			}
			// Clean up resources if RetainResourcesOnCompletion is false (default behavior)
			if instance.Spec.RetainResourcesOnCompletion == nil || !*instance.Spec.RetainResourcesOnCompletion {
				err := r.cleanupMustGatherResources(ctx, reqLogger, instance)
				if err != nil {
					reqLogger.Error(err, "failed to cleanup MustGather resources")
					return r.ManageError(ctx, instance, err)
				}
			}
			return reconcile.Result{}, nil
		}
	}
	// if we get here it means that either
	// 1. the mustgather instance was updated, which we don't support and we are going to ignore
	// 2. the job was updated, probably the status piece. we should the update the status of the instance, not supported yet.
	return r.updateStatus(ctx, instance, job1)
}
// updateStatus syncs the MustGather status from the observed Job: the CR is
// marked completed once the Job reports a completion time, then the success
// path of the reconciler-utils state machine is taken.
func (r *MustGatherReconciler) updateStatus(ctx context.Context, instance *mustgatherv1alpha1.MustGather, job *batchv1.Job) (reconcile.Result, error) {
	// NOTE(review): (*metav1.Time).IsZero is expected to treat a nil
	// CompletionTime as zero (Job still running) — confirm against apimachinery.
	jobFinished := !job.Status.CompletionTime.IsZero()
	instance.Status.Completed = jobFinished
	return r.ManageSuccess(ctx, instance)
}
// setValidationFailureStatus updates the MustGather status to indicate a validation failure.
// It sets the status to Failed, marks it as completed, updates the reason with the validation type, and sets the timestamp.
// validationType should describe what kind of validation failed (e.g., "SFTP", "Service Account", "Secret").
func (r *MustGatherReconciler) setValidationFailureStatus(
	ctx context.Context,
	reqLogger logr.Logger,
	instance *mustgatherv1alpha1.MustGather,
	validationType string,
	validationErr error,
) (reconcile.Result, error) {
	errorMessage := fmt.Sprintf("%s validation failed: %v", validationType, validationErr)
	// Mark the CR terminally failed; validation failures are not retried.
	instance.Status.Status = "Failed"
	instance.Status.Completed = true
	instance.Status.Reason = errorMessage
	instance.Status.LastUpdate = metav1.Now()
	// Surface the failure as a status condition so tooling/users can observe it.
	apimeta.SetStatusCondition(&instance.Status.Conditions, metav1.Condition{
		Type:               "ReconcileError",
		Status:             metav1.ConditionTrue,
		Reason:             "ValidationFailed",
		Message:            errorMessage,
		ObservedGeneration: instance.GetGeneration(),
	})
	// Record a warning event for the validation failure
	r.GetRecorder().Event(instance, "Warning", "ProcessingError", errorMessage)
	if statusErr := r.GetClient().Status().Update(ctx, instance); statusErr != nil {
		reqLogger.Error(statusErr, "failed to update status after validation error")
		return r.ManageError(ctx, instance, statusErr)
	}
	// Success path returns an empty Result: the failure is recorded in status
	// and no requeue is requested.
	return reconcile.Result{}, nil
}
// SetupWithManager sets up the controller with the Manager, watching the
// MustGather CR, its owned Jobs, and (when configured) the copied trusted CA
// ConfigMap.
func (r *MustGatherReconciler) SetupWithManager(mgr ctrl.Manager) error {
	controllerBuilder := ctrl.NewControllerManagedBy(mgr).
		For(&mustgatherv1alpha1.MustGather{}, builder.WithPredicates(resourceGenerationOrFinalizerChangedPredicate())).
		Owns(&batchv1.Job{}, builder.WithPredicates(isStateUpdated()))
	// Watch the copied CA ConfigMap only when a trusted CA bundle is configured.
	if r.TrustedCAConfigMap != "" {
		controllerBuilder = controllerBuilder.Owns(&corev1.ConfigMap{}, builder.WithPredicates(isNameEquals(r.TrustedCAConfigMap)))
	}
	return controllerBuilder.Complete(r)
}
// addFinalizer appends mustGatherFinalizer to the MustGather CR and persists
// the change, so deletion is intercepted for resource cleanup.
func (r *MustGatherReconciler) addFinalizer(ctx context.Context, reqLogger logr.Logger, m *mustgatherv1alpha1.MustGather) error {
	reqLogger.Info("Adding Finalizer for the MustGather")
	m.SetFinalizers(append(m.GetFinalizers(), mustGatherFinalizer))
	// Persist the updated finalizer list on the CR.
	if err := r.GetClient().Update(ctx, m); err != nil {
		reqLogger.Error(err, "Failed to update MustGather with finalizer")
		return err
	}
	return nil
}
// getJobFromInstance builds the must-gather Job for the given CR: it resolves
// the must-gather image, reads the operator's own image from the environment,
// and best-effort fetches the cluster creation time for since/sinceTime clamping.
func (r *MustGatherReconciler) getJobFromInstance(ctx context.Context, instance *mustgatherv1alpha1.MustGather) (*batchv1.Job, error) {
	image, err := r.getMustGatherImage(ctx, instance)
	if err != nil {
		// Record the image resolution failure on the CR status before bailing out.
		if _, statusErr := r.setValidationFailureStatus(ctx, log, instance, ValidationImageStream, err); statusErr != nil {
			return nil, fmt.Errorf("failed to set validation failure status: %w, %w", err, statusErr)
		}
		return nil, err
	}
	// Inject the operator image URI from the pod's env variables
	operatorImage, found := os.LookupEnv("OPERATOR_IMAGE")
	if !found {
		missingEnvErr := goerror.New("operator image environment variable not found")
		log.Error(missingEnvErr, "Error: no operator image found for job template")
		return nil, missingEnvErr
	}
	// Best-effort fetch of cluster creation time (used to clamp since/sinceTime filters).
	// Errors here must NOT block must-gather execution.
	var clusterCreationTime *time.Time
	if created, lookupErr := r.getClusterCreationTime(ctx); lookupErr != nil {
		log.V(2).Info("unable to determine cluster creation time; since filters will not be clamped", "err", lookupErr)
	} else {
		clusterCreationTime = created
	}
	return getJobTemplate(image, operatorImage, *instance, r.TrustedCAConfigMap, clusterCreationTime), nil
}
// getClusterCreationTime reads the creation timestamp of the cluster-scoped
// ClusterVersion object named "version" and returns it as a *time.Time.
func (r *MustGatherReconciler) getClusterCreationTime(ctx context.Context) (*time.Time, error) {
	clusterVersion := &configv1.ClusterVersion{}
	key := types.NamespacedName{Name: "version"}
	if err := r.GetClient().Get(ctx, key, clusterVersion); err != nil {
		return nil, err
	}
	// Copy the timestamp into a local so the returned pointer does not alias
	// the fetched object.
	created := clusterVersion.CreationTimestamp.Time
	return &created, nil
}
// getMustGatherImage resolves the image to use for the must-gather Job.
// Without an ImageStreamRef it returns the configured default; otherwise it
// resolves the referenced ImageStream tag in the operator namespace, requiring
// the tag to exist and to carry a pullable docker image reference.
func (r *MustGatherReconciler) getMustGatherImage(ctx context.Context, instance *mustgatherv1alpha1.MustGather) (string, error) {
	ref := instance.Spec.ImageStreamRef
	if ref == nil {
		// No custom image requested: fall back to the operator default.
		return r.DefaultMustGatherImage, nil
	}
	imageStream := &imagev1.ImageStream{}
	if err := r.GetClient().Get(ctx, types.NamespacedName{Name: ref.Name, Namespace: r.OperatorNamespace}, imageStream); err != nil {
		return "", fmt.Errorf("failed to get imagestream %s in namespace %s: %w", ref.Name, r.OperatorNamespace, err)
	}
	for _, statusTag := range imageStream.Status.Tags {
		if statusTag.Tag != ref.Tag {
			continue
		}
		// Tag found: it is usable only when a concrete docker image
		// reference has been resolved for it.
		if len(statusTag.Items) > 0 && statusTag.Items[0].DockerImageReference != "" {
			return statusTag.Items[0].DockerImageReference, nil
		}
		return "", fmt.Errorf("imagestream tag %s in imagestream %s is not pullable", ref.Tag, ref.Name)
	}
	return "", fmt.Errorf("imagestream tag %s not found in imagestream %s", ref.Tag, ref.Name)
}
// contains reports whether s is present in list; used for finalizer checks.
func contains(list []string, s string) bool {
	for i := range list {
		if list[i] == s {
			return true
		}
	}
	return false
}
// remove returns list with every occurrence of s removed; used to strip the
// finalizer from the CR's finalizer list.
//
// BUG FIX: the previous implementation appended over the slice while ranging
// over it, which shifts elements under the iterator; with duplicate
// occurrences of s (e.g. ["a","a","b"]) a copy of s was left behind.
// Filtering into a fresh slice removes all occurrences and never mutates the
// caller's backing array.
func remove(list []string, s string) []string {
	filtered := make([]string, 0, len(list))
	for _, v := range list {
		if v != s {
			filtered = append(filtered, v)
		}
	}
	return filtered
}
// cleanupMustGatherResources cleans up the secret, job, and pods associated with a MustGather instance.
// It deletes, in order: the pods owned by the must-gather Job, the Job itself,
// and (when configured) this instance's claim on the copied trusted CA ConfigMap.
// A missing Job is treated as "already cleaned up" and returns nil.
func (r *MustGatherReconciler) cleanupMustGatherResources(ctx context.Context, reqLogger logr.Logger, instance *mustgatherv1alpha1.MustGather) error {
	reqLogger.Info("cleaning up resources")
	var err error

	// delete job from instance namespace (the Job shares the CR's name)
	tmpJob := &batchv1.Job{}
	err = r.GetClient().Get(ctx, types.NamespacedName{
		Namespace: instance.Namespace,
		Name:      instance.Name,
	}, tmpJob)
	if err != nil {
		if !errors.IsNotFound(err) {
			reqLogger.Info(fmt.Sprintf("failed to get %s job", instance.Name))
			return err
		}
		// Job is already gone: nothing left to delete.
		reqLogger.Info(fmt.Sprintf("job %s not found", instance.Name))
		reqLogger.V(4).Info("successfully cleaned up mustgather resources")
		return nil
	}

	// delete pods owned by job, matched via the Job controller-uid label
	podList := &corev1.PodList{}
	listOpts := []client.ListOption{
		client.InNamespace(instance.Namespace),
		client.MatchingLabels{"controller-uid": string(tmpJob.UID)},
	}
	if err = r.GetClient().List(ctx, podList, listOpts...); err != nil {
		reqLogger.Error(err, "failed to list pods", "Namespace", instance.Namespace, "UID", tmpJob.UID)
		return err
	}
	podObjs := make([]client.Object, len(podList.Items))
	for i := range podList.Items {
		// BUG FIX: take the address of the slice element, not of the range
		// variable. The old code stored &tmpPod; before Go 1.22 the range
		// variable is reused each iteration, so every entry aliased the
		// same (last) pod and only that one pod was deleted.
		podObjs[i] = &podList.Items[i]
	}
	err = r.DeleteResourcesIfExist(ctx, podObjs)
	if err != nil {
		reqLogger.Error(err, fmt.Sprintf("failed to delete pods for job %s", tmpJob.Name))
		return err
	}
	reqLogger.Info(fmt.Sprintf("deleted pods for job %s", tmpJob.Name))

	// finally delete job
	err = r.GetClient().Delete(ctx, tmpJob)
	if err != nil {
		reqLogger.Error(err, fmt.Sprintf("failed to delete %s job", tmpJob.Name))
		return err
	}
	reqLogger.Info(fmt.Sprintf("deleted job %s", tmpJob.Name))

	// release this instance's claim on the copied trusted CA ConfigMap
	if r.TrustedCAConfigMap != "" {
		if err := r.cleanupTrustedCAConfigMap(ctx, reqLogger, instance); err != nil {
			reqLogger.Error(err, "failed to cleanup trustedCA ConfigMap")
			return err
		}
	}
	reqLogger.V(4).Info("successfully cleaned up mustgather resources")
	return nil
}
// ensureTrustedCAConfigMap copies the trustedCA ConfigMap from operator namespace to the CR namespace,
// adds/updates the ownerReference to include the MustGather CR.
// A missing source ConfigMap in the operator namespace is treated as a benign
// skip (nothing to copy), not an error.
func (r *MustGatherReconciler) ensureTrustedCAConfigMap(ctx context.Context, reqLogger logr.Logger, instance *mustgatherv1alpha1.MustGather) error {
	if instance.Namespace == r.OperatorNamespace {
		reqLogger.V(4).Info("MustGather CR is in the same namespace as the operator, skipping ConfigMap copy")
		return nil
	}
	// fetch source config map
	sourceConfigMap := &corev1.ConfigMap{}
	err := r.GetClient().Get(ctx, types.NamespacedName{
		Namespace: r.OperatorNamespace,
		Name:      r.TrustedCAConfigMap,
	}, sourceConfigMap)
	if err != nil {
		if errors.IsNotFound(err) {
			// BUG FIX: this branch logged "skipping copy" but then fell
			// through and returned a wrapped error anyway, failing the
			// reconcile. Honor the logged intent and skip.
			reqLogger.V(2).Info("trustedCA ConfigMap not found in operator namespace, skipping copy",
				"configMapName", r.TrustedCAConfigMap, "operatorNamespace", r.OperatorNamespace)
			return nil
		}
		return fmt.Errorf("failed to get trustedCA ConfigMap from operator namespace: %w", err)
	}

	// check whether the copy already exists in the instance namespace
	existingConfigMap := &corev1.ConfigMap{}
	err = r.GetClient().Get(ctx, types.NamespacedName{
		Namespace: instance.Namespace,
		Name:      r.TrustedCAConfigMap,
	}, existingConfigMap)
	if err != nil {
		if !errors.IsNotFound(err) {
			return fmt.Errorf("failed to check for existing ConfigMap in instance namespace: %w", err)
		}
		// config map doesn't exist, create it with ownerReference
		newConfigMap := &corev1.ConfigMap{
			ObjectMeta: metav1.ObjectMeta{
				Name:      r.TrustedCAConfigMap,
				Namespace: instance.Namespace,
				Labels:    sourceConfigMap.Labels,
				OwnerReferences: []metav1.OwnerReference{
					{
						APIVersion: instance.APIVersion,
						Kind:       instance.Kind,
						Name:       instance.Name,
						UID:        instance.UID,
					},
				},
			},
			Data: sourceConfigMap.Data,
		}
		err = r.GetClient().Create(ctx, newConfigMap)
		if err != nil {
			return fmt.Errorf("failed to create trustedCA ConfigMap in instance namespace: %w", err)
		}
		reqLogger.V(4).Info("successfully copied trustedCA ConfigMap",
			"configMapName", r.TrustedCAConfigMap)
		return nil
	}

	// ConfigMap exists, check if ownerReference for this instance already exists
	ownerRefExists := false
	for _, ownerRef := range existingConfigMap.OwnerReferences {
		if ownerRef.UID == instance.UID {
			ownerRefExists = true
			break
		}
	}
	// add ownerReference and update config map so this CR co-owns the copy
	if !ownerRefExists {
		existingConfigMap.OwnerReferences = append(existingConfigMap.OwnerReferences, metav1.OwnerReference{
			APIVersion: instance.APIVersion,
			Kind:       instance.Kind,
			Name:       instance.Name,
			UID:        instance.UID,
		})
		err = r.GetClient().Update(ctx, existingConfigMap)
		if err != nil {
			return fmt.Errorf("failed to update ownerReferences on trustedCA ConfigMap: %w", err)
		}
		reqLogger.V(4).Info("added ownerReference to existing trustedCA ConfigMap",
			"configMapName", r.TrustedCAConfigMap)
	}
	return nil
}
// cleanupTrustedCAConfigMap releases this instance's claim on the copied
// trustedCA ConfigMap. When other MustGather CRs still own the copy, only this
// instance's ownerReference is stripped via an Update; when this instance is
// the sole owner, the ConfigMap is deleted outright.
func (r *MustGatherReconciler) cleanupTrustedCAConfigMap(ctx context.Context, reqLogger logr.Logger, instance *mustgatherv1alpha1.MustGather) error {
	// The ConfigMap is only ever copied for CRs outside the operator namespace.
	if instance.Namespace == r.OperatorNamespace {
		return nil
	}
	existingConfigMap := &corev1.ConfigMap{}
	lookupKey := types.NamespacedName{
		Namespace: instance.Namespace,
		Name:      r.TrustedCAConfigMap,
	}
	if err := r.GetClient().Get(ctx, lookupKey, existingConfigMap); err != nil {
		// A missing ConfigMap means there is nothing to release.
		if errors.IsNotFound(err) {
			reqLogger.V(4).Info("trustedCA ConfigMap not found, nothing to cleanup",
				"configMapName", r.TrustedCAConfigMap)
			return nil
		}
		return fmt.Errorf("failed to get trustedCA ConfigMap: %w", err)
	}

	// Collect every ownerReference except the one belonging to this instance.
	remainingOwners := make([]metav1.OwnerReference, 0, len(existingConfigMap.OwnerReferences))
	for _, ownerRef := range existingConfigMap.OwnerReferences {
		if ownerRef.UID == instance.UID {
			continue
		}
		remainingOwners = append(remainingOwners, ownerRef)
	}

	// Sole owner: delete the ConfigMap entirely.
	if len(remainingOwners) == 0 {
		if err := r.GetClient().Delete(ctx, existingConfigMap); err != nil {
			return fmt.Errorf("failed to delete trustedCA ConfigMap: %w", err)
		}
		reqLogger.V(4).Info("deleted trustedCA ConfigMap",
			"configMapName", r.TrustedCAConfigMap)
		return nil
	}

	// Shared owner: update a copy with this instance's reference removed.
	configMapToUpdate := existingConfigMap.DeepCopy()
	configMapToUpdate.OwnerReferences = remainingOwners
	if err := r.GetClient().Update(ctx, configMapToUpdate); err != nil {
		return fmt.Errorf("failed to update trustedCA ConfigMap owner references: %w", err)
	}
	reqLogger.V(4).Info("removed ownerReference from trustedCA ConfigMap",
		"configMapName", r.TrustedCAConfigMap, "remainingNumOwners", len(remainingOwners))
	return nil
}