diff --git a/cmd/osde2e/cleanup/cmd.go b/cmd/osde2e/cleanup/cmd.go index aec933a530..20849f9ce0 100644 --- a/cmd/osde2e/cleanup/cmd.go +++ b/cmd/osde2e/cleanup/cmd.go @@ -26,7 +26,11 @@ var Cmd = &cobra.Command{ Short: "Cleans up expired clusters or a specific cluster.", Long: "Cleans up expired clusters or a specific cluster.", Args: cobra.OnlyValidArgs, - RunE: run, + RunE: func(cmd *cobra.Command, _ []string) error { + msg, err := run(cmd.Context()) + sendSlackNotification(msg, err) + return err + }, } var args struct { @@ -179,19 +183,30 @@ func collectActiveClusters() (map[string]bool, error) { return activeClusters, nil } -//nolint:gocyclo -func run(cmd *cobra.Command, argv []string) error { - var err error - if err = common.LoadConfigs(args.configString, args.customConfig, args.secretLocations); err != nil { - return fmt.Errorf("error loading initial state: %v", err) +// sendSlackNotification sends the cleanup summary to Slack if sendSummary is set and webhook is configured. +// When runErr is non-nil, it appends the run failure to the message summary. +func sendSlackNotification(msg Message, runErr error) { + if !args.sendSummary { + return } - fmtDuration, err := time.ParseDuration(args.olderThan) - if err != nil { - return fmt.Errorf("error parsing --older-than: %v", err) + webhook := viper.GetString(config.Tests.SlackWebhook) + if webhook == "" { + fmt.Println("Slack Webhook is not set, skipping notification.") + return } + if runErr != nil { + msg.Summary += "\n\nRun failed: " + runErr.Error() + } + ctx := context.Background() + if err := commonslack.SendWebhook(ctx, webhook, msg); err != nil { + fmt.Printf("Failed to send slack notification: %v\n", err) + return + } + fmt.Println("Slack notification sent successfully") +} - // message format: `{"summary":"", "buildfile":"", "s3":"", - // "iam":"", "ip":"", "ec2":"", "vpc":""}` +//nolint:gocyclo +func run(_ context.Context) (msg Message, err error) { var summaryBuilder strings.Builder var iamErrorBuilder strings.Builder var s3ErrorBuilder strings.Builder @@ -199,6 +214,35 @@ func run(cmd *cobra.Command, argv []string) error { var ec2ErrorBuilder strings.Builder var vpcErrorBuilder strings.Builder + defer func() { + buildFile := "" + if strings.Contains(viper.GetString(config.JobName), "rehearse") { + basePRJobURL := "https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/test-platform-results/pr-logs/pull/openshift_release" + buildFile += basePRJobURL + "/" + os.Getenv("PULL_NUMBER") + } else { + buildFile += viper.GetString(config.BaseJobURL) + } + buildFile += "/" + viper.GetString(config.JobName) + + "/" + viper.GetString(config.JobID) + "/artifacts/test/build-log.txt" + msg = Message{ + Summary: summaryBuilder.String(), + BuildFile: "Build Logs: " + buildFile, + S3Errors: "S3 Errors: " + s3ErrorBuilder.String(), + IAMErrors: "IAM Errors: " + iamErrorBuilder.String(), + IPErrors: "IP Errors: " + ipErrorBuilder.String(), + EC2Errors: "EC2 Errors: " + ec2ErrorBuilder.String(), + VPCErrors: "VPC Errors: " + vpcErrorBuilder.String(), + } + }() + + if err = common.LoadConfigs(args.configString, args.customConfig, args.secretLocations); err != nil { + return msg, err + } + fmtDuration, err := time.ParseDuration(args.olderThan) + if err != nil { + return msg, fmt.Errorf("error parsing --older-than: %v", err) + } + if args.dryRun { summaryBuilder.WriteString("-- Cleanup dry run -- \n") } @@ -213,7 +257,7 @@ func run(cmd *cobra.Command, argv []string) error { // Collect active clusters once for all cleanup operations activeClusters, err := collectActiveClusters() if err != nil { - return fmt.Errorf("could not collect active clusters: %v", err) + return msg, fmt.Errorf("could not collect active clusters: %v", err) } log.Printf("Found %d active clusters for cleanup operations\n", len(activeClusters)) @@ -223,19 +267,19 @@ func run(cmd *cobra.Command, argv []string) error { err = aws.CcsAwsSession.CleanupVPCs(activeClusters, args.dryRun, args.sendSummary, &vpcDeletedCounter, &vpcFailedCounter, &vpcErrorBuilder) summaryBuilder.WriteString("VPCs: " + strconv.Itoa(vpcDeletedCounter) + "/" + strconv.Itoa(vpcFailedCounter) + "\n") if err != nil { - return fmt.Errorf("could not cleanup vpc resources: %s", err.Error()) + return msg, fmt.Errorf("could not cleanup vpc resources: %s", err.Error()) } } if args.clusters { provider, err := ocmprovider.NewWithEnv(viper.GetString(ocmprovider.Env)) if err != nil { - return fmt.Errorf("could not setup cluster provider: %v", err) + return msg, fmt.Errorf("could not setup cluster provider: %v", err) } clusters, err := provider.ListClusters("properties.MadeByOSDe2e='true'") if err != nil { - return err + return msg, err } // delete clusters older than cutoffTime cutoffTime := time.Now().UTC().Add(-fmtDuration) @@ -258,20 +302,19 @@ func run(cmd *cobra.Command, argv []string) error { if args.clusterID != "" { provider, err := ocmprovider.NewWithEnv(viper.GetString(ocmprovider.Env)) if err != nil { - return fmt.Errorf("could not setup cluster provider: %v", err) + return msg, fmt.Errorf("could not setup cluster provider: %v", err) } cluster, err := provider.GetCluster(args.clusterID) if err != nil { - return fmt.Errorf("cluster id: %s not found, unable to delete it", args.clusterID) + return msg, fmt.Errorf("cluster id: %s not found, unable to delete it", args.clusterID) } fmt.Printf("Cluster will be deleted: %s \n", cluster.ID()) if !args.dryRun { if err = provider.DeleteCluster(cluster.ID()); err != nil { - return fmt.Errorf("failed to delete cluster: %v", err) - } else { - fmt.Println("Uninstall started successfully") + return msg, fmt.Errorf("failed to delete cluster: %v", err) } + fmt.Println("Uninstall started successfully") } } @@ -281,14 +324,14 @@ func run(cmd *cobra.Command, argv []string) error { err = aws.CcsAwsSession.CleanupOpenIDConnectProviders(activeClusters, args.dryRun, args.sendSummary, &oidcDeletedCounter, &oidcFailedCounter, &iamErrorBuilder) summaryBuilder.WriteString("OIDC providers: " + strconv.Itoa(oidcDeletedCounter) + "/" + strconv.Itoa(oidcFailedCounter) + "\n") if err != nil { - return fmt.Errorf("could not delete OIDC providers: %s", err.Error()) + return msg, fmt.Errorf("could not delete OIDC providers: %s", err.Error()) } rolesDeletedCounter := 0 rolesFailedCounter := 0 err = aws.CcsAwsSession.CleanupRoles(activeClusters, args.dryRun, args.sendSummary, &rolesDeletedCounter, &rolesFailedCounter, &iamErrorBuilder) summaryBuilder.WriteString("Roles: " + strconv.Itoa(rolesDeletedCounter) + "/" + strconv.Itoa(rolesFailedCounter) + "\n") if err != nil { - return fmt.Errorf("could not delete IAM roles: %s", err.Error()) + return msg, fmt.Errorf("could not delete IAM roles: %s", err.Error()) } } @@ -298,7 +341,7 @@ func run(cmd *cobra.Command, argv []string) error { err = aws.CcsAwsSession.CleanupS3Buckets(activeClusters, args.dryRun, args.sendSummary, &s3BucketDeletedCounter, &s3BucketFailedCounter, &s3ErrorBuilder) summaryBuilder.WriteString("S3 Buckets: " + strconv.Itoa(s3BucketDeletedCounter) + "/" + strconv.Itoa(s3BucketFailedCounter) + "\n") if err != nil { - return fmt.Errorf("could not delete s3 buckets: %s", err.Error()) + return msg, fmt.Errorf("could not delete s3 buckets: %s", err.Error()) } } @@ -307,7 +350,7 @@ func run(cmd *cobra.Command, argv []string) error { summaryBuilder.WriteString("EC2 Instances: " + strconv.Itoa(instancesDeleted) + "/" + strconv.Itoa(instancesFailedToDelete) + "\n") if err != nil { if !errors.Is(err, aws.ErrTerminateEC2Instances) { - return fmt.Errorf("could not terminate ec2 instances: %s", err.Error()) + return msg, fmt.Errorf("could not terminate ec2 instances: %s", err.Error()) } ec2ErrorMessage := err.Error() if len(ec2ErrorMessage) > config.SlackMessageLength { @@ -323,44 +366,9 @@ func run(cmd *cobra.Command, argv []string) error { err = aws.CcsAwsSession.ReleaseElasticIPs(args.dryRun, args.sendSummary, &elasticIpDeletedCounter, &elasticIpFailedCounter, &ipErrorBuilder) summaryBuilder.WriteString("Elastic IPs: " + strconv.Itoa(elasticIpDeletedCounter) + "/" + strconv.Itoa(elasticIpFailedCounter) + "\n") if err != nil { - return fmt.Errorf("could not release ips: %s", err.Error()) - } - } - - if args.sendSummary { - webhook := viper.GetString(config.Tests.SlackWebhook) - if webhook == "" { - fmt.Println("Slack Webhook is not set, skipping notification.") - return nil - } - buildFile := "" - if strings.Contains(viper.GetString(config.JobName), "rehearse") { - basePRJobURL := "https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/test-platform-results/pr-logs/pull/openshift_release" - buildFile += basePRJobURL + "/" + os.Getenv("PULL_NUMBER") - } else { - buildFile += viper.GetString(config.BaseJobURL) + return msg, fmt.Errorf("could not release ips: %s", err.Error()) } - buildFile += "/" + viper.GetString(config.JobName) + - "/" + viper.GetString(config.JobID) + "/artifacts/test/build-log.txt" - - message := Message{ - Summary: summaryBuilder.String(), - BuildFile: "Build Logs: " + buildFile, - S3Errors: "S3 Errors: " + s3ErrorBuilder.String(), - IAMErrors: "IAM Errors: " + iamErrorBuilder.String(), - IPErrors: "IP Errors: " + ipErrorBuilder.String(), - EC2Errors: "EC2 Errors: " + ec2ErrorBuilder.String(), - VPCErrors: "VPC Errors: " + vpcErrorBuilder.String(), - } - - // Send notification using common slack package - ctx := context.Background() - if err := commonslack.SendWebhook(ctx, webhook, message); err != nil { - return fmt.Errorf("failed to send slack notification: %w", err) - } - - fmt.Println("Slack notification sent successfully") } - return nil + return msg, nil }