Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 69 additions & 61 deletions cmd/osde2e/cleanup/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,11 @@ var Cmd = &cobra.Command{
Short: "Cleans up expired clusters or a specific cluster.",
Long: "Cleans up expired clusters or a specific cluster.",
Args: cobra.OnlyValidArgs,
RunE: run,
RunE: func(cmd *cobra.Command, _ []string) error {
msg, err := run(cmd.Context())
sendSlackNotification(msg, err)
return err
},
}

var args struct {
Expand Down Expand Up @@ -179,26 +183,66 @@ func collectActiveClusters() (map[string]bool, error) {
return activeClusters, nil
}

//nolint:gocyclo
func run(cmd *cobra.Command, argv []string) error {
var err error
if err = common.LoadConfigs(args.configString, args.customConfig, args.secretLocations); err != nil {
return fmt.Errorf("error loading initial state: %v", err)
// sendSlackNotification sends the cleanup summary to Slack if sendSummary is set and webhook is configured.
// When runErr is non-nil, it appends the run failure to the message summary.
func sendSlackNotification(msg Message, runErr error) {
if !args.sendSummary {
return
}
fmtDuration, err := time.ParseDuration(args.olderThan)
if err != nil {
return fmt.Errorf("error parsing --older-than: %v", err)
webhook := viper.GetString(config.Tests.SlackWebhook)
if webhook == "" {
fmt.Println("Slack Webhook is not set, skipping notification.")
return
}
if runErr != nil {
msg.Summary += "\n\nRun failed: " + runErr.Error()
}
ctx := context.Background()
if err := commonslack.SendWebhook(ctx, webhook, msg); err != nil {
fmt.Printf("Failed to send slack notification: %v\n", err)
return
}
fmt.Println("Slack notification sent successfully")
}

// message format: `{"summary":"<summary>", "buildfile":"<buildfile>", "s3":"<s3 errors>",
// "iam":"<iam errors>", "ip":"<ip errors>", "ec2":"<ec2 errors>", "vpc":"<vpc errors>"}`
//nolint:gocyclo
func run(_ context.Context) (msg Message, err error) {
var summaryBuilder strings.Builder
var iamErrorBuilder strings.Builder
var s3ErrorBuilder strings.Builder
var ipErrorBuilder strings.Builder
var ec2ErrorBuilder strings.Builder
var vpcErrorBuilder strings.Builder

defer func() {
buildFile := ""
if strings.Contains(viper.GetString(config.JobName), "rehearse") {
basePRJobURL := "https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/test-platform-results/pr-logs/pull/openshift_release"
buildFile += basePRJobURL + "/" + os.Getenv("PULL_NUMBER")
} else {
buildFile += viper.GetString(config.BaseJobURL)
}
buildFile += "/" + viper.GetString(config.JobName) +
"/" + viper.GetString(config.JobID) + "/artifacts/test/build-log.txt"
msg = Message{
Summary: summaryBuilder.String(),
BuildFile: "Build Logs: " + buildFile,
S3Errors: "S3 Errors: " + s3ErrorBuilder.String(),
IAMErrors: "IAM Errors: " + iamErrorBuilder.String(),
IPErrors: "IP Errors: " + ipErrorBuilder.String(),
EC2Errors: "EC2 Errors: " + ec2ErrorBuilder.String(),
VPCErrors: "VPC Errors: " + vpcErrorBuilder.String(),
}
}()

if err = common.LoadConfigs(args.configString, args.customConfig, args.secretLocations); err != nil {
return msg, err
}
fmtDuration, err := time.ParseDuration(args.olderThan)
if err != nil {
return msg, fmt.Errorf("error parsing --older-than: %v", err)
}

if args.dryRun {
summaryBuilder.WriteString("-- Cleanup dry run -- \n")
}
Expand All @@ -213,7 +257,7 @@ func run(cmd *cobra.Command, argv []string) error {
// Collect active clusters once for all cleanup operations
activeClusters, err := collectActiveClusters()
if err != nil {
return fmt.Errorf("could not collect active clusters: %v", err)
return msg, fmt.Errorf("could not collect active clusters: %v", err)
}
log.Printf("Found %d active clusters for cleanup operations\n", len(activeClusters))

Expand All @@ -223,19 +267,19 @@ func run(cmd *cobra.Command, argv []string) error {
err = aws.CcsAwsSession.CleanupVPCs(activeClusters, args.dryRun, args.sendSummary, &vpcDeletedCounter, &vpcFailedCounter, &vpcErrorBuilder)
summaryBuilder.WriteString("VPCs: " + strconv.Itoa(vpcDeletedCounter) + "/" + strconv.Itoa(vpcFailedCounter) + "\n")
if err != nil {
return fmt.Errorf("could not cleanup vpc resources: %s", err.Error())
return msg, fmt.Errorf("could not cleanup vpc resources: %s", err.Error())
}
}

if args.clusters {
provider, err := ocmprovider.NewWithEnv(viper.GetString(ocmprovider.Env))
if err != nil {
return fmt.Errorf("could not setup cluster provider: %v", err)
return msg, fmt.Errorf("could not setup cluster provider: %v", err)
}

clusters, err := provider.ListClusters("properties.MadeByOSDe2e='true'")
if err != nil {
return err
return msg, err
}
// delete clusters older than cutoffTime
cutoffTime := time.Now().UTC().Add(-fmtDuration)
Expand All @@ -258,20 +302,19 @@ func run(cmd *cobra.Command, argv []string) error {
if args.clusterID != "" {
provider, err := ocmprovider.NewWithEnv(viper.GetString(ocmprovider.Env))
if err != nil {
return fmt.Errorf("could not setup cluster provider: %v", err)
return msg, fmt.Errorf("could not setup cluster provider: %v", err)
}
cluster, err := provider.GetCluster(args.clusterID)
if err != nil {
return fmt.Errorf("cluster id: %s not found, unable to delete it", args.clusterID)
return msg, fmt.Errorf("cluster id: %s not found, unable to delete it", args.clusterID)
}

fmt.Printf("Cluster will be deleted: %s \n", cluster.ID())
if !args.dryRun {
if err = provider.DeleteCluster(cluster.ID()); err != nil {
return fmt.Errorf("failed to delete cluster: %v", err)
} else {
fmt.Println("Uninstall started successfully")
return msg, fmt.Errorf("failed to delete cluster: %v", err)
}
fmt.Println("Uninstall started successfully")
}
}

Expand All @@ -281,14 +324,14 @@ func run(cmd *cobra.Command, argv []string) error {
err = aws.CcsAwsSession.CleanupOpenIDConnectProviders(activeClusters, args.dryRun, args.sendSummary, &oidcDeletedCounter, &oidcFailedCounter, &iamErrorBuilder)
summaryBuilder.WriteString("OIDC providers: " + strconv.Itoa(oidcDeletedCounter) + "/" + strconv.Itoa(oidcFailedCounter) + "\n")
if err != nil {
return fmt.Errorf("could not delete OIDC providers: %s", err.Error())
return msg, fmt.Errorf("could not delete OIDC providers: %s", err.Error())
}
rolesDeletedCounter := 0
rolesFailedCounter := 0
err = aws.CcsAwsSession.CleanupRoles(activeClusters, args.dryRun, args.sendSummary, &rolesDeletedCounter, &rolesFailedCounter, &iamErrorBuilder)
summaryBuilder.WriteString("Roles: " + strconv.Itoa(rolesDeletedCounter) + "/" + strconv.Itoa(rolesFailedCounter) + "\n")
if err != nil {
return fmt.Errorf("could not delete IAM roles: %s", err.Error())
return msg, fmt.Errorf("could not delete IAM roles: %s", err.Error())
}
}

Expand All @@ -298,7 +341,7 @@ func run(cmd *cobra.Command, argv []string) error {
err = aws.CcsAwsSession.CleanupS3Buckets(activeClusters, args.dryRun, args.sendSummary, &s3BucketDeletedCounter, &s3BucketFailedCounter, &s3ErrorBuilder)
summaryBuilder.WriteString("S3 Buckets: " + strconv.Itoa(s3BucketDeletedCounter) + "/" + strconv.Itoa(s3BucketFailedCounter) + "\n")
if err != nil {
return fmt.Errorf("could not delete s3 buckets: %s", err.Error())
return msg, fmt.Errorf("could not delete s3 buckets: %s", err.Error())
}
}

Expand All @@ -307,7 +350,7 @@ func run(cmd *cobra.Command, argv []string) error {
summaryBuilder.WriteString("EC2 Instances: " + strconv.Itoa(instancesDeleted) + "/" + strconv.Itoa(instancesFailedToDelete) + "\n")
if err != nil {
if !errors.Is(err, aws.ErrTerminateEC2Instances) {
return fmt.Errorf("could not terminate ec2 instances: %s", err.Error())
return msg, fmt.Errorf("could not terminate ec2 instances: %s", err.Error())
}
ec2ErrorMessage := err.Error()
if len(ec2ErrorMessage) > config.SlackMessageLength {
Expand All @@ -323,44 +366,9 @@ func run(cmd *cobra.Command, argv []string) error {
err = aws.CcsAwsSession.ReleaseElasticIPs(args.dryRun, args.sendSummary, &elasticIpDeletedCounter, &elasticIpFailedCounter, &ipErrorBuilder)
summaryBuilder.WriteString("Elastic IPs: " + strconv.Itoa(elasticIpDeletedCounter) + "/" + strconv.Itoa(elasticIpFailedCounter) + "\n")
if err != nil {
return fmt.Errorf("could not release ips: %s", err.Error())
}
}

if args.sendSummary {
webhook := viper.GetString(config.Tests.SlackWebhook)
if webhook == "" {
fmt.Println("Slack Webhook is not set, skipping notification.")
return nil
}
buildFile := ""
if strings.Contains(viper.GetString(config.JobName), "rehearse") {
basePRJobURL := "https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/test-platform-results/pr-logs/pull/openshift_release"
buildFile += basePRJobURL + "/" + os.Getenv("PULL_NUMBER")
} else {
buildFile += viper.GetString(config.BaseJobURL)
return msg, fmt.Errorf("could not release ips: %s", err.Error())
}
buildFile += "/" + viper.GetString(config.JobName) +
"/" + viper.GetString(config.JobID) + "/artifacts/test/build-log.txt"

message := Message{
Summary: summaryBuilder.String(),
BuildFile: "Build Logs: " + buildFile,
S3Errors: "S3 Errors: " + s3ErrorBuilder.String(),
IAMErrors: "IAM Errors: " + iamErrorBuilder.String(),
IPErrors: "IP Errors: " + ipErrorBuilder.String(),
EC2Errors: "EC2 Errors: " + ec2ErrorBuilder.String(),
VPCErrors: "VPC Errors: " + vpcErrorBuilder.String(),
}

// Send notification using common slack package
ctx := context.Background()
if err := commonslack.SendWebhook(ctx, webhook, message); err != nil {
return fmt.Errorf("failed to send slack notification: %w", err)
}

fmt.Println("Slack notification sent successfully")
}

return nil
return msg, nil
}