Skip to content

Commit 7354025

Browse files
SDCICD-1765: print cleanup errors in slack notification summary
1 parent c2744e8 commit 7354025

File tree

1 file changed

+69
-61
lines changed

1 file changed

+69
-61
lines changed

cmd/osde2e/cleanup/cmd.go

Lines changed: 69 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,11 @@ var Cmd = &cobra.Command{
2626
Short: "Cleans up expired clusters or a specific cluster.",
2727
Long: "Cleans up expired clusters or a specific cluster.",
2828
Args: cobra.OnlyValidArgs,
29-
RunE: run,
29+
RunE: func(cmd *cobra.Command, argv []string) error {
30+
msg, err := run(cmd, argv)
31+
sendSlackNotification(msg, err)
32+
return err
33+
},
3034
}
3135

3236
var args struct {
@@ -179,26 +183,66 @@ func collectActiveClusters() (map[string]bool, error) {
179183
return activeClusters, nil
180184
}
181185

182-
//nolint:gocyclo
183-
func run(cmd *cobra.Command, argv []string) error {
184-
var err error
185-
if err = common.LoadConfigs(args.configString, args.customConfig, args.secretLocations); err != nil {
186-
return fmt.Errorf("error loading initial state: %v", err)
186+
// sendSlackNotification sends the cleanup summary to Slack if sendSummary is set and a webhook is configured.
187+
// When runErr is non-nil, it appends the run failure to the message summary.
188+
func sendSlackNotification(msg Message, runErr error) {
189+
if !args.sendSummary {
190+
return
187191
}
188-
fmtDuration, err := time.ParseDuration(args.olderThan)
189-
if err != nil {
190-
return fmt.Errorf("error parsing --older-than: %v", err)
192+
webhook := viper.GetString(config.Tests.SlackWebhook)
193+
if webhook == "" {
194+
fmt.Println("Slack Webhook is not set, skipping notification.")
195+
return
191196
}
197+
if runErr != nil {
198+
msg.Summary += "\n\nRun failed: " + runErr.Error()
199+
}
200+
ctx := context.Background()
201+
if err := commonslack.SendWebhook(ctx, webhook, msg); err != nil {
202+
fmt.Printf("Failed to send slack notification: %v\n", err)
203+
return
204+
}
205+
fmt.Println("Slack notification sent successfully")
206+
}
192207

193-
// message format: `{"summary":"<summary>", "buildfile":"<buildfile>", "s3":"<s3 errors>",
194-
// "iam":"<iam errors>", "ip":"<ip errors>", "ec2":"<ec2 errors>", "vpc":"<vpc errors>"}`
208+
//nolint:gocyclo
209+
func run(cmd *cobra.Command, argv []string) (msg Message, err error) {
195210
var summaryBuilder strings.Builder
196211
var iamErrorBuilder strings.Builder
197212
var s3ErrorBuilder strings.Builder
198213
var ipErrorBuilder strings.Builder
199214
var ec2ErrorBuilder strings.Builder
200215
var vpcErrorBuilder strings.Builder
201216

217+
defer func() {
218+
buildFile := ""
219+
if strings.Contains(viper.GetString(config.JobName), "rehearse") {
220+
basePRJobURL := "https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/test-platform-results/pr-logs/pull/openshift_release"
221+
buildFile += basePRJobURL + "/" + os.Getenv("PULL_NUMBER")
222+
} else {
223+
buildFile += viper.GetString(config.BaseJobURL)
224+
}
225+
buildFile += "/" + viper.GetString(config.JobName) +
226+
"/" + viper.GetString(config.JobID) + "/artifacts/test/build-log.txt"
227+
msg = Message{
228+
Summary: summaryBuilder.String(),
229+
BuildFile: "Build Logs: " + buildFile,
230+
S3Errors: "S3 Errors: " + s3ErrorBuilder.String(),
231+
IAMErrors: "IAM Errors: " + iamErrorBuilder.String(),
232+
IPErrors: "IP Errors: " + ipErrorBuilder.String(),
233+
EC2Errors: "EC2 Errors: " + ec2ErrorBuilder.String(),
234+
VPCErrors: "VPC Errors: " + vpcErrorBuilder.String(),
235+
}
236+
}()
237+
238+
if err = common.LoadConfigs(args.configString, args.customConfig, args.secretLocations); err != nil {
239+
return msg, err
240+
}
241+
fmtDuration, err := time.ParseDuration(args.olderThan)
242+
if err != nil {
243+
return msg, fmt.Errorf("error parsing --older-than: %v", err)
244+
}
245+
202246
if args.dryRun {
203247
summaryBuilder.WriteString("-- Cleanup dry run -- \n")
204248
}
@@ -213,7 +257,7 @@ func run(cmd *cobra.Command, argv []string) error {
213257
// Collect active clusters once for all cleanup operations
214258
activeClusters, err := collectActiveClusters()
215259
if err != nil {
216-
return fmt.Errorf("could not collect active clusters: %v", err)
260+
return msg, fmt.Errorf("could not collect active clusters: %v", err)
217261
}
218262
log.Printf("Found %d active clusters for cleanup operations\n", len(activeClusters))
219263

@@ -223,19 +267,19 @@ func run(cmd *cobra.Command, argv []string) error {
223267
err = aws.CcsAwsSession.CleanupVPCs(activeClusters, args.dryRun, args.sendSummary, &vpcDeletedCounter, &vpcFailedCounter, &vpcErrorBuilder)
224268
summaryBuilder.WriteString("VPCs: " + strconv.Itoa(vpcDeletedCounter) + "/" + strconv.Itoa(vpcFailedCounter) + "\n")
225269
if err != nil {
226-
return fmt.Errorf("could not cleanup vpc resources: %s", err.Error())
270+
return msg, fmt.Errorf("could not cleanup vpc resources: %s", err.Error())
227271
}
228272
}
229273

230274
if args.clusters {
231275
provider, err := ocmprovider.NewWithEnv(viper.GetString(ocmprovider.Env))
232276
if err != nil {
233-
return fmt.Errorf("could not setup cluster provider: %v", err)
277+
return msg, fmt.Errorf("could not setup cluster provider: %v", err)
234278
}
235279

236280
clusters, err := provider.ListClusters("properties.MadeByOSDe2e='true'")
237281
if err != nil {
238-
return err
282+
return msg, err
239283
}
240284
// delete clusters older than cutoffTime
241285
cutoffTime := time.Now().UTC().Add(-fmtDuration)
@@ -258,20 +302,19 @@ func run(cmd *cobra.Command, argv []string) error {
258302
if args.clusterID != "" {
259303
provider, err := ocmprovider.NewWithEnv(viper.GetString(ocmprovider.Env))
260304
if err != nil {
261-
return fmt.Errorf("could not setup cluster provider: %v", err)
305+
return msg, fmt.Errorf("could not setup cluster provider: %v", err)
262306
}
263307
cluster, err := provider.GetCluster(args.clusterID)
264308
if err != nil {
265-
return fmt.Errorf("cluster id: %s not found, unable to delete it", args.clusterID)
309+
return msg, fmt.Errorf("cluster id: %s not found, unable to delete it", args.clusterID)
266310
}
267311

268312
fmt.Printf("Cluster will be deleted: %s \n", cluster.ID())
269313
if !args.dryRun {
270314
if err = provider.DeleteCluster(cluster.ID()); err != nil {
271-
return fmt.Errorf("failed to delete cluster: %v", err)
272-
} else {
273-
fmt.Println("Uninstall started successfully")
315+
return msg, fmt.Errorf("failed to delete cluster: %v", err)
274316
}
317+
fmt.Println("Uninstall started successfully")
275318
}
276319
}
277320

@@ -281,14 +324,14 @@ func run(cmd *cobra.Command, argv []string) error {
281324
err = aws.CcsAwsSession.CleanupOpenIDConnectProviders(activeClusters, args.dryRun, args.sendSummary, &oidcDeletedCounter, &oidcFailedCounter, &iamErrorBuilder)
282325
summaryBuilder.WriteString("OIDC providers: " + strconv.Itoa(oidcDeletedCounter) + "/" + strconv.Itoa(oidcFailedCounter) + "\n")
283326
if err != nil {
284-
return fmt.Errorf("could not delete OIDC providers: %s", err.Error())
327+
return msg, fmt.Errorf("could not delete OIDC providers: %s", err.Error())
285328
}
286329
rolesDeletedCounter := 0
287330
rolesFailedCounter := 0
288331
err = aws.CcsAwsSession.CleanupRoles(activeClusters, args.dryRun, args.sendSummary, &rolesDeletedCounter, &rolesFailedCounter, &iamErrorBuilder)
289332
summaryBuilder.WriteString("Roles: " + strconv.Itoa(rolesDeletedCounter) + "/" + strconv.Itoa(rolesFailedCounter) + "\n")
290333
if err != nil {
291-
return fmt.Errorf("could not delete IAM roles: %s", err.Error())
334+
return msg, fmt.Errorf("could not delete IAM roles: %s", err.Error())
292335
}
293336
}
294337

@@ -298,7 +341,7 @@ func run(cmd *cobra.Command, argv []string) error {
298341
err = aws.CcsAwsSession.CleanupS3Buckets(activeClusters, args.dryRun, args.sendSummary, &s3BucketDeletedCounter, &s3BucketFailedCounter, &s3ErrorBuilder)
299342
summaryBuilder.WriteString("S3 Buckets: " + strconv.Itoa(s3BucketDeletedCounter) + "/" + strconv.Itoa(s3BucketFailedCounter) + "\n")
300343
if err != nil {
301-
return fmt.Errorf("could not delete s3 buckets: %s", err.Error())
344+
return msg, fmt.Errorf("could not delete s3 buckets: %s", err.Error())
302345
}
303346
}
304347

@@ -307,7 +350,7 @@ func run(cmd *cobra.Command, argv []string) error {
307350
summaryBuilder.WriteString("EC2 Instances: " + strconv.Itoa(instancesDeleted) + "/" + strconv.Itoa(instancesFailedToDelete) + "\n")
308351
if err != nil {
309352
if !errors.Is(err, aws.ErrTerminateEC2Instances) {
310-
return fmt.Errorf("could not terminate ec2 instances: %s", err.Error())
353+
return msg, fmt.Errorf("could not terminate ec2 instances: %s", err.Error())
311354
}
312355
ec2ErrorMessage := err.Error()
313356
if len(ec2ErrorMessage) > config.SlackMessageLength {
@@ -323,44 +366,9 @@ func run(cmd *cobra.Command, argv []string) error {
323366
err = aws.CcsAwsSession.ReleaseElasticIPs(args.dryRun, args.sendSummary, &elasticIpDeletedCounter, &elasticIpFailedCounter, &ipErrorBuilder)
324367
summaryBuilder.WriteString("Elastic IPs: " + strconv.Itoa(elasticIpDeletedCounter) + "/" + strconv.Itoa(elasticIpFailedCounter) + "\n")
325368
if err != nil {
326-
return fmt.Errorf("could not release ips: %s", err.Error())
327-
}
328-
}
329-
330-
if args.sendSummary {
331-
webhook := viper.GetString(config.Tests.SlackWebhook)
332-
if webhook == "" {
333-
fmt.Println("Slack Webhook is not set, skipping notification.")
334-
return nil
335-
}
336-
buildFile := ""
337-
if strings.Contains(viper.GetString(config.JobName), "rehearse") {
338-
basePRJobURL := "https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/test-platform-results/pr-logs/pull/openshift_release"
339-
buildFile += basePRJobURL + "/" + os.Getenv("PULL_NUMBER")
340-
} else {
341-
buildFile += viper.GetString(config.BaseJobURL)
369+
return msg, fmt.Errorf("could not release ips: %s", err.Error())
342370
}
343-
buildFile += "/" + viper.GetString(config.JobName) +
344-
"/" + viper.GetString(config.JobID) + "/artifacts/test/build-log.txt"
345-
346-
message := Message{
347-
Summary: summaryBuilder.String(),
348-
BuildFile: "Build Logs: " + buildFile,
349-
S3Errors: "S3 Errors: " + s3ErrorBuilder.String(),
350-
IAMErrors: "IAM Errors: " + iamErrorBuilder.String(),
351-
IPErrors: "IP Errors: " + ipErrorBuilder.String(),
352-
EC2Errors: "EC2 Errors: " + ec2ErrorBuilder.String(),
353-
VPCErrors: "VPC Errors: " + vpcErrorBuilder.String(),
354-
}
355-
356-
// Send notification using common slack package
357-
ctx := context.Background()
358-
if err := commonslack.SendWebhook(ctx, webhook, message); err != nil {
359-
return fmt.Errorf("failed to send slack notification: %w", err)
360-
}
361-
362-
fmt.Println("Slack notification sent successfully")
363371
}
364372

365-
return nil
373+
return msg, nil
366374
}

0 commit comments

Comments
 (0)