|
| 1 | +package cad |
| 2 | + |
| 3 | +import ( |
| 4 | + "context" |
| 5 | + "fmt" |
| 6 | + |
| 7 | + "github.com/openshift/osdctl/pkg/k8s" |
| 8 | + "github.com/openshift/osdctl/pkg/utils" |
| 9 | + "github.com/spf13/cobra" |
| 10 | + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" |
| 11 | + "k8s.io/apimachinery/pkg/runtime/schema" |
| 12 | + "sigs.k8s.io/controller-runtime/pkg/client" |
| 13 | +) |
| 14 | + |
// OCM cluster IDs of the clusters that host Configuration Anomaly Detection.
// These IDs are hard-coded in app-interface.
const (
	// cadClusterIDProd is the CAD cluster used for production investigations.
	cadClusterIDProd = "2fbi9mjhqpobh20ot5d7e5eeq3a8gfhs"
	// cadClusterIDStage is the CAD cluster used for stage investigations.
	cadClusterIDStage = "2f9ghpikkv446iidcv7b92em2hgk13q9"
)
| 19 | + |
// validInvestigations lists every investigation name accepted by the
// --investigation flag. It drives both flag validation and shell completion,
// so the order here is the order users see.
var validInvestigations = []string{
	"chgm", "cmbb", "can-not-retrieve-updates", "ai", "cpd",
	"etcd-quota-low", "insightsoperatordown", "machine-health-check",
	"must-gather", "upgrade-config",
}
| 32 | + |
// validEnvironments lists the values accepted by the --environment flag. It
// drives both flag validation and shell completion.
var validEnvironments = []string{"stage", "production"}
| 37 | + |
// cadRunOptions carries the flag values of the "run" sub-command.
type cadRunOptions struct {
	// clusterID is the target cluster to investigate (--cluster-id / -C).
	clusterID string
	// investigation is the name of the investigation to run (--investigation / -i).
	investigation string
	// elevationReason is the justification passed along when requesting
	// elevated backplane access to the CAD cluster (--reason).
	elevationReason string
	// environment is the environment of the target cluster (--environment / -e).
	environment string
}
| 44 | + |
| 45 | +func newCmdRun() *cobra.Command { |
| 46 | + opts := &cadRunOptions{} |
| 47 | + |
| 48 | + runCmd := &cobra.Command{ |
| 49 | + Use: "run", |
| 50 | + Short: "Run a manual investigation on the CAD cluster", |
| 51 | + Long: `Run a manual investigation on the Configuration Anomaly Detection (CAD) cluster. |
| 52 | +
|
| 53 | +This command schedules a Tekton PipelineRun on the appropriate CAD cluster (stage or production) |
| 54 | +to run an investigation against a target cluster. |
| 55 | +
|
| 56 | +Prerequisites: |
| 57 | + - Connected to the target cluster's OCM environment (production or stage) |
| 58 | + - The CAD clusters themselves are always in production OCM |
| 59 | +
|
| 60 | +Available Investigations: |
| 61 | + chgm, cmbb, can-not-retrieve-updates, ai, cpd, etcd-quota-low, |
| 62 | + insightsoperatordown, machine-health-check, must-gather, upgrade-config |
| 63 | +
|
| 64 | +Example: |
| 65 | + # Run a change management investigation on a production cluster |
| 66 | + osdctl cluster cad run \ |
| 67 | + --cluster-id 1a2b3c4d5e6f7g8h9i0j \ |
| 68 | + --investigation chgm \ |
| 69 | + --environment production \ |
| 70 | + --reason "OHSS-12345" |
| 71 | +
|
| 72 | +Note: |
| 73 | + After the investigation completes (may take several minutes), view results using: |
| 74 | + osdctl cluster reports list -C <cluster-id> -l 1 |
| 75 | +
|
| 76 | + You must be connected to the target cluster's OCM environment to view its reports.`, |
| 77 | + Args: cobra.NoArgs, |
| 78 | + DisableAutoGenTag: true, |
| 79 | + RunE: func(cmd *cobra.Command, args []string) error { |
| 80 | + return opts.run() |
| 81 | + }, |
| 82 | + } |
| 83 | + |
| 84 | + runCmd.Flags().StringVarP(&opts.clusterID, "cluster-id", "C", "", "Cluster ID (internal or external)") |
| 85 | + runCmd.Flags().StringVarP(&opts.investigation, "investigation", "i", "", "Investigation name") |
| 86 | + runCmd.Flags().StringVarP(&opts.environment, "environment", "e", "", "Environment of the cluster we want to run the investigation on. Allowed values: \"stage\" or \"production\"") |
| 87 | + runCmd.Flags().StringVar(&opts.elevationReason, "reason", "", "Provide a reason for running a manual investigation, used for backplane. Eg: 'OHSS-XXXX', or '#ITN-2024-XXXXX.") |
| 88 | + |
| 89 | + _ = runCmd.RegisterFlagCompletionFunc("investigation", func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { |
| 90 | + return validInvestigations, cobra.ShellCompDirectiveNoFileComp |
| 91 | + }) |
| 92 | + |
| 93 | + _ = runCmd.RegisterFlagCompletionFunc("environment", func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { |
| 94 | + return validEnvironments, cobra.ShellCompDirectiveNoFileComp |
| 95 | + }) |
| 96 | + |
| 97 | + return runCmd |
| 98 | +} |
| 99 | + |
| 100 | +func (o *cadRunOptions) run() error { |
| 101 | + if err := o.validate(); err != nil { |
| 102 | + return err |
| 103 | + } |
| 104 | + |
| 105 | + cadClusterID, cadNamespace := o.getCADClusterConfig() |
| 106 | + |
| 107 | + // CAD clusters are always in production OCM, so explicitly create a production connection |
| 108 | + ocmConn, err := utils.CreateConnectionWithUrl("production") |
| 109 | + if err != nil { |
| 110 | + return fmt.Errorf("failed to create production OCM connection: %w", err) |
| 111 | + } |
| 112 | + defer ocmConn.Close() |
| 113 | + |
| 114 | + k8sClient, err := k8s.NewAsBackplaneClusterAdminWithConn(cadClusterID, client.Options{}, ocmConn, o.elevationReason, "Need elevation for cad cluster in order to schedule a Tekton pipeline run") |
| 115 | + if err != nil { |
| 116 | + return fmt.Errorf("failed to create k8s client: %w", err) |
| 117 | + } |
| 118 | + |
| 119 | + u := o.pipelineRunTemplate(cadNamespace) |
| 120 | + |
| 121 | + err = k8sClient.Create(context.Background(), u) |
| 122 | + if err != nil { |
| 123 | + return fmt.Errorf("failed to schedule task: %w", err) |
| 124 | + } |
| 125 | + |
| 126 | + reportCmd := fmt.Sprintf("'osdctl cluster reports list -C %s -l 1'", o.clusterID) |
| 127 | + fmt.Println("Successfully scheduled manual investigation. It can take several minutes until a report is available. Run this command to check the latest report for the results while being connected to the right OCM backplane environment. " + reportCmd) |
| 128 | + |
| 129 | + return nil |
| 130 | +} |
| 131 | + |
| 132 | +func (o *cadRunOptions) validate() error { |
| 133 | + conn, err := utils.CreateConnection() |
| 134 | + if err != nil { |
| 135 | + return err |
| 136 | + } |
| 137 | + defer conn.Close() |
| 138 | + |
| 139 | + if o.clusterID == "" { |
| 140 | + return fmt.Errorf("cluster-id is required") |
| 141 | + } |
| 142 | + |
| 143 | + validInvestigation := false |
| 144 | + for _, v := range validInvestigations { |
| 145 | + if o.investigation == v { |
| 146 | + validInvestigation = true |
| 147 | + break |
| 148 | + } |
| 149 | + } |
| 150 | + if !validInvestigation { |
| 151 | + return fmt.Errorf("invalid investigation %q, must be one of: %v", o.investigation, validInvestigations) |
| 152 | + } |
| 153 | + |
| 154 | + validEnvironment := false |
| 155 | + for _, v := range validEnvironments { |
| 156 | + if o.environment == v { |
| 157 | + validEnvironment = true |
| 158 | + break |
| 159 | + } |
| 160 | + } |
| 161 | + if !validEnvironment { |
| 162 | + return fmt.Errorf("invalid environment %q, must be one of: %v", o.environment, validEnvironments) |
| 163 | + } |
| 164 | + |
| 165 | + if o.elevationReason == "" { |
| 166 | + return fmt.Errorf("elevation reason is required") |
| 167 | + } |
| 168 | + |
| 169 | + return nil |
| 170 | +} |
| 171 | + |
| 172 | +func (o *cadRunOptions) getCADClusterConfig() (clusterID, namespace string) { |
| 173 | + if o.environment == "stage" { |
| 174 | + return cadClusterIDStage, "configuration-anomaly-detection-stage" |
| 175 | + } |
| 176 | + return cadClusterIDProd, "configuration-anomaly-detection-production" |
| 177 | +} |
| 178 | + |
| 179 | +func (o *cadRunOptions) pipelineRunTemplate(cadNamespace string) *unstructured.Unstructured { |
| 180 | + u := unstructured.Unstructured{} |
| 181 | + u.Object = map[string]interface{}{ |
| 182 | + "apiVersion": "tekton.dev/v1beta1", |
| 183 | + "kind": "PipelineRun", |
| 184 | + "metadata": map[string]interface{}{ |
| 185 | + "generateName": "cad-manual-", |
| 186 | + "namespace": cadNamespace, |
| 187 | + }, |
| 188 | + "spec": map[string]interface{}{ |
| 189 | + "params": []map[string]interface{}{ |
| 190 | + { |
| 191 | + "name": "cluster-id", |
| 192 | + "value": o.clusterID, |
| 193 | + }, |
| 194 | + { |
| 195 | + "name": "investigation", |
| 196 | + "value": o.investigation, |
| 197 | + }, |
| 198 | + { |
| 199 | + "name": "dry-run", |
| 200 | + "value": "false", |
| 201 | + }, |
| 202 | + }, |
| 203 | + "pipelineRef": map[string]interface{}{ |
| 204 | + "name": "cad-manual-investigation-pipeline", |
| 205 | + }, |
| 206 | + "serviceAccountName": "cad-sa", |
| 207 | + "timeout": "30m", |
| 208 | + }, |
| 209 | + } |
| 210 | + |
| 211 | + u.SetGroupVersionKind(schema.GroupVersionKind{ |
| 212 | + Group: "tekton.dev", |
| 213 | + Version: "v1beta1", |
| 214 | + Kind: "PipelineRun", |
| 215 | + }) |
| 216 | + |
| 217 | + return &u |
| 218 | +} |
0 commit comments