-
Notifications
You must be signed in to change notification settings - Fork 29
Expand file tree
/
Copy pathcli.go
More file actions
198 lines (180 loc) · 7.37 KB
/
cli.go
File metadata and controls
198 lines (180 loc) · 7.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
package cli
import (
"fmt"
"io"
"path/filepath"
"github.com/openshift/must-gather-clean/pkg/cleaner"
"github.com/openshift/must-gather-clean/pkg/fsutil"
"github.com/openshift/must-gather-clean/pkg/reporting"
"k8s.io/klog/v2"
"github.com/openshift/must-gather-clean/pkg/obfuscator"
"github.com/openshift/must-gather-clean/pkg/omitter"
"github.com/openshift/must-gather-clean/pkg/schema"
"github.com/openshift/must-gather-clean/pkg/traversal"
watermarking "github.com/openshift/must-gather-clean/pkg/watermarker"
)
const (
// reportFileName is the file name of the report that Run writes into the reporting folder.
reportFileName = "report.yaml"
)
// RunPipe obfuscates everything read from stdin and writes the result to stdout.
//
// When configPath is non-empty the obfuscators are built from the config file at
// that path. Otherwise a default set is used: an IP obfuscator and a MAC address
// obfuscator, both using the consistent replacement type.
//
// An error is returned when the config cannot be read, when an obfuscator cannot
// be created, or when obfuscating the stream fails.
func RunPipe(configPath string, stdin io.Reader, stdout io.Writer) error {
	var pipeObfuscator *obfuscator.MultiObfuscator

	if configPath == "" {
		// No config was given: fall back to the built-in IP + MAC obfuscation.
		ips, err := obfuscator.NewIPObfuscator(schema.ObfuscateReplacementTypeConsistent, obfuscator.NewSimpleTracker())
		if err != nil {
			return fmt.Errorf("failed to create IP obfuscator: %w", err)
		}

		macs, err := obfuscator.NewMacAddressObfuscator(schema.ObfuscateReplacementTypeConsistent, obfuscator.NewSimpleTracker())
		if err != nil {
			return fmt.Errorf("failed to create MAC obfuscator: %w", err)
		}

		pipeObfuscator = obfuscator.NewMultiObfuscator([]obfuscator.ReportingObfuscator{ips, macs})
	} else {
		cfg, err := schema.ReadConfigFromPath(configPath)
		if err != nil {
			return fmt.Errorf("failed to read config at %s: %w", configPath, err)
		}

		// The prescan obfuscator is deliberately dropped: a stream has no clear
		// end of input, so a separate scanning pass is not possible here.
		fromConfig, _, err := createObfuscatorsFromConfig(cfg)
		if err != nil {
			return fmt.Errorf("failed to create obfuscators via config at %s: %w", configPath, err)
		}
		pipeObfuscator = fromConfig
	}

	streamCleaner := cleaner.ContentObfuscator{Obfuscator: pipeObfuscator}
	if err := streamCleaner.ObfuscateReader(stdin, stdout); err != nil {
		return fmt.Errorf("failed to obfuscate via pipe: %w", err)
	}
	return nil
}
// Run cleans the directory at inputPath into outputPath using the config at configPath.
//
// The steps, in order:
//  1. reject a workerCount below 1,
//  2. call fsutil.EnsureInputOutputPath with the input/output paths and deleteOutputFolder
//     (presumably validating/preparing both paths - confirm against fsutil),
//  3. read the config and create the final and prescan obfuscators,
//  4. walk inputPath once with the prescan obfuscator and no output path,
//  5. create the omitters and walk inputPath again, this time writing to outputPath,
//  6. write the report to reportingFolder/report.yaml,
//  7. write the watermark file for outputPath.
//
// workerCount is the number of workers handed to each parallel file walker.
// The first error encountered in any step is returned.
func Run(configPath string, inputPath string, outputPath string, deleteOutputFolder bool, reportingFolder string, workerCount int) error {
	if workerCount < 1 {
		return fmt.Errorf("invalid number of workers specified %d", workerCount)
	}

	if err := fsutil.EnsureInputOutputPath(inputPath, outputPath, deleteOutputFolder); err != nil {
		return err
	}

	cfg, err := schema.ReadConfigFromPath(configPath)
	if err != nil {
		return fmt.Errorf("failed to read config at %s: %w", configPath, err)
	}

	finalObfuscator, scanObfuscator, err := createObfuscatorsFromConfig(cfg)
	if err != nil {
		return fmt.Errorf("failed to create obfuscators via config at %s: %w", configPath, err)
	}

	// First pass: lets obfuscators that must see the whole input before they know
	// what to replace collect that knowledge. The empty output path signals a
	// dry-run, and nothing is omitted during this pass.
	klog.V(0).Info("starting pre obfuscation file scan")
	dryRunCleaner := cleaner.NewFileCleaner(inputPath, "", scanObfuscator, &omitter.NoopOmitter{})
	traversal.NewParallelFileWalker(inputPath, workerCount, func(id int) traversal.QueueProcessor {
		return traversal.NewWorker(id, dryRunCleaner)
	}).Traverse()

	omitters, err := createOmittersFromConfig(cfg, inputPath)
	if err != nil {
		return fmt.Errorf("failed to create omitters via config at %s: %w", configPath, err)
	}

	// Second pass: the actual cleaning into outputPath.
	outputCleaner := cleaner.NewFileCleaner(inputPath, outputPath, finalObfuscator, omitters)
	klog.V(0).Info("starting obfuscation process")
	traversal.NewParallelFileWalker(inputPath, workerCount, func(id int) traversal.QueueProcessor {
		return traversal.NewWorker(id, outputCleaner)
	}).Traverse()

	klog.V(0).Info("creating report")
	report := reporting.NewSimpleReporter(cfg)
	report.CollectOmitterReport(omitters.Report())
	report.CollectObfuscatorReport(finalObfuscator.ReportPerObfuscator())
	if err := report.WriteReport(filepath.Join(reportingFolder, reportFileName)); err != nil {
		return err
	}

	klog.V(0).Info("creating watermark")
	marker := watermarking.NewSimpleWaterMarker()
	return marker.WriteWaterMarkFile(outputPath)
}
// createOmittersFromConfig builds one omitter per entry in config.Config.Omit and
// combines them into a single reporting omitter.
//
// Symbolic-link and file-pattern entries become file omitters; Kubernetes entries
// become Kubernetes resource omitters. inputPath is only handed to the symlink
// omitter. Entries whose type matches none of the handled cases are skipped.
//
// An error is returned when a File entry has no 'pattern', when a Kubernetes entry
// has no 'kubernetesResource', or when constructing an omitter fails. Incomplete
// entries are reported through the error return rather than by dereferencing a nil
// pointer or terminating the process, so the caller decides how to react.
func createOmittersFromConfig(config *schema.SchemaJson, inputPath string) (omitter.ReportingOmitter, error) {
	var fileOmitters []omitter.FileOmitter
	var k8sOmitters []omitter.KubernetesResourceOmitter

	for _, o := range config.Config.Omit {
		switch o.Type {
		case schema.OmitTypeSymbolicLink:
			fileOmitters = append(fileOmitters, omitter.NewSymlinkOmitter(inputPath))

		case schema.OmitTypeFile:
			// Pattern is a pointer and may be absent from the config.
			if o.Pattern == nil {
				return nil, fmt.Errorf("type File must also include a 'pattern', given: %+v", o)
			}
			om, err := omitter.NewFilenamePatternOmitter(*o.Pattern)
			if err != nil {
				return nil, err
			}
			fileOmitters = append(fileOmitters, om)

		case schema.OmitTypeKubernetes:
			if o.KubernetesResource == nil {
				return nil, fmt.Errorf("type Kubernetes must also include a 'kubernetesResource', given: %+v", o)
			}
			kr := *o.KubernetesResource
			om, err := omitter.NewKubernetesResourceOmitter(kr.ApiVersion, kr.Kind, kr.Namespaces)
			if err != nil {
				return nil, err
			}
			k8sOmitters = append(k8sOmitters, om)
		}
	}

	return omitter.NewMultiReportingOmitter(fileOmitters, k8sOmitters), nil
}
// createObfuscatorsFromConfig builds the obfuscators declared in config.Config.Obfuscate.
//
// finalObfuscator is the obfuscator to use to actually clean a directory. It holds one
// obfuscator per config entry, each wrapped in a target obfuscator for the entry's target.
//
// prescanObfuscator shares some individual obfuscator instances with finalObfuscator
// (currently only the Azure resource obfuscators, without the target wrapper) and is
// meant to be run in a dry-run mode (no output directory) to pre-scan the input and
// determine the full set of strings to elide. This allows for usage patterns like:
//
// file/B (exact name unknown) may contain strings like /subscription/ID, where ID needs
// to be redacted in all files, but file/A contains only ID. ID is not recognized as
// needing redaction until file/B has been read, so all files must be scanned first and
// redacted afterwards.
//
// finalErr is non-nil when an entry has an unknown type, when a Regex entry has no
// 'regex', or when constructing an obfuscator fails; both obfuscators are nil then.
func createObfuscatorsFromConfig(config *schema.SchemaJson) (finalObfuscator *obfuscator.MultiObfuscator, prescanObfuscator *obfuscator.MultiObfuscator, finalErr error) {
	var obfuscators []obfuscator.ReportingObfuscator
	var prescanObfuscators []obfuscator.ReportingObfuscator

	for _, o := range config.Config.Obfuscate {
		var (
			k       obfuscator.ReportingObfuscator
			err     error
			prescan bool // whether k must also take part in the prescan pass
		)
		tracker := obfuscator.NewSimpleTrackerMap(o.Replacement)

		switch o.Type {
		case schema.ObfuscateTypeKeywords:
			k = obfuscator.NewKeywordsObfuscator(o.Replacement)
		case schema.ObfuscateTypeMAC:
			k, err = obfuscator.NewMacAddressObfuscator(o.ReplacementType, tracker)
		case schema.ObfuscateTypeRegex:
			// Regex is a pointer and may be absent from the config.
			if o.Regex == nil {
				return nil, nil, fmt.Errorf("obfuscator type %s must also include a 'regex'", o.Type)
			}
			k, err = obfuscator.NewRegexObfuscator(*o.Regex, tracker)
		case schema.ObfuscateTypeDomain:
			k, err = obfuscator.NewDomainObfuscator(o.DomainNames, o.ReplacementType, tracker)
		case schema.ObfuscateTypeAzureResources:
			k, err = obfuscator.NewAzureResourceObfuscator(o.ReplacementType, tracker)
			prescan = true
		case schema.ObfuscateTypeExact:
			k = obfuscator.NewExactReplacementObfuscator(o.ExactReplacements, tracker)
		case schema.ObfuscateTypeIP:
			k, err = obfuscator.NewIPObfuscator(o.ReplacementType, tracker)
		default:
			return nil, nil, fmt.Errorf("unknown obfuscator type %s", o.Type)
		}
		if err != nil {
			return nil, nil, err
		}

		// The prescan list receives the unwrapped instance, so it is the same
		// object that the final obfuscator wraps below.
		if prescan {
			prescanObfuscators = append(prescanObfuscators, k)
		}

		k = obfuscator.NewTargetObfuscator(o.Target, k)
		obfuscators = append(obfuscators, k)
	}

	return obfuscator.NewMultiObfuscator(obfuscators), obfuscator.NewMultiObfuscator(prescanObfuscators), nil
}