Skip to content

Commit 435edc2

Browse files
authored
Add network packet-capture (openshift#77)
* Add network packet-capture to perform a priviledged node tcpdump. * Use timestamp in pcap filename and add 'pcap' alias * Throw error when DS with the same name exists * Change default packet capture DS name * Change image name to openshift-sre org * Change test function name * Make docs
1 parent e359fe0 commit 435edc2

File tree

9 files changed

+468
-3
lines changed

9 files changed

+468
-3
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ bin/
1818
# IDE
1919
.idea
2020
.vscode
21+
*.code-workspace
2122

2223
# MacOS
2324
.DS_Store

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ BUILDFLAGS ?=
55
LDFLAGS = -ldflags="-X '${REPOSITORY}/cmd.GitCommit=${GIT_COMMIT}'"
66
unexport GOFLAGS
77

8-
all: format build test
8+
all: format mod build test
99

1010
format: vet fmt mockgen docs
1111

@@ -14,7 +14,7 @@ fmt:
1414
@gofmt -w -s .
1515
@git diff --exit-code .
1616

17-
build: mod
17+
build:
1818
go build ${BUILDFLAGS} ${LDFLAGS} -o ./bin/osdctl main.go
1919

2020
vet:

cmd/network/cmd.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
package network
2+
3+
import (
4+
"github.com/spf13/cobra"
5+
"k8s.io/cli-runtime/pkg/genericclioptions"
6+
)
7+
8+
// NewCmdNetwork implements the base cluster deployment command
9+
func NewCmdNetwork(streams genericclioptions.IOStreams, flags *genericclioptions.ConfigFlags) *cobra.Command {
10+
netCmd := &cobra.Command{
11+
Use: "network",
12+
Short: "network related utilities",
13+
Args: cobra.NoArgs,
14+
DisableAutoGenTag: true,
15+
Run: help,
16+
}
17+
18+
netCmd.AddCommand(newCmdPacketCapture(streams, flags))
19+
return netCmd
20+
}
21+
22+
func help(cmd *cobra.Command, _ []string) {
23+
cmd.Help()
24+
}

cmd/network/packet-capture.go

Lines changed: 327 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,327 @@
1+
package network
2+
3+
import (
4+
"bytes"
5+
"context"
6+
"errors"
7+
"fmt"
8+
"io"
9+
"log"
10+
"os"
11+
"os/exec"
12+
"strconv"
13+
"time"
14+
15+
"github.com/spf13/cobra"
16+
appsv1 "k8s.io/api/apps/v1"
17+
corev1 "k8s.io/api/core/v1"
18+
k8serr "k8s.io/apimachinery/pkg/api/errors"
19+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
20+
"k8s.io/apimachinery/pkg/labels"
21+
"k8s.io/apimachinery/pkg/types"
22+
"k8s.io/apimachinery/pkg/util/wait"
23+
24+
"k8s.io/cli-runtime/pkg/genericclioptions"
25+
cmdutil "k8s.io/kubectl/pkg/cmd/util"
26+
"sigs.k8s.io/controller-runtime/pkg/client"
27+
28+
"github.com/openshift/osd-utils-cli/pkg/k8s"
29+
)
30+
31+
const (
32+
packetCaptureImage = "quay.io/openshift-sre/network-toolbox:latest"
33+
packetCaptureName = "sre-packet-capture"
34+
packetCaptureNamespace = "default"
35+
outputDir = "capture-output"
36+
nodeLabelKey = "node-role.kubernetes.io/worker"
37+
nodeLabelValue = ""
38+
packetCaptureDurationSec = 60
39+
)
40+
41+
// newCmdPacketCapture implements the packet-capture command to run a packet capture
42+
func newCmdPacketCapture(streams genericclioptions.IOStreams, flags *genericclioptions.ConfigFlags) *cobra.Command {
43+
ops := newPacketCaptureOptions(streams, flags)
44+
packetCaptureCmd := &cobra.Command{
45+
Use: "packet-capture",
46+
Aliases: []string{"pcap"},
47+
Short: "Start packet capture",
48+
Args: cobra.NoArgs,
49+
DisableAutoGenTag: true,
50+
Run: func(cmd *cobra.Command, args []string) {
51+
cmdutil.CheckErr(ops.complete(cmd, args))
52+
cmdutil.CheckErr(ops.run())
53+
},
54+
}
55+
56+
packetCaptureCmd.Flags().IntVarP(&ops.duration, "duration", "d", packetCaptureDurationSec, "Duration (in seconds) of packet capture")
57+
packetCaptureCmd.Flags().StringVarP(&ops.name, "name", "", packetCaptureName, "Name of Daemonset")
58+
packetCaptureCmd.Flags().StringVarP(&ops.namespace, "namespace", "n", packetCaptureNamespace, "Namespace to deploy Daemonset")
59+
packetCaptureCmd.Flags().StringVarP(&ops.nodeLabelKey, "node-label-key", "", nodeLabelKey, "Node label key")
60+
packetCaptureCmd.Flags().StringVarP(&ops.nodeLabelValue, "node-label-value", "", nodeLabelValue, "Node label value")
61+
62+
ops.startTime = time.Now()
63+
return packetCaptureCmd
64+
}
65+
66+
// packetCaptureOptions defines the struct for running packet-capture command
67+
type packetCaptureOptions struct {
68+
name string
69+
namespace string
70+
nodeLabelKey string
71+
nodeLabelValue string
72+
duration int
73+
74+
flags *genericclioptions.ConfigFlags
75+
genericclioptions.IOStreams
76+
kubeCli client.Client
77+
startTime time.Time
78+
}
79+
80+
func newPacketCaptureOptions(streams genericclioptions.IOStreams, flags *genericclioptions.ConfigFlags) *packetCaptureOptions {
81+
return &packetCaptureOptions{
82+
flags: flags,
83+
IOStreams: streams,
84+
}
85+
}
86+
87+
func (o *packetCaptureOptions) complete(cmd *cobra.Command, _ []string) error {
88+
var err error
89+
o.kubeCli, err = k8s.NewClient(o.flags)
90+
if err != nil {
91+
return err
92+
}
93+
return nil
94+
}
95+
96+
func (o *packetCaptureOptions) run() error {
97+
log.Println("Ensuring Packet Capture Daemonset")
98+
ds, err := ensurePacketCaptureDaemonSet(o)
99+
if err != nil {
100+
log.Fatalf("Error ensuring packet capture daemonset %v", err)
101+
return err
102+
}
103+
log.Println("Waiting For Packet Capture Daemonset")
104+
err = waitForPacketCaptureDaemonset(o, ds)
105+
if err != nil {
106+
log.Fatalf("Error Waiting for daemonset %v", err)
107+
return err
108+
}
109+
log.Println("Copying Files From Packet Capture Pods")
110+
err = copyFilesFromPacketCapturePods(o)
111+
if err != nil {
112+
log.Fatalf("Error copying files %v", err)
113+
return err
114+
}
115+
log.Println("Deleting Packet Capture Daemonset")
116+
err = deletePacketCaptureDaemonSet(o, ds)
117+
if err != nil {
118+
log.Fatalf("Error deleting packet capture daemonset %v", err)
119+
return err
120+
}
121+
return nil
122+
}
123+
124+
// ensurePacketCaptureDaemonSet ensures the daemonset exists
125+
func ensurePacketCaptureDaemonSet(o *packetCaptureOptions) (*appsv1.DaemonSet, error) {
126+
key := types.NamespacedName{Name: o.name, Namespace: o.namespace}
127+
desired := desiredPacketCaptureDaemonSet(o, key)
128+
haveDs, err := hasPacketCaptureDaemonSet(o, key)
129+
if err != nil {
130+
log.Fatalf("Error getting current daemonset %v", err)
131+
return nil, err
132+
}
133+
134+
if haveDs {
135+
log.Println("Already have packet-capture daemonset")
136+
return nil, errors.New(fmt.Sprintf("%s daemonset already exists in the %s namespace", o.name, o.namespace))
137+
}
138+
139+
err = createPacketCaptureDaemonSet(o, desired)
140+
if err != nil {
141+
log.Fatalf("Error creating packet capture daemonset %v", err)
142+
return nil, err
143+
}
144+
145+
log.Println("Successfully ensured packet capture daemonset")
146+
return desired, nil
147+
}
148+
149+
// hasPacketCaptureDaemonSet returns the current daemonset
150+
func hasPacketCaptureDaemonSet(o *packetCaptureOptions, key types.NamespacedName) (bool, error) {
151+
ds := &appsv1.DaemonSet{}
152+
153+
if err := o.kubeCli.Get(context.TODO(), key, ds); err != nil {
154+
if k8serr.IsNotFound(err) {
155+
return false, nil
156+
}
157+
return false, err
158+
}
159+
return true, nil
160+
}
161+
162+
// createPacketCaptureDaemonSet creates the given daemonset resource
163+
func createPacketCaptureDaemonSet(o *packetCaptureOptions, ds *appsv1.DaemonSet) error {
164+
if err := o.kubeCli.Create(context.TODO(), ds); err != nil {
165+
return fmt.Errorf("failed to create daemonset %s/%s: %v", ds.Namespace, ds.Name, err)
166+
}
167+
return nil
168+
}
169+
170+
// deletePacketCaptureDaemonSet creates the given daemonset resource
171+
func deletePacketCaptureDaemonSet(o *packetCaptureOptions, ds *appsv1.DaemonSet) error {
172+
if err := o.kubeCli.Delete(context.TODO(), ds); err != nil {
173+
return fmt.Errorf("failed to delete daemonset %s/%s: %v", ds.Namespace, ds.Name, err)
174+
}
175+
return nil
176+
}
177+
178+
// desiredPacketCaptureDaemonSet returns the desired daemonset read in from manifests
179+
func desiredPacketCaptureDaemonSet(o *packetCaptureOptions, key types.NamespacedName) *appsv1.DaemonSet {
180+
ds := &appsv1.DaemonSet{}
181+
t := true
182+
ls := &metav1.LabelSelector{
183+
MatchLabels: map[string]string{
184+
"app": key.Name,
185+
},
186+
}
187+
ds.Name = key.Name
188+
ds.Namespace = key.Namespace
189+
190+
ds.Spec.Selector = ls
191+
ds.Spec.Template.Spec.NodeSelector = map[string]string{
192+
o.nodeLabelKey: o.nodeLabelValue,
193+
}
194+
ds.Spec.Template.Labels = ls.MatchLabels
195+
ds.Spec.Template.Spec.Tolerations = []corev1.Toleration{
196+
{
197+
Effect: "NoSchedule",
198+
Key: o.nodeLabelKey,
199+
Operator: "Exists",
200+
},
201+
}
202+
ds.Spec.Template.Spec.Volumes = []corev1.Volume{
203+
{
204+
Name: "capture-output",
205+
VolumeSource: corev1.VolumeSource{
206+
EmptyDir: &corev1.EmptyDirVolumeSource{},
207+
},
208+
},
209+
}
210+
ds.Spec.Template.Spec.HostNetwork = true
211+
ds.Spec.Template.Spec.InitContainers = []corev1.Container{
212+
{
213+
Name: "init-capture",
214+
Image: packetCaptureImage,
215+
ImagePullPolicy: corev1.PullIfNotPresent,
216+
Command: []string{"/bin/bash", "-c", "tcpdump -G " + strconv.Itoa(o.duration) + " -W 1 -w /tmp/capture-output/capture.pcap -i vxlan_sys_4789 -nn -s0; sync"},
217+
SecurityContext: &corev1.SecurityContext{Privileged: &t},
218+
VolumeMounts: []corev1.VolumeMount{
219+
{
220+
Name: "capture-output",
221+
MountPath: "/tmp/capture-output",
222+
ReadOnly: false,
223+
},
224+
},
225+
},
226+
}
227+
ds.Spec.Template.Spec.Containers = []corev1.Container{
228+
{
229+
Name: "copy",
230+
Image: packetCaptureImage,
231+
ImagePullPolicy: corev1.PullIfNotPresent,
232+
Command: []string{"/bin/bash", "-c", "trap : TERM INT; sleep infinity & wait"},
233+
SecurityContext: &corev1.SecurityContext{Privileged: &t},
234+
VolumeMounts: []corev1.VolumeMount{
235+
{
236+
Name: "capture-output",
237+
MountPath: "/tmp/capture-output",
238+
ReadOnly: false,
239+
},
240+
},
241+
},
242+
}
243+
244+
return ds
245+
}
246+
247+
func copyFilesFromPod(o *packetCaptureOptions, pod *corev1.Pod) error {
248+
os.MkdirAll(outputDir, 0755)
249+
fileName := fmt.Sprintf("%s-%s.pcap", pod.Spec.NodeName, o.startTime.UTC().Format("20060102T150405"))
250+
cmd := exec.Command("oc", "cp", pod.Namespace+"/"+pod.Name+":/tmp/capture-output/capture.pcap", outputDir+"/"+fileName)
251+
var stdBuffer bytes.Buffer
252+
mw := io.MultiWriter(os.Stdout, &stdBuffer)
253+
254+
cmd.Stdout = mw
255+
cmd.Stderr = mw
256+
257+
err := cmd.Run()
258+
259+
if err != nil {
260+
log.Println(stdBuffer.String())
261+
}
262+
263+
return err
264+
}
265+
266+
func waitForPacketCaptureDaemonset(o *packetCaptureOptions, ds *appsv1.DaemonSet) error {
267+
pollErr := wait.PollImmediate(10*time.Second, time.Duration(600)*time.Second, func() (bool, error) {
268+
var err error
269+
tmp := &appsv1.DaemonSet{}
270+
key := types.NamespacedName{Name: ds.Name, Namespace: ds.Namespace}
271+
if err = o.kubeCli.Get(context.TODO(), key, tmp); err == nil {
272+
ready := (tmp.Status.NumberReady > 0 &&
273+
tmp.Status.NumberAvailable == tmp.Status.NumberReady &&
274+
tmp.Status.NumberReady == tmp.Status.DesiredNumberScheduled)
275+
return ready, nil
276+
}
277+
return false, err
278+
})
279+
return pollErr
280+
}
281+
282+
func waitForPacketCaptureContainerRunning(o *packetCaptureOptions, pod *corev1.Pod) error {
283+
pollErr := wait.PollImmediate(10*time.Second, time.Duration(600)*time.Second, func() (bool, error) {
284+
var err error
285+
tmp := &corev1.Pod{}
286+
key := types.NamespacedName{Name: pod.Name, Namespace: pod.Namespace}
287+
if err = o.kubeCli.Get(context.TODO(), key, tmp); err == nil {
288+
if len(tmp.Status.ContainerStatuses) == 0 {
289+
return false, nil
290+
}
291+
state := tmp.Status.ContainerStatuses[0].State
292+
running := state.Running != nil
293+
return running, nil
294+
}
295+
return false, err
296+
})
297+
return pollErr
298+
}
299+
300+
func copyFilesFromPacketCapturePods(o *packetCaptureOptions) error {
301+
var pods corev1.PodList
302+
303+
if err := o.kubeCli.List(context.TODO(), &pods, &client.ListOptions{
304+
LabelSelector: labels.SelectorFromSet(labels.Set{"app": o.name}),
305+
Namespace: o.namespace,
306+
}); err != nil {
307+
return err
308+
}
309+
for _, pod := range pods.Items {
310+
if len(pod.Status.ContainerStatuses) == 0 {
311+
continue
312+
}
313+
err := waitForPacketCaptureContainerRunning(o, &pod)
314+
if err != nil {
315+
log.Fatalf("Error waiting for pods %v", err)
316+
return err
317+
}
318+
log.Printf("Copying files from %s\n", pod.Name)
319+
err = copyFilesFromPod(o, &pod)
320+
if err != nil {
321+
log.Fatalf("error copying files %v", err)
322+
return err
323+
}
324+
}
325+
326+
return nil
327+
}

0 commit comments

Comments
 (0)