Add pod-to-pod strict mode tests #1947
base: main
@@ -14,6 +14,7 @@ import (
 	"time"

 	"github.com/cilium/cilium/pkg/defaults"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

 	"github.com/cilium/cilium-cli/connectivity/check"
 )

@@ -115,13 +116,55 @@ func (s *podToPodEncryption) Run(ctx context.Context, t *check.Test) {
 	clientHost := ct.HostNetNSPodsByNode()[client.Pod.Spec.NodeName]

 	t.ForEachIPFamily(func(ipFam check.IPFamily) {
-		testNoTrafficLeak(ctx, t, s, client, &server, &clientHost, requestHTTP, ipFam)
+		testNoTrafficLeak(ctx, t, client, &server, &clientHost, requestHTTP, ipFam, false)
 	})
 }

-func testNoTrafficLeak(ctx context.Context, t *check.Test, s check.Scenario,
+// PodToPodStrictEncryption is a test case which checks the following:
+//   - There is connectivity between pods on different nodes when any
+//     encryption mode is on (either WireGuard or IPsec).
+//   - No unencrypted packet is leaked.
[review] Could you clarify what is meant by "leaked" here?
+//
+// The checks are implemented by curl'ing a server pod from a client pod, and
+// then inspecting tcpdump captures from the client pod's node.
+func PodToPodStrictEncryption() check.Scenario {
+	return &podToPodStrictEncryption{}
+}
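
For orientation, a scenario constructor like this is typically wired into the connectivity suite. The snippet below is a hedged sketch, not part of this diff: the newTest helper, the feature names, and the builder methods are assumptions mirroring how the sibling PodToPodEncryption scenario is registered.

```go
// Hypothetical registration sketch; the helper and feature names are
// assumptions, not taken from this PR.
newTest("pod-to-pod-strict-encryption", ct).
	WithFeatureRequirements(features.RequireEnabled(features.EncryptionPod)).
	WithScenarios(tests.PodToPodStrictEncryption())
```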
+
+type podToPodStrictEncryption struct{}
+
+func (s *podToPodStrictEncryption) Name() string {
+	return "pod-to-pod-strict-encryption"
+}
+
+func (s *podToPodStrictEncryption) Run(ctx context.Context, t *check.Test) {
+	ct := t.Context()
[review] nit: do we need this tmp var?
+	client := ct.RandomClientPod()
+
+	var server check.Pod
[review] nit: Better to use a pointer for this so we can check the outcome of the loop.
+	for _, pod := range ct.EchoPods() {
+		// Make sure that the server pod is on another node than the client
+		if pod.Pod.Status.HostIP != client.Pod.Status.HostIP {
+			server = pod
+			break
+		}
+	}
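
Regarding the pointer nit above, a minimal sketch of the suggested variant, which also fails fast when no echo pod lives on a different node (all types come from this diff):

```go
// Sketch of the reviewer's suggestion: a pointer makes the "not found"
// outcome of the loop checkable.
var server *check.Pod
for _, pod := range ct.EchoPods() {
	if pod.Pod.Status.HostIP != client.Pod.Status.HostIP {
		pod := pod // don't take the address of the loop variable
		server = &pod
		break
	}
}
if server == nil {
	t.Fatalf("no echo pod found on a node other than the client's")
}
```

The call sites below would then pass server instead of &server.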
+
+	// clientHost is a pod running on the same node as the client pod, just in
+	// the host netns.
+	clientHost := ct.HostNetNSPodsByNode()[client.Pod.Spec.NodeName]
+
+	t.ForEachIPFamily(func(ipFam check.IPFamily) {
+		testNoTrafficLeakStrict(ctx, t, client, &server, &clientHost, requestHTTP, ipFam)
+	})
+}
+
+// startTcpdump starts tcpdump in the background, and returns a cancel function
+// to stop it, and a channel which is closed when tcpdump has exited.
+// It writes captured packets to /tmp/$TEST_NAME.pcap.
+func startTcpdump(ctx context.Context, t *check.Test,
 	client, server, clientHost *check.Pod, reqType requestType, ipFam check.IPFamily,
-) {
+) (context.CancelFunc, chan struct{}) {
 	dstAddr := server.Address(ipFam)
 	iface := getInterNodeIface(ctx, t, clientHost, dstAddr)
 	srcAddr := getSourceAddress(ctx, t, client, clientHost, ipFam, dstAddr)
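
For context, the capture that startTcpdump sets up looks roughly like the following. The exact tcpdump flags and the filter that excludes encrypted traffic live outside this hunk, so treat every detail below as an illustration only.

```go
// Illustrative only: the real command and its filter are outside this hunk.
// testName is a hypothetical stand-in for however the test derives $TEST_NAME.
// The idea: capture traffic between srcAddr and dstAddr on the inter-node
// interface and write it to a pcap that checkPcapForLeak later inspects.
pcapPath := fmt.Sprintf("/tmp/%s.pcap", testName)
cmd := []string{"tcpdump", "-i", iface, "-w", pcapPath,
	fmt.Sprintf("src host %s and dst host %s", srcAddr, dstAddr)}
```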

@@ -182,25 +225,12 @@ func testNoTrafficLeak(ctx context.Context, t *check.Test, s check.Scenario,
 		}
 	}

-	switch reqType {
-	case requestHTTP:
-		// Curl the server from the client to generate some traffic
-		t.NewAction(s, fmt.Sprintf("curl-%s", ipFam), client, server, ipFam).Run(func(a *check.Action) {
-			a.ExecInPod(ctx, t.Context().CurlCommand(server, ipFam))
-		})
-	case requestICMPEcho:
-		// Ping the server from the client to generate some traffic
-		t.NewAction(s, fmt.Sprintf("ping-%s", ipFam), client, server, ipFam).Run(func(a *check.Action) {
-			a.ExecInPod(ctx, t.Context().PingCommand(server, ipFam))
-		})
-	default:
-		t.Fatalf("Invalid request type: %d", reqType)
-	}
-
-	// Wait until tcpdump has exited
-	killCmd()
-	<-bgExited
+	return killCmd, bgExited
 }
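
startTcpdump now exposes a cancel function plus a done channel, a common Go idiom for supervising a background process. A self-contained sketch of the idiom, with purely illustrative names (nothing here is from this diff):

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// startBackground launches work in a goroutine and returns a cancel func
// plus a channel that is closed once the goroutine has fully exited --
// the same contract startTcpdump exposes in this diff.
func startBackground(ctx context.Context) (context.CancelFunc, chan struct{}) {
	ctx, cancel := context.WithCancel(ctx)
	exited := make(chan struct{})
	go func() {
		defer close(exited)
		<-ctx.Done() // stand-in for the real work, e.g. a tcpdump exec
	}()
	return cancel, exited
}

func main() {
	cancel, exited := startBackground(context.Background())
	time.Sleep(100 * time.Millisecond) // let the background work run
	cancel()                           // ask it to stop...
	<-exited                           // ...and wait until it actually has
	fmt.Println("background work fully stopped")
}
```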

+// checkPcapForLeak checks whether there is any unencrypted packet captured in
+// the pcap file. If so, it fails the test.
+func checkPcapForLeak(ctx context.Context, t *check.Test, clientHost *check.Pod) {
 	// Redirect stderr to /dev/null, as tcpdump logs to stderr, and ExecInPod
 	// will return an error if any char is written to stderr. Anyway, the count
 	// is written to stdout.

@@ -224,6 +254,125 @@ func testNoTrafficLeak(ctx context.Context, t *check.Test, s check.Scenario,
 	}
 }
|
||
func testNoTrafficLeakStrict(ctx context.Context, t *check.Test, | ||
client, server, clientHost *check.Pod, reqType requestType, ipFam check.IPFamily, | ||
) { | ||
deleteCES := func(endpointName string) { | ||
cesList, err := clientHost.K8sClient.CiliumClientset.CiliumV2alpha1().CiliumEndpointSlices().List(ctx, metav1.ListOptions{}) | ||
if err != nil { | ||
t.Fatalf("Failed to list CiliumEndpointSlices: %s", err) | ||
} | ||
var cesToDelete string | ||
for _, ces := range cesList.Items { | ||
for _, ep := range ces.Endpoints { | ||
if ep.Name == endpointName { | ||
cesToDelete = ces.Name | ||
break | ||
} | ||
} | ||
} | ||
if cesToDelete == "" { | ||
t.Fatalf("Failed to find CiliumEndpointSlice for pod %s", server.Pod.Name) | ||
} | ||
if err := clientHost.K8sClient.CiliumClientset.CiliumV2alpha1().CiliumEndpointSlices().Delete(ctx, cesToDelete, metav1.DeleteOptions{}); err != nil { | ||
t.Fatalf("Failed to delete CiliumEndpointSlice %s: %s", cesToDelete, err) | ||
} | ||
} | ||
+	setCiliumOperatorScale := func(replicas int32) int32 {
+		scale, err := clientHost.K8sClient.Clientset.AppsV1().Deployments(t.Context().Params().CiliumNamespace).GetScale(ctx, "cilium-operator", metav1.GetOptions{})
+		if err != nil {
+			t.Fatalf("Failed to get cilium-operator scale: %s", err)
+		}
+		savedReplicas := scale.Spec.Replicas
+		scale.Spec.Replicas = replicas
+		if _, err := clientHost.K8sClient.Clientset.AppsV1().Deployments(t.Context().Params().CiliumNamespace).UpdateScale(ctx, "cilium-operator", scale, metav1.UpdateOptions{}); err != nil {
+			t.Fatalf("Failed to scale cilium-operator: %s", err)
+		}
+		return savedReplicas
+	}
+	waitForIPCacheEntry := func(clientPod, dstPod *check.Pod) {
+		timeout := time.After(20 * time.Second)
[review] nit: Perhaps using consts here for the timeout and sleep durations would make it easier to adjust in the future?
+		var ciliumHostPodName string
+		for _, pod := range t.Context().CiliumPods() {
+			if pod.NodeName() == clientPod.Pod.Spec.NodeName {
+				ciliumHostPodName = pod.Pod.Name
+				break
+			}
+		}
+		for found := false; !found; {
+			select {
+			case <-timeout:
+				t.Fatalf("Failed to wait for ipcache to contain pod %s's IP", dstPod.Pod.Name)
+			default:
+				cmd := []string{"/bin/sh", "-c", "cilium bpf ipcache list"}
+				out, err := clientPod.K8sClient.ExecInPod(ctx, t.Context().Params().CiliumNamespace, ciliumHostPodName, "", cmd)
+				if err != nil {
+					t.Fatalf("Failed to retrieve ipcache output: %s", err)
+				}
+				if strings.Contains(out.String(), dstPod.Pod.Status.PodIP) {
+					found = true
+					break
+				}
+
+				time.Sleep(500 * time.Millisecond)
[review] nit: Per above... constant?
+			}
+		}
+	}
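
Picking up the two duration nits above, a sketch with named constants; it also shows wait.PollUntilContextTimeout (import k8s.io/apimachinery/pkg/util/wait) as a possible replacement for the hand-rolled select loop. checkIPCache is a hypothetical helper wrapping the exec + strings.Contains check, not something this PR defines.

```go
// Named durations per the review nits; the values mirror this diff.
const (
	ipCacheWaitTimeout  = 20 * time.Second
	ipCacheWaitInterval = 500 * time.Millisecond
)

// Possible alternative to the manual select loop, using apimachinery's
// polling helper. checkIPCache is hypothetical.
err := wait.PollUntilContextTimeout(ctx, ipCacheWaitInterval, ipCacheWaitTimeout, true,
	func(ctx context.Context) (bool, error) {
		return checkIPCache(ctx, clientPod, dstPod), nil
	})
if err != nil {
	t.Fatalf("Failed to wait for ipcache to contain pod %s's IP: %s", dstPod.Pod.Name, err)
}
```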
+
+	// Disable endpoint propagation by scaling down the cilium-operator in the Cilium namespace
+	savedScale := setCiliumOperatorScale(0)
+
+	// Delete the CES of both the server and the client pod
+	deleteCES(server.Pod.Name)
+	deleteCES(client.Pod.Name)
+
+	// Run the test, expecting the request to fail: with the CES deleted, the
+	// peers are unknown to each other and strict mode should drop the traffic
+	testNoTrafficLeak(ctx, t, client, server, clientHost, reqType, ipFam, true)
+
+	// Restore the cilium-operator scale
+	_ = setCiliumOperatorScale(savedScale)
+
+	// Wait for the ipcache to contain the server pod's IP
+	waitForIPCacheEntry(client, server)
+
+	// Run the test again, this time expecting the request to succeed
+	testNoTrafficLeak(ctx, t, client, server, clientHost, reqType, ipFam, false)
+}
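
One caveat with the sequence above: if the first testNoTrafficLeak call aborts the test, the operator stays scaled to zero. A defer-based restore would be more robust, assuming fatal failures unwind the stack so deferred calls run (if the framework exits the process instead, this sketch does not help):

```go
// Sketch: restore the scale even if the test aborts mid-way. Assumes fatal
// failures unwind the goroutine (so defers run); return values of the
// deferred call are discarded, which is legal Go.
savedScale := setCiliumOperatorScale(0)
defer setCiliumOperatorScale(savedScale)
```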
+
+func testNoTrafficLeak(ctx context.Context, t *check.Test, client, server, clientHost *check.Pod,
+	reqType requestType, ipFam check.IPFamily, expectFail bool,
+) {
+	// Setup: start capturing traffic on the inter-node interface
+	killCmd, bgExited := startTcpdump(ctx, t, client, server, clientHost, reqType, ipFam)
+
+	var cmd []string
+	switch reqType {
+	case requestHTTP:
+		// Curl the server from the client to generate some traffic
+		cmd = t.Context().CurlCommand(server, ipFam)
+	case requestICMPEcho:
+		// Ping the server from the client to generate some traffic
+		cmd = t.Context().PingCommand(server, ipFam)
+	default:
+		t.Fatalf("Invalid request type: %d", reqType)
+	}
+
+	// Run the request from the client pod
+	_, err := client.K8sClient.ExecInPod(ctx, client.Pod.Namespace, client.Pod.Name, "", cmd)
+	if expectFail && err == nil {
+		t.Failf("Expected the request to fail, but it succeeded")
+	} else if !expectFail && err != nil {
+		t.Fatalf("Failed to reach the server: %s", err)
+	}
+
+	// Wait until tcpdump has exited
+	killCmd()
+	<-bgExited
+
+	// Assert no traffic leak
+	checkPcapForLeak(ctx, t, clientHost)
+}

 // bytes.Buffer from the stdlib is not thread-safe, thus our custom
 // implementation. Unfortunately, we cannot use io.Pipe, as Write() blocks until
 // Read() has read all content, which makes it deadlock-prone when used with

@@ -288,10 +437,10 @@ func (s *nodeToNodeEncryption) Run(ctx context.Context, t *check.Test) {
 	t.ForEachIPFamily(func(ipFam check.IPFamily) {
 		// Test pod-to-remote-host (ICMP Echo instead of HTTP because a remote host
 		// does not have an HTTP server running)
-		testNoTrafficLeak(ctx, t, s, client, &serverHost, &clientHost, requestICMPEcho, ipFam)
+		testNoTrafficLeak(ctx, t, client, &serverHost, &clientHost, requestICMPEcho, ipFam, false)
 		// Test host-to-remote-host
-		testNoTrafficLeak(ctx, t, s, &clientHost, &serverHost, &clientHost, requestICMPEcho, ipFam)
+		testNoTrafficLeak(ctx, t, &clientHost, &serverHost, &clientHost, requestICMPEcho, ipFam, false)
 		// Test host-to-remote-pod
-		testNoTrafficLeak(ctx, t, s, &clientHost, &server, &clientHost, requestHTTP, ipFam)
+		testNoTrafficLeak(ctx, t, &clientHost, &server, &clientHost, requestHTTP, ipFam, false)
 	})
 }