Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add pod-to-pod strict mode tests #1947

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 20 additions & 9 deletions connectivity/check/features.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,14 @@ import (
type Feature string

const (
FeatureCNIChaining Feature = "cni-chaining"
FeatureMonitorAggregation Feature = "monitor-aggregation"
FeatureL7Proxy Feature = "l7-proxy"
FeatureHostFirewall Feature = "host-firewall"
FeatureICMPPolicy Feature = "icmp-policy"
FeatureTunnel Feature = "tunnel"
FeatureEndpointRoutes Feature = "endpoint-routes"
FeatureCNIChaining Feature = "cni-chaining"
FeatureMonitorAggregation Feature = "monitor-aggregation"
FeatureL7Proxy Feature = "l7-proxy"
FeatureHostFirewall Feature = "host-firewall"
FeatureICMPPolicy Feature = "icmp-policy"
FeatureTunnel Feature = "tunnel"
FeatureEndpointRoutes Feature = "endpoint-routes"
FeatureCiliumEndpointSlice Feature = "cilium-endpoint-slice"

FeatureKPRMode Feature = "kpr-mode"
FeatureKPRExternalIPs Feature = "kpr-external-ips"
Expand All @@ -49,8 +50,9 @@ const (

FeatureHealthChecking Feature = "health-checking"

FeatureEncryptionPod Feature = "encryption-pod"
FeatureEncryptionNode Feature = "encryption-node"
FeatureEncryptionPod Feature = "encryption-pod"
FeatureEncryptionNode Feature = "encryption-node"
FeatureStrictEncryption Feature = "encryption-strict"

FeatureIPv4 Feature = "ipv4"
FeatureIPv6 Feature = "ipv6"
Expand Down Expand Up @@ -243,6 +245,15 @@ func (ct *ConnectivityTest) extractFeaturesFromConfigMap(ctx context.Context, cl
}
}

result[FeatureStrictEncryption] = FeatureStatus{
Enabled: cm.Data["enable-encryption-strict-mode"] == "true",
Mode: cm.Data["encryption-strict-mode-cidr"],
}

result[FeatureCiliumEndpointSlice] = FeatureStatus{
Enabled: cm.Data["enable-cilium-endpoint-slice"] == "true",
}

result[FeatureIPv4] = FeatureStatus{
Enabled: cm.Data["enable-ipv4"] == "true",
}
Expand Down
14 changes: 14 additions & 0 deletions connectivity/suite.go
Original file line number Diff line number Diff line change
Expand Up @@ -752,6 +752,20 @@ func Run(ctx context.Context, ct *check.ConnectivityTest, addExtraTests func(*ch
tests.NodeToNodeEncryption(),
)

if ct.Params().IncludeUnsafeTests {
ct.NewTest("pod-to-pod-strict-wireguard-encryption").
WithFeatureRequirements(
check.RequireFeatureEnabled(check.FeatureEncryptionPod),
check.RequireFeatureEnabled(check.FeatureStrictEncryption),
check.RequireFeatureEnabled(check.FeatureIPv4), // strict encryption only supported for IPv4
check.RequireFeatureDisabled(check.FeatureIPv6),
check.RequireFeatureEnabled(check.FeatureCiliumEndpointSlice),
).
WithScenarios(
tests.PodToPodStrictEncryption(),
)
}

if ct.Params().IncludeUnsafeTests {
ct.NewTest("egress-gateway").
WithCiliumEgressGatewayPolicy(egressGatewayPolicyYAML, check.CiliumEgressGatewayPolicyParams{}).
Expand Down
197 changes: 173 additions & 24 deletions connectivity/tests/encryption.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"time"

"github.com/cilium/cilium/pkg/defaults"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/cilium/cilium-cli/connectivity/check"
)
Expand Down Expand Up @@ -115,13 +116,55 @@ func (s *podToPodEncryption) Run(ctx context.Context, t *check.Test) {
clientHost := ct.HostNetNSPodsByNode()[client.Pod.Spec.NodeName]

t.ForEachIPFamily(func(ipFam check.IPFamily) {
testNoTrafficLeak(ctx, t, s, client, &server, &clientHost, requestHTTP, ipFam)
testNoTrafficLeak(ctx, t, client, &server, &clientHost, requestHTTP, ipFam, false)
})
}

func testNoTrafficLeak(ctx context.Context, t *check.Test, s check.Scenario,
// PodToPodEncryption is a test case which checks the following:
// - There is a connectivity between pods on different nodes when any
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// - There is a connectivity between pods on different nodes when any
// - There is connectivity between pods on different nodes when any

// encryption mode is on (either WireGuard or IPsec).
// - No unencrypted packet is leaked.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you clarify what is meant by 'leaked' here?

//
// The checks are implemented by curl'ing a server pod from a client pod, and
// then inspecting tcpdump captures from the client pod's node.
func PodToPodStrictEncryption() check.Scenario {
return &podToPodStrictEncryption{}
}

type podToPodStrictEncryption struct{}

func (s *podToPodStrictEncryption) Name() string {
return "pod-to-pod-strict-encryption"
}

func (s *podToPodStrictEncryption) Run(ctx context.Context, t *check.Test) {
ct := t.Context()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: do we need this tmp var?

client := ct.RandomClientPod()

var server check.Pod
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Better to use a pointer for this so we can check the outcome of the loop.
i,e what happens if we could not locate a matching pod?

for _, pod := range ct.EchoPods() {
// Make sure that the server pod is on another node than client
if pod.Pod.Status.HostIP != client.Pod.Status.HostIP {
server = pod
break
}
}

// clientHost is a pod running on the same node as the client pod, just in
// the host netns.
clientHost := ct.HostNetNSPodsByNode()[client.Pod.Spec.NodeName]

t.ForEachIPFamily(func(ipFam check.IPFamily) {
testNoTrafficLeakStrict(ctx, t, client, &server, &clientHost, requestHTTP, ipFam)
})
}

// startTcpdump starts tcpdump in the background, and returns a cancel function
// to stop it, and a channel which is closed when tcpdump has exited.
// It writes captured pkts to /tmp/$TEST_NAME.pcap.
func startTcpdump(ctx context.Context, t *check.Test,
client, server, clientHost *check.Pod, reqType requestType, ipFam check.IPFamily,
) {
) (context.CancelFunc, chan struct{}) {
dstAddr := server.Address(ipFam)
iface := getInterNodeIface(ctx, t, clientHost, dstAddr)
srcAddr := getSourceAddress(ctx, t, client, clientHost, ipFam, dstAddr)
Expand Down Expand Up @@ -182,25 +225,12 @@ func testNoTrafficLeak(ctx context.Context, t *check.Test, s check.Scenario,
}
}

switch reqType {
case requestHTTP:
// Curl the server from the client to generate some traffic
t.NewAction(s, fmt.Sprintf("curl-%s", ipFam), client, server, ipFam).Run(func(a *check.Action) {
a.ExecInPod(ctx, t.Context().CurlCommand(server, ipFam))
})
case requestICMPEcho:
// Ping the server from the client to generate some traffic
t.NewAction(s, fmt.Sprintf("ping-%s", ipFam), client, server, ipFam).Run(func(a *check.Action) {
a.ExecInPod(ctx, t.Context().PingCommand(server, ipFam))
})
default:
t.Fatalf("Invalid request type: %d", reqType)
}

// Wait until tcpdump has exited
killCmd()
<-bgExited
return killCmd, bgExited
}

// checkPcapForLeak checks whether there is any unencrypted pkt captured in the
// pcap file. If so, it fails the test.
func checkPcapForLeak(ctx context.Context, t *check.Test, clientHost *check.Pod) {
// Redirect stderr to /dev/null, as tcpdump logs to stderr, and ExecInPod
// will return an error if any char is written to stderr. Anyway, the count
// is written to stdout.
Expand All @@ -224,6 +254,125 @@ func testNoTrafficLeak(ctx context.Context, t *check.Test, s check.Scenario,
}
}

func testNoTrafficLeakStrict(ctx context.Context, t *check.Test,
client, server, clientHost *check.Pod, reqType requestType, ipFam check.IPFamily,
) {
deleteCES := func(endpointName string) {
cesList, err := clientHost.K8sClient.CiliumClientset.CiliumV2alpha1().CiliumEndpointSlices().List(ctx, metav1.ListOptions{})
if err != nil {
t.Fatalf("Failed to list CiliumEndpointSlices: %s", err)
}
var cesToDelete string
for _, ces := range cesList.Items {
for _, ep := range ces.Endpoints {
if ep.Name == endpointName {
cesToDelete = ces.Name
break
}
}
}
if cesToDelete == "" {
t.Fatalf("Failed to find CiliumEndpointSlice for pod %s", server.Pod.Name)
}
if err := clientHost.K8sClient.CiliumClientset.CiliumV2alpha1().CiliumEndpointSlices().Delete(ctx, cesToDelete, metav1.DeleteOptions{}); err != nil {
t.Fatalf("Failed to delete CiliumEndpointSlice %s: %s", cesToDelete, err)
}
}
setCiliumOperatorScale := func(replicas int32) int32 {
scale, err := clientHost.K8sClient.Clientset.AppsV1().Deployments(t.Context().Params().CiliumNamespace).GetScale(ctx, "cilium-operator", metav1.GetOptions{})
if err != nil {
t.Fatalf("Failed to get cilium-operator scale: %s", err)
}
savedReplicas := scale.Spec.Replicas
scale.Spec.Replicas = replicas
if _, err := clientHost.K8sClient.Clientset.AppsV1().Deployments(t.Context().Params().CiliumNamespace).UpdateScale(ctx, "cilium-operator", scale, metav1.UpdateOptions{}); err != nil {
t.Fatalf("Failed to scale cilium-operator: %s", err)
}
return savedReplicas
}
waitForIPCacheEntry := func(clientPod, dstPod *check.Pod) {
timeout := time.After(20 * time.Second)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Perhaps using consts here for the timeout and sleep durations would make it easier to adjust in the future?

var ciliumHostPodName string
for _, pod := range t.Context().CiliumPods() {
if pod.NodeName() == clientPod.Pod.Spec.NodeName {
ciliumHostPodName = pod.Pod.Name
break
}
}
for found := false; !found; {
select {
case <-timeout:
t.Fatalf("Failed to wait for ipcache to contain pod %s's IP", dstPod.Pod.Name)
default:
cmd := []string{"/bin/sh", "-c", "cilium bpf ipcache list"}
out, err := clientPod.K8sClient.ExecInPod(ctx, t.Context().Params().CiliumNamespace, ciliumHostPodName, "", cmd)
if err != nil {
t.Fatalf("Failed to retrieve ipcache output: %s", err)
}
if strings.Contains(out.String(), dstPod.Pod.Status.PodIP) {
found = true
break
}

time.Sleep(500 * time.Millisecond)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Per above... constant?

}
}
}

// Disable endpoint propagation by scaling down the cilium-operator in the cilium namespace
savedScale := setCiliumOperatorScale(0)

// Delete CES of the server pod
deleteCES(server.Pod.Name)
deleteCES(client.Pod.Name)

// Run the test
testNoTrafficLeak(ctx, t, client, server, clientHost, reqType, ipFam, true)

// Restore the cilium-operator scale
_ = setCiliumOperatorScale(savedScale)

// wait for the ipcache to contain the server pod's IP
waitForIPCacheEntry(client, server)

// Run the test
testNoTrafficLeak(ctx, t, client, server, clientHost, reqType, ipFam, false)
}

func testNoTrafficLeak(ctx context.Context, t *check.Test, client, server, clientHost *check.Pod,
reqType requestType, ipFam check.IPFamily, expectFail bool,
) {
// Setup
killCmd, bgExited := startTcpdump(ctx, t, client, server, clientHost, reqType, ipFam)

var cmd []string
// Run the test
switch reqType {
case requestHTTP:
// Curl the server from the client to generate some traffic
cmd = t.Context().CurlCommand(server, ipFam)
case requestICMPEcho:
// Ping the server from the client to generate some traffic
cmd = t.Context().PingCommand(server, ipFam)
default:
t.Fatalf("Invalid request type: %d", reqType)
}

_, err := client.K8sClient.ExecInPod(ctx, client.Pod.Namespace, client.Pod.Name, "", cmd)
if expectFail && err == nil {
t.Failf("Expected curl to fail, but it succeeded")
} else if !expectFail && err != nil {
t.Fatalf("Failed to curl server: %s", err)
}

// Wait until tcpdump has exited
killCmd()
<-bgExited

// Assert no traffic leak
checkPcapForLeak(ctx, t, clientHost)
}

// bytes.Buffer from the stdlib is non-thread safe, thus our custom
// implementation. Unfortunately, we cannot use io.Pipe, as Write() blocks until
// Read() has read all content, which makes it deadlock-prone when used with
Expand Down Expand Up @@ -288,10 +437,10 @@ func (s *nodeToNodeEncryption) Run(ctx context.Context, t *check.Test) {
t.ForEachIPFamily(func(ipFam check.IPFamily) {
// Test pod-to-remote-host (ICMP Echo instead of HTTP because a remote host
// does not have a HTTP server running)
testNoTrafficLeak(ctx, t, s, client, &serverHost, &clientHost, requestICMPEcho, ipFam)
testNoTrafficLeak(ctx, t, client, &serverHost, &clientHost, requestICMPEcho, ipFam, false)
// Test host-to-remote-host
testNoTrafficLeak(ctx, t, s, &clientHost, &serverHost, &clientHost, requestICMPEcho, ipFam)
testNoTrafficLeak(ctx, t, &clientHost, &serverHost, &clientHost, requestICMPEcho, ipFam, false)
// Test host-to-remote-pod
testNoTrafficLeak(ctx, t, s, &clientHost, &server, &clientHost, requestHTTP, ipFam)
testNoTrafficLeak(ctx, t, &clientHost, &server, &clientHost, requestHTTP, ipFam, false)
})
}
2 changes: 1 addition & 1 deletion internal/cli/cmd/connectivity.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ func newCmdConnectivityTest(hooks Hooks) *cobra.Command {
cmd.Flags().StringToStringVar(&params.JunitProperties, "junit-property", map[string]string{}, "Add key=value properties to the generated junit file")
cmd.Flags().BoolVar(&params.SkipIPCacheCheck, "skip-ip-cache-check", true, "Skip IPCache check")
cmd.Flags().MarkHidden("skip-ip-cache-check")
cmd.Flags().BoolVar(&params.IncludeUnsafeTests, "include-unsafe-tests", false, "Include tests which can modify cluster nodes state")
cmd.Flags().BoolVar(&params.IncludeUnsafeTests, "include-unsafe-tests", false, "Include tests which can modify cluster state")
cmd.Flags().MarkHidden("include-unsafe-tests")

cmd.Flags().StringVar(&params.K8sVersion, "k8s-version", "", "Kubernetes server version in case auto-detection fails")
Expand Down
Loading