From 5fc6a24e4144f27fdbbf81e9ceedb40b7e60b947 Mon Sep 17 00:00:00 2001 From: Hongliang Liu Date: Wed, 23 Mar 2022 16:52:59 +0800 Subject: [PATCH] Fix the issue of local probe bypassing flows on Windows When proxyAll is enabled, kube-proxy can be replaced by AntreaProxy, then Service traffic and non-Service traffic can be distinguished by ServiceCTMark and NotServiceCTMark. Service traffic with ServiceCTMark should not bypass Network Policies, and non-Service traffic generated by kubelet with NotServiceCTMark should bypass Network Policies. Signed-off-by: Hongliang Liu --- pkg/agent/openflow/client.go | 3 +- pkg/agent/openflow/fields.go | 9 ++--- pkg/agent/openflow/pipeline.go | 44 ++++++++++++++++--------- pkg/agent/openflow/pod_connectivity.go | 7 ++-- pkg/ovs/openflow/ofctrl_builder.go | 3 ++ test/integration/agent/openflow_test.go | 2 +- 6 files changed, 45 insertions(+), 23 deletions(-) diff --git a/pkg/agent/openflow/client.go b/pkg/agent/openflow/client.go index bb852c62cb9..1ef15b5b503 100644 --- a/pkg/agent/openflow/client.go +++ b/pkg/agent/openflow/client.go @@ -697,7 +697,8 @@ func (c *client) generatePipelines() { c.networkConfig, c.ovsDatapathType, c.connectUplinkToBridge, - c.enableMulticast) + c.enableMulticast, + c.proxyAll) c.activatedFeatures = append(c.activatedFeatures, c.featurePodConnectivity) c.traceableFeatures = append(c.traceableFeatures, c.featurePodConnectivity) diff --git a/pkg/agent/openflow/fields.go b/pkg/agent/openflow/fields.go index f6e756d5f5d..2ce0c313c78 100644 --- a/pkg/agent/openflow/fields.go +++ b/pkg/agent/openflow/fields.go @@ -173,11 +173,12 @@ var ( FromGatewayCTMark = binding.NewCTMark(ConnSourceCTMarkField, gatewayVal) FromBridgeCTMark = binding.NewCTMark(ConnSourceCTMarkField, bridgeVal) - // CTMark[4]: Mark to indicate DNAT is performed on the connection for Service. - // This CT mark is used in CtZone / CtZoneV6 and SNATCtZone / SNATCtZoneV6. - ServiceCTMark = binding.NewOneBitCTMark(4) + // CTMark[4]: Marks to indicate whether DNAT is performed on the connection for Service. + // These CT marks are used in CtZone / CtZoneV6 and SNATCtZone / SNATCtZoneV6. + ServiceCTMark = binding.NewOneBitCTMark(4) + NotServiceCTMark = binding.NewOneBitZeroCTMark(4) - // CTMark[5]: Mark to indicate SNAT should be performed on the connection for Service. + // CTMark[5]: Mark to indicate SNAT is performed on the connection for Service. // This CT mark is only used in CtZone / CtZoneV6. ConnSNATCTMark = binding.NewOneBitCTMark(5) diff --git a/pkg/agent/openflow/pipeline.go b/pkg/agent/openflow/pipeline.go index 8071497b402..42b9f480d07 100644 --- a/pkg/agent/openflow/pipeline.go +++ b/pkg/agent/openflow/pipeline.go @@ -2094,34 +2094,48 @@ func (f *featureNetworkPolicy) dnsPacketInFlow(conjunctionID uint32) binding.Flo Done() } -// localProbeFlow generates the flow to forward locally generated packets to ConntrackCommitTable, bypassing ingress -// rules of Network Policies. The packets are sent by kubelet to probe the liveness/readiness of local Pods. -// On Linux and when OVS kernel datapath is used, it identifies locally generated packets by matching the -// HostLocalSourceMark, otherwise it matches the source IP. The difference is because: -// 1. On Windows, kube-proxy userspace mode is used, and currently there is no way to distinguish kubelet generated -// traffic from kube-proxy proxied traffic. +// localProbeFlows generates the flows to forward locally generated request packets to stageConntrack directly, bypassing +// ingress rules of Network Policies. The packets are sent by kubelet to probe the liveness/readiness of local Pods. +// On Linux and when OVS kernel datapath is used, the probe packets are identified by matching the HostLocalSourceMark. +// On Windows or when OVS userspace (netdev) datapath is used, we need a different approach because: +// 1. On Windows, kube-proxy userspace mode is used, and currently there is no way to distinguish kubelet generated traffic +// from kube-proxy proxied traffic. // 2. pkt_mark field is not properly supported for OVS userspace (netdev) datapath. -// Note that there is a defect in the latter way that NodePort Service access by external clients will be masqueraded as -// a local gateway IP to bypass Network Policies. See https://github.com/antrea-io/antrea/issues/280. -// TODO: Fix it after replacing kube-proxy with AntreaProxy. -func (f *featurePodConnectivity) localProbeFlow() []binding.Flow { +// When proxyAll is disabled, the probe packets are identified by matching the source IP is the Antrea gateway IP; +// otherwise, the packets are identified by matching both the Antrea gateway IP and NotServiceCTMark. Note that, when +// proxyAll is disabled, currently there is no way to distinguish kubelet generated traffic from kube-proxy proxied traffic +// only by matching the Antrea gateway IP. There is a defect that NodePort Service access by external clients will be +// masqueraded as the Antrea gateway IP to bypass NetworkPolicies. See https://github.com/antrea-io/antrea/issues/280. +func (f *featurePodConnectivity) localProbeFlows() []binding.Flow { cookieID := f.cookieAllocator.Request(f.category).Raw() var flows []binding.Flow if runtime.IsWindowsPlatform() || f.ovsDatapathType == ovsconfig.OVSDatapathNetdev { + var ctMarksToMatch []*binding.CtMark + if f.proxyAll { + ctMarksToMatch = append(ctMarksToMatch, NotServiceCTMark) + } for ipProtocol, gatewayIP := range f.gatewayIPs { flows = append(flows, IngressSecurityClassifierTable.ofTable.BuildFlow(priorityHigh). Cookie(cookieID). MatchProtocol(ipProtocol). + MatchCTStateRpl(false). + MatchCTStateTrk(true). MatchSrcIP(gatewayIP). + MatchCTMark(ctMarksToMatch...). Action().GotoStage(stageConntrack). Done()) } } else { - flows = append(flows, IngressSecurityClassifierTable.ofTable.BuildFlow(priorityHigh). - Cookie(cookieID). - MatchPktMark(types.HostLocalSourceMark, &types.HostLocalSourceMark). - Action().GotoStage(stageConntrack). - Done()) + for _, ipProtocol := range f.ipProtocols { + flows = append(flows, IngressSecurityClassifierTable.ofTable.BuildFlow(priorityHigh). + Cookie(cookieID). + MatchProtocol(ipProtocol). + MatchCTStateRpl(false). + MatchCTStateTrk(true). + MatchPktMark(types.HostLocalSourceMark, &types.HostLocalSourceMark). + Action().GotoStage(stageConntrack). + Done()) + } } return flows } diff --git a/pkg/agent/openflow/pod_connectivity.go b/pkg/agent/openflow/pod_connectivity.go index d319edf94fd..7d482ca8932 100644 --- a/pkg/agent/openflow/pod_connectivity.go +++ b/pkg/agent/openflow/pod_connectivity.go @@ -44,6 +44,7 @@ type featurePodConnectivity struct { ctZoneSrcField *binding.RegField ipCtZoneTypeRegMarks map[binding.Protocol]*binding.RegMark enableMulticast bool + proxyAll bool category cookie.Category } @@ -59,7 +60,8 @@ func newFeaturePodConnectivity( networkConfig *config.NetworkConfig, ovsDatapathType ovsconfig.OVSDatapathType, connectUplinkToBridge bool, - enableMulticast bool) *featurePodConnectivity { + enableMulticast bool, + proxyAll bool) *featurePodConnectivity { ctZones := make(map[binding.Protocol]int) gatewayIPs := make(map[binding.Protocol]net.IP) localCIDRs := make(map[binding.Protocol]net.IPNet) @@ -101,6 +103,7 @@ func newFeaturePodConnectivity( ipCtZoneTypeRegMarks: ipCtZoneTypeRegMarks, ctZoneSrcField: getZoneSrcField(connectUplinkToBridge), enableMulticast: enableMulticast, + proxyAll: proxyAll, category: cookie.PodConnectivity, } } @@ -140,7 +143,7 @@ func (f *featurePodConnectivity) initFlows() []binding.Flow { flows = append(flows, f.gatewayIPSpoofGuardFlows()...) flows = append(flows, f.l3FwdFlowToGateway()...) // Add flow to ensure the liveliness check packet could be forwarded correctly. - flows = append(flows, f.localProbeFlow()...) + flows = append(flows, f.localProbeFlows()...) if f.networkConfig.TrafficEncapMode.SupportsEncap() { flows = append(flows, f.tunnelClassifierFlow(config.DefaultTunOFPort)) diff --git a/pkg/ovs/openflow/ofctrl_builder.go b/pkg/ovs/openflow/ofctrl_builder.go index 0dbf4efa168..d47e5b5c046 100644 --- a/pkg/ovs/openflow/ofctrl_builder.go +++ b/pkg/ovs/openflow/ofctrl_builder.go @@ -244,6 +244,9 @@ func (b *ofFlowBuilder) MatchCTStateSNAT(set bool) FlowBuilder { } func (b *ofFlowBuilder) MatchCTMark(marks ...*CtMark) FlowBuilder { + if len(marks) == 0 { + return b + } var value, mask uint32 for _, mark := range marks { value |= mark.GetValue() diff --git a/test/integration/agent/openflow_test.go b/test/integration/agent/openflow_test.go index b84f5c8a815..7bc59f477e4 100644 --- a/test/integration/agent/openflow_test.go +++ b/test/integration/agent/openflow_test.go @@ -1222,7 +1222,7 @@ func prepareGatewayFlows(gwIPs []net.IP, gwMAC net.HardwareAddr, vMAC net.Hardwa tableName: "IngressSecurityClassifier", flows: []*ofTestUtils.ExpectFlow{ { - MatchStr: fmt.Sprintf("priority=210,%s,%s=%s", ipProtoStr, nwSrcStr, gwIP.String()), + MatchStr: fmt.Sprintf("priority=210,ct_state=-rpl+trk,%s,%s=%s", ipProtoStr, nwSrcStr, gwIP.String()), ActStr: "goto_table:ConntrackCommit", }, },