Skip to content

Commit

Permalink
Fix Pod-to-external traffic on EKS in policyOnly mode
Browse files Browse the repository at this point in the history
When using Antrea in policyOnly mode on an EKS cluster, an additional
iptables rule is needed in the PREROUTING chain of the nat table. The
rule ensures that Pod-to-external traffic coming from Pods whose IP
address comes from a secondary network interface (secondary ENI) is
marked correctly, so that it hits the appropriate routing table. Without
this, traffic is SNATed with the source IP address of the primary
network interface, while being sent out of the secondary network
interface, causing the VPC to drop the traffic.

Relevant iptables rules, routing policy rules, and routes (before the fix):

```
-A PREROUTING -m comment --comment "kubernetes service portals" -j KUBE-SERVICES
-A PREROUTING -i eni+ -m comment --comment "AWS, outbound connections" -m state --state NEW -j AWS-CONNMARK-CHAIN-0
-A PREROUTING -m comment --comment "AWS, CONNMARK" -j CONNMARK --restore-mark --nfmask 0x80 --ctmask 0x80
-A OUTPUT -m comment --comment "kubernetes service portals" -j KUBE-SERVICES
-A POSTROUTING -m comment --comment "kubernetes postrouting rules" -j KUBE-POSTROUTING
-A POSTROUTING -m comment --comment "AWS SNAT CHAIN" -j AWS-SNAT-CHAIN-0
-A POSTROUTING -m comment --comment "Antrea: jump to Antrea postrouting rules" -j ANTREA-POSTROUTING
-A ANTREA-POSTROUTING -o antrea-gw0 -m comment --comment "Antrea: masquerade LOCAL traffic" -m addrtype ! --src-type LOCAL --limit-iface-out -m addrtype --src-type LOCAL -j MASQUERADE --random-fully
-A AWS-CONNMARK-CHAIN-0 ! -d 192.168.0.0/16 -m comment --comment "AWS CONNMARK CHAIN, VPC CIDR" -j AWS-CONNMARK-CHAIN-1
-A AWS-CONNMARK-CHAIN-1 -m comment --comment "AWS, CONNMARK" -j CONNMARK --set-xmark 0x80/0x80
-A AWS-SNAT-CHAIN-0 ! -d 192.168.0.0/16 -m comment --comment "AWS SNAT CHAIN" -j AWS-SNAT-CHAIN-1
-A AWS-SNAT-CHAIN-1 ! -o vlan+ -m comment --comment "AWS, SNAT" -m addrtype ! --dst-type LOCAL -j SNAT --to-source 192.168.18.153 --random-fully

0:	from all lookup local
512:	from all to 192.168.29.56 lookup main
512:	from all to 192.168.24.134 lookup main
512:	from all to 192.168.31.135 lookup main
512:	from all to 192.168.31.223 lookup main
512:	from all to 192.168.29.27 lookup main
512:	from all to 192.168.16.158 lookup main
512:	from all to 192.168.2.135 lookup main
1024:	from all fwmark 0x80/0x80 lookup main
1536:	from 192.168.31.223 lookup 2
1536:	from 192.168.29.27 lookup 2
1536:	from 192.168.16.158 lookup 2
1536:	from 192.168.2.135 lookup 2
32766:	from all lookup main
32767:	from all lookup default

default via 192.168.0.1 dev eth1
192.168.0.1 dev eth1 scope link
```

The fix is simply to add a new PREROUTING rule:

```
-A PREROUTING -m comment --comment "kubernetes service portals" -j KUBE-SERVICES
-A PREROUTING -i eni+ -m comment --comment "AWS, outbound connections" -m state --state NEW -j AWS-CONNMARK-CHAIN-0
-A PREROUTING -i antrea-gw0 -m comment --comment "Antrea: AWS, outbound connections" -m state --state NEW -j AWS-CONNMARK-CHAIN-0
-A PREROUTING -m comment --comment "AWS, CONNMARK" -j CONNMARK --restore-mark --nfmask 0x80 --ctmask 0x80
```

Fixes #3946

Signed-off-by: Antonin Bas <abas@vmware.com>
  • Loading branch information
antoninbas committed Jul 7, 2022
1 parent f022dd8 commit 688808f
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 3 deletions.
103 changes: 102 additions & 1 deletion pkg/agent/route/route_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"net"
"reflect"
"strconv"
"strings"
"sync"
"time"

Expand Down Expand Up @@ -379,7 +380,7 @@ func (c *Client) writeEKSMangleRule(iptablesData *bytes.Buffer) {
// table. If it does not exist, we do not need to install this rule. If
// it does exist we can scan for the mark value and use that in our
// rule.
klog.V(2).Infof("Add iptable mangle rule for EKS to ensure correct reverse path for NodePort Service traffic")
klog.V(2).InfoS("Add iptables mangle rule for EKS to ensure correct reverse path for NodePort Service traffic")
writeLine(iptablesData, []string{
"-A", antreaMangleChain,
"-m", "comment", "--comment", `"Antrea: AWS, primary ENI"`,
Expand All @@ -388,6 +389,96 @@ func (c *Client) writeEKSMangleRule(iptablesData *bytes.Buffer) {
}...)
}

// ensureEKSConnmarkRule reconciles the extra Antrea rule in the PREROUTING
// chain of the nat table. The rule mirrors the "AWS, outbound connections" rule
// installed by aws-node, but matches traffic received on the Antrea gateway
// interface, so that Pod traffic is marked and then routed using the correct
// route table. This matters most for Pods whose IP address comes from a
// secondary network interface (ENI): by default they are routed with a route
// table other than "main", and without this rule they have no connectivity to
// the Internet.
//
// Reconciliation outcomes:
//   - neither rule is present: nothing is changed;
//   - only the Antrea rule is present: it is deleted;
//   - only the AWS rule is present: the Antrea rule is inserted right after it;
//   - both are present but the Antrea rule is not right after the AWS rule: the
//     Antrea rule is deleted and re-inserted at the right position.
//
// Note that this rule is only needed for IPv4.
// See https://docs.aws.amazon.com/eks/latest/userguide/external-snat.html for
// more details.
func (c *Client) ensureEKSConnmarkRule() error {
	klog.V(2).InfoS("Ensure extra iptables nat rule for EKS is present so that Pod traffic is marked correctly")
	existingRules, err := c.ipt.ListRules(iptables.NATTable, iptables.PreRoutingChain)
	if err != nil {
		return err
	}

	// formatRule renders a rule spec as a single string, wrapping every
	// token which contains a space in double quotes, so that it can be
	// searched for in the listed rules.
	formatRule := func(spec []string) string {
		tokens := make([]string, len(spec))
		for i, token := range spec {
			if strings.Contains(token, " ") {
				token = fmt.Sprintf(`"%s"`, token)
			}
			tokens[i] = token
		}
		return strings.Join(tokens, " ")
	}

	awsRuleSpec := []string{
		"-i", "eni+",
		"-m", "comment", "--comment", "AWS, outbound connections",
		"-m", "state", "--state", "NEW",
		"-j", "AWS-CONNMARK-CHAIN-0",
	}
	antreaRuleSpec := []string{
		"-i", c.nodeConfig.GatewayConfig.Name,
		"-m", "comment", "--comment", "Antrea: AWS, outbound connections",
		"-m", "state", "--state", "NEW",
		"-j", "AWS-CONNMARK-CHAIN-0",
	}
	awsPattern := formatRule(awsRuleSpec)
	antreaPattern := formatRule(antreaRuleSpec)

	// A position of 0 means "not found": entry 0 of the listing is the
	// default policy rule for the chain, so it is never a candidate. If a
	// pattern matches several entries, the last match wins.
	awsPos, antreaPos := 0, 0
	for pos, listed := range existingRules {
		if pos == 0 {
			continue
		}
		if strings.Contains(listed, awsPattern) {
			awsPos = pos
		}
		if strings.Contains(listed, antreaPattern) {
			antreaPos = pos
		}
	}

	// We ensure that our rule is always right after the rule installed by
	// aws-node in the PREROUTING chain.
	var removeRule, addRule bool
	switch {
	case awsPos == 0 && antreaPos == 0:
		// both rules missing, nothing to do
		return nil
	case awsPos == 0:
		// stale Antrea rule without its AWS counterpart
		removeRule = true
	case antreaPos == 0:
		addRule = true
	case antreaPos != awsPos+1:
		// both rules present, but our rule is misplaced: move it
		removeRule, addRule = true, true
	}

	if removeRule {
		if err := c.ipt.DeleteRule(iptables.ProtocolIPv4, iptables.NATTable, iptables.PreRoutingChain, antreaRuleSpec); err != nil {
			return err
		}
		if antreaPos < awsPos {
			// our rule was located before the AWS rule, which has
			// therefore moved up by one position after the deletion
			awsPos--
		}
	}
	if addRule {
		if err := c.ipt.InsertRuleAtIndex(iptables.ProtocolIPv4, iptables.NATTable, iptables.PreRoutingChain, awsPos+1, antreaRuleSpec); err != nil {
			return err
		}
	}

	return nil
}

// syncIPTables ensures that the iptables infrastructure we use is set up.
// It's idempotent and can safely be called on every startup.
func (c *Client) syncIPTables() error {
Expand Down Expand Up @@ -438,6 +529,7 @@ func (c *Client) syncIPTables() error {
}
return true
})

// Use iptables-restore to configure IPv4 settings.
if c.networkConfig.IPv4Enabled {
iptablesData := c.restoreIptablesData(c.nodeConfig.PodIPv4CIDR,
Expand Down Expand Up @@ -467,6 +559,15 @@ func (c *Client) syncIPTables() error {
return err
}
}

// When Antrea is used to enforce NetworkPolicies in EKS, an additional iptables
// nat rule is required. See https://github.com/antrea-io/antrea/issues/3946.
if env.IsCloudEKS() && c.networkConfig.IPv4Enabled {
if err := c.ensureEKSConnmarkRule(); err != nil {
return err
}
}

return nil
}

Expand Down
21 changes: 19 additions & 2 deletions pkg/agent/util/iptables/iptables.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ func (c *Client) AppendRule(protocol Protocol, table string, chain string, ruleS
return nil
}

// InsertRule checks if target rule already exists, inserts it if not.
// InsertRule checks if target rule already exists, inserts it at the beginning of the chain if not.
func (c *Client) InsertRule(protocol Protocol, table string, chain string, ruleSpec []string) error {
for p := range c.ipts {
ipt := c.ipts[p]
Expand All @@ -190,7 +190,24 @@ func (c *Client) InsertRule(protocol Protocol, table string, chain string, ruleS
if err := ipt.Insert(table, chain, 1, ruleSpec...); err != nil {
return fmt.Errorf("error inserting rule %v to table %s chain %s: %v", ruleSpec, table, chain, err)
}
klog.V(2).InfoS("Inserted a rule", "rule", ruleSpec, "table", table, "chain", chain)
klog.V(2).InfoS("Inserted a rule", "rule", ruleSpec, "table", table, "chain", chain, "index", 1)
}
return nil
}

// InsertRuleAtIndex inserts the target rule at the provided index of the given chain, for each
// iptables instance for which matchProtocol reports a match with protocol. Since an index is
// provided, we do not check for existence of the rule first. If you want to insert a rule at the
// beginning of a chain if and only if the target rule does not already exist (anywhere), then use
// InsertRule.
//
// The function stops at the first failed insertion. The returned error wraps the underlying error
// (%w), so callers can still inspect the original error with errors.Is / errors.As.
func (c *Client) InsertRuleAtIndex(protocol Protocol, table string, chain string, idx int, ruleSpec []string) error {
	for p := range c.ipts {
		ipt := c.ipts[p]
		if !matchProtocol(ipt, protocol) {
			continue
		}
		if err := ipt.Insert(table, chain, idx, ruleSpec...); err != nil {
			return fmt.Errorf("error inserting rule %v to table %s chain %s at index %d: %w", ruleSpec, table, chain, idx, err)
		}
		klog.V(2).InfoS("Inserted a rule", "rule", ruleSpec, "table", table, "chain", chain, "index", idx)
	}
	return nil
}
Expand Down

0 comments on commit 688808f

Please sign in to comment.