From 0182d3f04565c28f5af06953fa1e8622d8b08fe7 Mon Sep 17 00:00:00 2001 From: Antonin Bas Date: Tue, 30 Jul 2024 17:07:25 -0700 Subject: [PATCH 1/2] Use same MTU as uplink for bridge port In bridging mode (on Linux), when moving the physical adapter to the bridge, we explicitly set the MTU for the bridge port to the same value as for the physical adapter. Without this change, the MTU may default to a different (lower) value if some existing container ports have a lower MTU value. For example, this occurs when first installing Antrea in encap mode, then re-installing Antrea in noEncap mode with bridging mode enabled. We also do some minor documentation updates to indicate to users that they should consider restarting existing workloads when updating the Antrea datapath configuration. Fixes #6456 Signed-off-by: Antonin Bas --- build/charts/antrea/README.md | 2 +- build/charts/antrea/conf/antrea-agent.conf | 1 + build/charts/antrea/values.yaml | 3 ++- build/yamls/antrea-aks.yml | 5 +++-- build/yamls/antrea-eks.yml | 5 +++-- build/yamls/antrea-gke.yml | 5 +++-- build/yamls/antrea-ipsec.yml | 5 +++-- build/yamls/antrea.yml | 5 +++-- docs/noencap-hybrid-modes.md | 7 +++++++ pkg/agent/agent_linux.go | 5 +++++ pkg/agent/secondarynetwork/init_linux.go | 1 + pkg/agent/util/net_linux.go | 7 +++++++ 12 files changed, 39 insertions(+), 12 deletions(-) diff --git a/build/charts/antrea/README.md b/build/charts/antrea/README.md index 0e434cb5ece..72348a878f0 100644 --- a/build/charts/antrea/README.md +++ b/build/charts/antrea/README.md @@ -84,7 +84,7 @@ Kubernetes: `>= 1.19.0-0` | controller.selfSignedCert | bool | `true` | Indicates whether to use auto-generated self-signed TLS certificates. If false, a Secret named "antrea-controller-tls" must be provided with the following keys: ca.crt, tls.crt, tls.key. 
| | controller.tolerations | list | `[{"key":"CriticalAddonsOnly","operator":"Exists"},{"effect":"NoSchedule","key":"node-role.kubernetes.io/master"},{"effect":"NoSchedule","key":"node-role.kubernetes.io/control-plane"},{"effect":"NoExecute","key":"node.kubernetes.io/unreachable","operator":"Exists","tolerationSeconds":0}]` | Tolerations for the antrea-controller Pod. | | controllerImage | object | `{"pullPolicy":"IfNotPresent","repository":"antrea/antrea-controller-ubuntu","tag":""}` | Container image to use for the antrea-controller component. | -| defaultMTU | int | `0` | Default MTU to use for the host gateway interface and the network interface of each Pod. By default, antrea-agent will discover the MTU of the Node's primary interface and adjust it to accommodate for tunnel encapsulation overhead if applicable. | +| defaultMTU | int | `0` | Default MTU to use for the host gateway interface and the network interface of each Pod. By default, antrea-agent will discover the MTU of the Node's primary interface and adjust it to accommodate for tunnel encapsulation overhead if applicable. If the MTU is updated, the new value will only be applied to new workloads. | | disableTXChecksumOffload | bool | `false` | Disable TX checksum offloading for container network interfaces. It's supposed to be set to true when the datapath doesn't support TX checksum offloading, which causes packets to be dropped due to bad checksum. It affects Pods running on Linux Nodes only. | | dnsServerOverride | string | `""` | Address of DNS server, to override the kube-dns Service. It's used to resolve hostnames in a FQDN policy. | | egress.exceptCIDRs | list | `[]` | CIDR ranges to which outbound Pod traffic will not be SNAT'd by Egresses. 
| diff --git a/build/charts/antrea/conf/antrea-agent.conf b/build/charts/antrea/conf/antrea-agent.conf index 5d10f890134..665857a53ca 100644 --- a/build/charts/antrea/conf/antrea-agent.conf +++ b/build/charts/antrea/conf/antrea-agent.conf @@ -175,6 +175,7 @@ disableTXChecksumOffload: {{ .Values.disableTXChecksumOffload }} # Default MTU to use for the host gateway interface and the network interface of each Pod. # If omitted, antrea-agent will discover the MTU of the Node's primary interface and # also adjust MTU to accommodate for tunnel encapsulation overhead (if applicable). +# If the MTU is updated, the new value will only be applied to new workloads. defaultMTU: {{ .Values.defaultMTU }} # packetInRate defines the OVS controller packet rate limits for different diff --git a/build/charts/antrea/values.yaml b/build/charts/antrea/values.yaml index ce7923d197b..2fd7f436124 100644 --- a/build/charts/antrea/values.yaml +++ b/build/charts/antrea/values.yaml @@ -71,7 +71,8 @@ multicast: # -- Default MTU to use for the host gateway interface and the network interface # of each Pod. By default, antrea-agent will discover the MTU of the Node's # primary interface and adjust it to accommodate for tunnel encapsulation -# overhead if applicable. +# overhead if applicable. If the MTU is updated, the new value will only be +# applied to new workloads. defaultMTU: 0 # -- packetInRate defines the OVS controller packet rate limits for different diff --git a/build/yamls/antrea-aks.yml b/build/yamls/antrea-aks.yml index 5f54dc54b5d..6608bf894fd 100644 --- a/build/yamls/antrea-aks.yml +++ b/build/yamls/antrea-aks.yml @@ -3891,6 +3891,7 @@ data: # Default MTU to use for the host gateway interface and the network interface of each Pod. # If omitted, antrea-agent will discover the MTU of the Node's primary interface and # also adjust MTU to accommodate for tunnel encapsulation overhead (if applicable). + # If the MTU is updated, the new value will only be applied to new workloads. 
defaultMTU: 0 # packetInRate defines the OVS controller packet rate limits for different @@ -5125,7 +5126,7 @@ spec: kubectl.kubernetes.io/default-container: antrea-agent # Automatically restart Pods with a RollingUpdate if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: cce7d6644fb552607ebeda9bf30a5fafa871dd4382afc609500fcb493b61768c + checksum/config: f950d38c3e5f05b4e6290aae92fc46eeda9126a68a0ed6b88eee7f5c4c6fb491 labels: app: antrea component: antrea-agent @@ -5363,7 +5364,7 @@ spec: annotations: # Automatically restart Pod if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: cce7d6644fb552607ebeda9bf30a5fafa871dd4382afc609500fcb493b61768c + checksum/config: f950d38c3e5f05b4e6290aae92fc46eeda9126a68a0ed6b88eee7f5c4c6fb491 labels: app: antrea component: antrea-controller diff --git a/build/yamls/antrea-eks.yml b/build/yamls/antrea-eks.yml index c7114acb05a..9440f99cac0 100644 --- a/build/yamls/antrea-eks.yml +++ b/build/yamls/antrea-eks.yml @@ -3891,6 +3891,7 @@ data: # Default MTU to use for the host gateway interface and the network interface of each Pod. # If omitted, antrea-agent will discover the MTU of the Node's primary interface and # also adjust MTU to accommodate for tunnel encapsulation overhead (if applicable). + # If the MTU is updated, the new value will only be applied to new workloads. 
defaultMTU: 0 # packetInRate defines the OVS controller packet rate limits for different @@ -5125,7 +5126,7 @@ spec: kubectl.kubernetes.io/default-container: antrea-agent # Automatically restart Pods with a RollingUpdate if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: cce7d6644fb552607ebeda9bf30a5fafa871dd4382afc609500fcb493b61768c + checksum/config: f950d38c3e5f05b4e6290aae92fc46eeda9126a68a0ed6b88eee7f5c4c6fb491 labels: app: antrea component: antrea-agent @@ -5364,7 +5365,7 @@ spec: annotations: # Automatically restart Pod if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: cce7d6644fb552607ebeda9bf30a5fafa871dd4382afc609500fcb493b61768c + checksum/config: f950d38c3e5f05b4e6290aae92fc46eeda9126a68a0ed6b88eee7f5c4c6fb491 labels: app: antrea component: antrea-controller diff --git a/build/yamls/antrea-gke.yml b/build/yamls/antrea-gke.yml index e152b21dd6b..bfe6727389a 100644 --- a/build/yamls/antrea-gke.yml +++ b/build/yamls/antrea-gke.yml @@ -3891,6 +3891,7 @@ data: # Default MTU to use for the host gateway interface and the network interface of each Pod. # If omitted, antrea-agent will discover the MTU of the Node's primary interface and # also adjust MTU to accommodate for tunnel encapsulation overhead (if applicable). + # If the MTU is updated, the new value will only be applied to new workloads. 
defaultMTU: 0 # packetInRate defines the OVS controller packet rate limits for different @@ -5125,7 +5126,7 @@ spec: kubectl.kubernetes.io/default-container: antrea-agent # Automatically restart Pods with a RollingUpdate if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: e30c52c9fcb04d362d018e846cf72dc633c5e891e02b3ebb87fab4d7ee08e15a + checksum/config: 5bab13c466e83f8a14191bfb9aad49229945c442808ea135f80cafe5e21be5f3 labels: app: antrea component: antrea-agent @@ -5361,7 +5362,7 @@ spec: annotations: # Automatically restart Pod if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: e30c52c9fcb04d362d018e846cf72dc633c5e891e02b3ebb87fab4d7ee08e15a + checksum/config: 5bab13c466e83f8a14191bfb9aad49229945c442808ea135f80cafe5e21be5f3 labels: app: antrea component: antrea-controller diff --git a/build/yamls/antrea-ipsec.yml b/build/yamls/antrea-ipsec.yml index fcbdb4d0b2f..06f43fc412f 100644 --- a/build/yamls/antrea-ipsec.yml +++ b/build/yamls/antrea-ipsec.yml @@ -3904,6 +3904,7 @@ data: # Default MTU to use for the host gateway interface and the network interface of each Pod. # If omitted, antrea-agent will discover the MTU of the Node's primary interface and # also adjust MTU to accommodate for tunnel encapsulation overhead (if applicable). + # If the MTU is updated, the new value will only be applied to new workloads. 
defaultMTU: 0 # packetInRate defines the OVS controller packet rate limits for different @@ -5138,7 +5139,7 @@ spec: kubectl.kubernetes.io/default-container: antrea-agent # Automatically restart Pods with a RollingUpdate if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: 73a49a9a8508cc8fb94eb2c770bb3589e68d9623327231943cba60a48716568a + checksum/config: 7212fbcdde8fe4be00f31ebbbcd7b03a7335666b4af245eed7dac1ba9e99118c checksum/ipsec-secret: d0eb9c52d0cd4311b6d252a951126bf9bea27ec05590bed8a394f0f792dcb2a4 labels: app: antrea @@ -5420,7 +5421,7 @@ spec: annotations: # Automatically restart Pod if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: 73a49a9a8508cc8fb94eb2c770bb3589e68d9623327231943cba60a48716568a + checksum/config: 7212fbcdde8fe4be00f31ebbbcd7b03a7335666b4af245eed7dac1ba9e99118c labels: app: antrea component: antrea-controller diff --git a/build/yamls/antrea.yml b/build/yamls/antrea.yml index 0640d1114a8..bf640fd537d 100644 --- a/build/yamls/antrea.yml +++ b/build/yamls/antrea.yml @@ -3891,6 +3891,7 @@ data: # Default MTU to use for the host gateway interface and the network interface of each Pod. # If omitted, antrea-agent will discover the MTU of the Node's primary interface and # also adjust MTU to accommodate for tunnel encapsulation overhead (if applicable). + # If the MTU is updated, the new value will only be applied to new workloads. 
defaultMTU: 0 # packetInRate defines the OVS controller packet rate limits for different @@ -5125,7 +5126,7 @@ spec: kubectl.kubernetes.io/default-container: antrea-agent # Automatically restart Pods with a RollingUpdate if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: 20130c4a5dbfeec75182bc3053288f64c06d0350b34c86675ac88d5961c47853 + checksum/config: 616b79b8deedba740ff992ca870b346c64c1dde5e3381436dc2cb24c0bd98ead labels: app: antrea component: antrea-agent @@ -5361,7 +5362,7 @@ spec: annotations: # Automatically restart Pod if the ConfigMap changes # See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments - checksum/config: 20130c4a5dbfeec75182bc3053288f64c06d0350b34c86675ac88d5961c47853 + checksum/config: 616b79b8deedba740ff992ca870b346c64c1dde5e3381436dc2cb24c0bd98ead labels: app: antrea component: antrea-controller diff --git a/docs/noencap-hybrid-modes.md b/docs/noencap-hybrid-modes.md index 0c96e121c44..e976c464101 100644 --- a/docs/noencap-hybrid-modes.md +++ b/docs/noencap-hybrid-modes.md @@ -34,6 +34,13 @@ spec: value: "true" ``` +Note that changing the traffic mode in an existing cluster, where Antrea is +currently installed or was previously installed, may require restarting existing +workloads. In particular, the choice of traffic mode has an impact on the MTU +value used for Pod network interfaces. When changing the traffic mode from +`NoEncap` to `Encap`, existing workloads should be restarted, so that new +network interfaces with a lower MTU value can be created. + ## Hybrid Mode Let us start from `Hybrid` mode which is simpler to configure. 
`Hybrid` mode diff --git a/pkg/agent/agent_linux.go b/pkg/agent/agent_linux.go index 8a5201c2019..95c5e6311d1 100644 --- a/pkg/agent/agent_linux.go +++ b/pkg/agent/agent_linux.go @@ -129,11 +129,16 @@ func (i *Initializer) ConnectUplinkToOVSBridge() error { externalIDs := map[string]interface{}{ interfacestore.AntreaInterfaceTypeKey: interfacestore.AntreaHost, } + // We request the same MTU for the bridge interface as for the uplink adapter. If we don't, + // OVS will default to the lowest MTU among all existing bridge ports, including container + // ports. There may be some existing workloads with a lower MTU, and using that lower value + // may impact host connectivity. bridgedUplinkName, exists, err := util.PrepareHostInterfaceConnection( i.ovsBridgeClient, uplinkNetConfig.Name, int32(i.nodeConfig.HostInterfaceOFPort), externalIDs, + i.nodeConfig.NodeTransportInterfaceMTU, ) if err != nil { return err diff --git a/pkg/agent/secondarynetwork/init_linux.go b/pkg/agent/secondarynetwork/init_linux.go index 136a2d27cc5..ed131773d60 100644 --- a/pkg/agent/secondarynetwork/init_linux.go +++ b/pkg/agent/secondarynetwork/init_linux.go @@ -48,6 +48,7 @@ func (c *Controller) Initialize() error { map[string]interface{}{ interfacestore.AntreaInterfaceTypeKey: interfacestore.AntreaHost, }, + 0, // do not request a specific MTU ) if err != nil { return err diff --git a/pkg/agent/util/net_linux.go b/pkg/agent/util/net_linux.go index d8f1ef0b901..6971d08dc59 100644 --- a/pkg/agent/util/net_linux.go +++ b/pkg/agent/util/net_linux.go @@ -412,6 +412,7 @@ func PrepareHostInterfaceConnection( ifaceName string, ifaceOFPort int32, externalIDs map[string]interface{}, + mtu int, ) (string, bool, error) { bridgedName := GenerateUplinkInterfaceName(ifaceName) // If the port already exists, just return. 
@@ -453,6 +454,12 @@ func PrepareHostInterfaceConnection( return "", false, fmt.Errorf("failed to set link up: %v", err) } + if mtu > 0 { + if err := bridge.SetInterfaceMTU(ifaceName, mtu); err != nil { + return "", false, fmt.Errorf("failed to set bridge interface MTU: %w", err) + } + } + // Check if interface is configured with an IPv6 address: if it is, we need to ensure that IPv6 // is enabled on the OVS internal port as we need to move all IP addresses over. for _, ip := range ifaceIPs { From 7c844cc816dec655eb516e3bfe6639fa328e7a2e Mon Sep 17 00:00:00 2001 From: Antonin Bas Date: Fri, 2 Aug 2024 10:38:22 -0700 Subject: [PATCH 2/2] Address review comments Signed-off-by: Antonin Bas --- pkg/agent/util/net_linux.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/agent/util/net_linux.go b/pkg/agent/util/net_linux.go index 6971d08dc59..564462a628f 100644 --- a/pkg/agent/util/net_linux.go +++ b/pkg/agent/util/net_linux.go @@ -404,8 +404,9 @@ func removeInterfaceAltName(name string, altName string) error { // PrepareHostInterfaceConnection prepares host interface connection to the OVS bridge client by: // 1. Renaming the host interface (a bridged suffix will be added to it). // 2. Creating an internal port (original name of the host interface will be used here). -// 3. Moving IPs of host interface to this new link/internal-port. -// 4. Moving routes of host interface to the new link/internal-port. +// 3. Setting the MTU of this new link/internal-port to the provided mtu parameter value, unless mtu is zero. +// 4. Moving IPs of host interface to this new link/internal-port. +// 5. Moving routes of host interface to the new link/internal-port. // and returns the bridged name, true if it already exists, and error. func PrepareHostInterfaceConnection( bridge ovsconfig.OVSBridgeClient,