Skip to content

Commit

Permalink
Add support to set ANP realization failure (#4248)
Browse files Browse the repository at this point in the history
Signed-off-by: wenyingd <wenyingd@vmware.com>
  • Loading branch information
wenyingd committed Oct 18, 2022
1 parent 5e96c8c commit 6b51f29
Show file tree
Hide file tree
Showing 11 changed files with 359 additions and 168 deletions.
5 changes: 3 additions & 2 deletions pkg/agent/controller/networkpolicy/status_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,8 +197,9 @@ func (c *StatusController) syncHandler(uid types.UID) error {
},
Nodes: []v1beta2.NetworkPolicyNodeStatus{
{
NodeName: c.nodeName,
Generation: policy.Generation,
NodeName: c.nodeName,
Generation: policy.Generation,
RealizationFailure: false,
},
},
}
Expand Down
5 changes: 3 additions & 2 deletions pkg/agent/controller/networkpolicy/status_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,9 @@ func TestSyncStatusForNewPolicy(t *testing.T) {
},
Nodes: []v1beta2.NetworkPolicyNodeStatus{
{
NodeName: testNode1,
Generation: 1,
NodeName: testNode1,
Generation: 1,
RealizationFailure: false,
},
},
},
Expand Down
4 changes: 4 additions & 0 deletions pkg/apis/controlplane/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,10 @@ type NetworkPolicyNodeStatus struct {
NodeName string
// The generation realized by the Node.
Generation int64
// RealizationFailure indicates whether the realization of the NetworkPolicy failed on the Node.
RealizationFailure bool
// Message is the error message describing why the realization of the NetworkPolicy failed on the Node.
Message string
}

type GroupReference struct {
Expand Down
348 changes: 211 additions & 137 deletions pkg/apis/controlplane/v1beta2/generated.pb.go

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions pkg/apis/controlplane/v1beta2/generated.proto

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions pkg/apis/controlplane/v1beta2/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,10 @@ type NetworkPolicyNodeStatus struct {
NodeName string `json:"nodeName,omitempty" protobuf:"bytes,1,opt,name=nodeName"`
// The generation realized by the Node.
Generation int64 `json:"generation,omitempty" protobuf:"varint,2,opt,name=generation"`
// RealizationFailure indicates whether the realization of the NetworkPolicy failed on the Node.
RealizationFailure bool `json:"realizationFailure" protobuf:"varint,3,opt,name=realizationFailure"`
// Message is the error message describing why the realization of the NetworkPolicy failed on the Node.
Message string `json:"message,omitempty" protobuf:"bytes,4,opt,name=message"`
}

type GroupReference struct {
Expand Down
4 changes: 4 additions & 0 deletions pkg/apis/controlplane/v1beta2/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions pkg/apis/crd/v1alpha1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -342,9 +342,10 @@ const (

// These are valid conditions of a NetworkPolicy.
const (
// NetworkPolicyConditionRealizable reports whether the NetworkPolicy is realizable and the reasons why it is not.
NetworkPolicyConditionRealizable NetworkPolicyConditionType = "Realizable"
// NetworkPolicyConditionRealizationFailure reports information about a failure when realizing the NetworkPolicy on a Node.
NetworkPolicyConditionRealizationFailure NetworkPolicyConditionType = "RealizationFailure"
)

// NetworkPolicyCondition describes the state of a NetworkPolicy at a certain point.
Expand Down
16 changes: 16 additions & 0 deletions pkg/apiserver/openapi/zz_generated.openapi.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 36 additions & 6 deletions pkg/controller/networkpolicy/status_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ package networkpolicy

import (
"context"
"fmt"
"sort"
"strings"
"sync"
"time"

Expand Down Expand Up @@ -44,6 +47,13 @@ const (
statusControllerName = "NetworkPolicyStatusController"
)

// maxConditionMessageLength is the maximum length of the message field in a single Condition.
// A message longer than this is truncated and terminated with "...".
// It is declared as a variable, not a constant, so that tests can override it.
var maxConditionMessageLength = 100

// StatusController is responsible for synchronizing the status of Antrea ClusterNetworkPolicy and Antrea NetworkPolicy.
type StatusController struct {
// npControlInterface knows how to update Antrea NetworkPolicy status.
Expand Down Expand Up @@ -268,13 +278,13 @@ func (c *StatusController) syncHandler(key string) error {
}
internalNP := internalNPObj.(*antreatypes.NetworkPolicy)

updateStatus := func(phase crdv1alpha1.NetworkPolicyPhase, currentNodes, desiredNodes int) error {
updateStatus := func(phase crdv1alpha1.NetworkPolicyPhase, currentNodes, desiredNodes int, conditions []crdv1alpha1.NetworkPolicyCondition) error {
status := &crdv1alpha1.NetworkPolicyStatus{
Phase: phase,
ObservedGeneration: internalNP.Generation,
CurrentNodesRealized: int32(currentNodes),
DesiredNodesRealized: int32(desiredNodes),
Conditions: GenerateNetworkPolicyCondition(internalNP.SyncError),
Conditions: conditions,
}
klog.V(2).Infof("Updating NetworkPolicy %s status: %v", internalNP.SourceRef.ToString(), status)
if internalNP.SourceRef.Type == controlplane.AntreaNetworkPolicy {
Expand All @@ -283,38 +293,58 @@ func (c *StatusController) syncHandler(key string) error {
return c.npControlInterface.UpdateAntreaClusterNetworkPolicyStatus(internalNP.SourceRef.Name, status)
}

conditions := GenerateNetworkPolicyCondition(internalNP.SyncError)
// A non-nil SyncError means the NetworkPolicy has been processed and marked as unrealizable. It is reported as
// Pending instead of being realized further, and antrea-agents will not process it.
if internalNP.SyncError != nil {
return updateStatus(crdv1alpha1.NetworkPolicyPending, 0, 0)
return updateStatus(crdv1alpha1.NetworkPolicyPending, 0, 0, conditions)
}

// Nil NodeNames means the NetworkPolicy hasn't been processed yet. Set it to Pending to differentiate it from
// NetworkPolicies that span 0 Nodes.
if internalNP.SpanMeta.NodeNames == nil {
return updateStatus(crdv1alpha1.NetworkPolicyPending, 0, 0)
return updateStatus(crdv1alpha1.NetworkPolicyPending, 0, 0, conditions)
}

desiredNodes := len(internalNP.SpanMeta.NodeNames)
currentNodes := 0
statuses := c.getNodeStatuses(key)
failedNodes := make([]string, 0)
for _, status := range statuses {
// The node is no longer in the span of this policy, delete its status.
if !internalNP.NodeNames.Has(status.NodeName) {
c.deleteNodeStatus(key, status.NodeName)
continue
}
if status.Generation == internalNP.Generation {
currentNodes += 1
if !status.RealizationFailure {
currentNodes += 1
} else {
failedNodes = append(failedNodes, fmt.Sprintf(`"%s":"%s"`, status.NodeName, status.Message))
}
}
}
if len(failedNodes) > 0 {
sort.Strings(failedNodes)
failureMessage := fmt.Sprintf("Failed Nodes count %d: %s", len(failedNodes), strings.Join(failedNodes, ", "))
if len(failureMessage) > maxConditionMessageLength {
failureMessage = fmt.Sprintf("%s...", failureMessage[:maxConditionMessageLength])
}
conditions = append(conditions, crdv1alpha1.NetworkPolicyCondition{
Type: crdv1alpha1.NetworkPolicyConditionRealizationFailure,
Status: v1.ConditionTrue,
LastTransitionTime: v1.Now(),
Reason: "NetworkPolicyRealizationFailedOnNode",
Message: failureMessage,
})
}

phase := crdv1alpha1.NetworkPolicyRealizing
if currentNodes == desiredNodes {
phase = crdv1alpha1.NetworkPolicyRealized
}

return updateStatus(phase, currentNodes, desiredNodes)
return updateStatus(phase, currentNodes, desiredNodes, conditions)
}

// networkPolicyControlInterface is an interface that knows how to update Antrea NetworkPolicy status.
Expand Down
88 changes: 69 additions & 19 deletions pkg/controller/networkpolicy/status_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,15 +97,21 @@ func newInternalNetworkPolicy(name string, generation int64, nodes []string, ref
}
}

func newNetworkPolicyStatus(name string, nodeName string, generation int64) *controlplane.NetworkPolicyStatus {
func newNetworkPolicyStatus(name string, nodeName string, generation int64, errorMessage string) *controlplane.NetworkPolicyStatus {
failed := false
if errorMessage != "" {
failed = true
}
return &controlplane.NetworkPolicyStatus{
ObjectMeta: v1.ObjectMeta{
Name: name,
},
Nodes: []controlplane.NetworkPolicyNodeStatus{
{
NodeName: nodeName,
Generation: generation,
NodeName: nodeName,
Generation: generation,
RealizationFailure: failed,
Message: errorMessage,
},
},
}
Expand Down Expand Up @@ -144,6 +150,19 @@ func newAntreaClusterNetworkPolicyReference(name string) *controlplane.NetworkPo
}
}

// generateRealizationFailureConditions builds the conditions expected for a policy whose realization
// failed on some Nodes: the conditions returned by GenerateNetworkPolicyCondition(nil), followed by one
// RealizationFailure condition.
//
// failedNodeCount and failedNodeDetails are formatted into the condition message as
// "Failed Nodes count <count>: <details>".
func generateRealizationFailureConditions(failedNodeCount int, failedNodeDetails string) []crdv1alpha1.NetworkPolicyCondition {
	// Start from the conditions generated for a policy without a sync error.
	base := GenerateNetworkPolicyCondition(nil)
	failure := crdv1alpha1.NetworkPolicyCondition{
		Type:               crdv1alpha1.NetworkPolicyConditionRealizationFailure,
		Status:             v1.ConditionTrue,
		LastTransitionTime: v1.Now(),
		Reason:             "NetworkPolicyRealizationFailedOnNode",
		Message:            fmt.Sprintf("Failed Nodes count %d: %s", failedNodeCount, failedNodeDetails),
	}
	return append(base, failure)
}

func TestCreateAntreaNetworkPolicy(t *testing.T) {
tests := []struct {
name string
Expand Down Expand Up @@ -180,10 +199,10 @@ func TestCreateAntreaNetworkPolicy(t *testing.T) {
newInternalNetworkPolicy("cnp1", 3, []string{"node1", "node2"}, newAntreaClusterNetworkPolicyReference("cnp1")),
},
collectedNetworkPolicyStatus: []*controlplane.NetworkPolicyStatus{
newNetworkPolicyStatus("anp1", "node1", 1),
newNetworkPolicyStatus("anp1", "node2", 2),
newNetworkPolicyStatus("cnp1", "node1", 2),
newNetworkPolicyStatus("cnp1", "node2", 3),
newNetworkPolicyStatus("anp1", "node1", 1, ""),
newNetworkPolicyStatus("anp1", "node2", 2, ""),
newNetworkPolicyStatus("cnp1", "node1", 2, ""),
newNetworkPolicyStatus("cnp1", "node2", 3, ""),
},
expectedANPStatus: &crdv1alpha1.NetworkPolicyStatus{
Phase: crdv1alpha1.NetworkPolicyRealizing,
Expand All @@ -207,10 +226,10 @@ func TestCreateAntreaNetworkPolicy(t *testing.T) {
newInternalNetworkPolicy("cnp1", 4, []string{"node1", "node2"}, newAntreaClusterNetworkPolicyReference("cnp1")),
},
collectedNetworkPolicyStatus: []*controlplane.NetworkPolicyStatus{
newNetworkPolicyStatus("anp1", "node1", 3),
newNetworkPolicyStatus("anp1", "node2", 3),
newNetworkPolicyStatus("cnp1", "node1", 4),
newNetworkPolicyStatus("cnp1", "node2", 4),
newNetworkPolicyStatus("anp1", "node1", 3, ""),
newNetworkPolicyStatus("anp1", "node2", 3, ""),
newNetworkPolicyStatus("cnp1", "node1", 4, ""),
newNetworkPolicyStatus("cnp1", "node2", 4, ""),
},
expectedANPStatus: &crdv1alpha1.NetworkPolicyStatus{
Phase: crdv1alpha1.NetworkPolicyRealized,
Expand All @@ -227,7 +246,38 @@ func TestCreateAntreaNetworkPolicy(t *testing.T) {
Conditions: GenerateNetworkPolicyCondition(nil),
},
},
{
name: "failed realized",
networkPolicy: []*types.NetworkPolicy{
newInternalNetworkPolicy("anp1", 4, []string{"node1", "node2"}, newAntreaNetworkPolicyReference("ns1", "anp1")),
newInternalNetworkPolicy("cnp1", 5, []string{"node1", "node2"}, newAntreaClusterNetworkPolicyReference("cnp1")),
},
collectedNetworkPolicyStatus: []*controlplane.NetworkPolicyStatus{
newNetworkPolicyStatus("anp1", "node1", 4, "agent failure"),
newNetworkPolicyStatus("anp1", "node2", 4, ""),
newNetworkPolicyStatus("cnp1", "node1", 5, "agent failure"),
newNetworkPolicyStatus("cnp1", "node2", 5, "agent crash"),
},
expectedANPStatus: &crdv1alpha1.NetworkPolicyStatus{
Phase: crdv1alpha1.NetworkPolicyRealizing,
ObservedGeneration: 4,
CurrentNodesRealized: 1,
DesiredNodesRealized: 2,
Conditions: generateRealizationFailureConditions(1, `"node1":"agent failure"`),
},
expectedCNPStatus: &crdv1alpha1.NetworkPolicyStatus{
Phase: crdv1alpha1.NetworkPolicyRealizing,
ObservedGeneration: 5,
CurrentNodesRealized: 0,
DesiredNodesRealized: 2,
Conditions: generateRealizationFailureConditions(2, `"node1":"agent failure"...`),
},
},
}
maxConditionMessageLength = 45
defer func() {
maxConditionMessageLength = 100
}()
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var initObjects []runtime.Object
Expand Down Expand Up @@ -266,11 +316,11 @@ func TestUpdateAntreaNetworkPolicy(t *testing.T) {

networkPolicyStore.Create(anp1)
networkPolicyStore.Create(cnp1)
statusController.UpdateStatus(newNetworkPolicyStatus("anp1", "node1", 1))
statusController.UpdateStatus(newNetworkPolicyStatus("anp1", "node2", 1))
statusController.UpdateStatus(newNetworkPolicyStatus("cnp1", "node3", 2))
statusController.UpdateStatus(newNetworkPolicyStatus("cnp1", "node4", 2))
statusController.UpdateStatus(newNetworkPolicyStatus("cnp1", "node5", 2))
statusController.UpdateStatus(newNetworkPolicyStatus("anp1", "node1", 1, ""))
statusController.UpdateStatus(newNetworkPolicyStatus("anp1", "node2", 1, ""))
statusController.UpdateStatus(newNetworkPolicyStatus("cnp1", "node3", 2, ""))
statusController.UpdateStatus(newNetworkPolicyStatus("cnp1", "node4", 2, ""))
statusController.UpdateStatus(newNetworkPolicyStatus("cnp1", "node5", 2, ""))
// TODO: Use a deterministic mechanism instead of sleeping.
time.Sleep(500 * time.Millisecond)
assert.True(t, NetworkPolicyStatusEqual(crdv1alpha1.NetworkPolicyStatus{
Expand Down Expand Up @@ -321,8 +371,8 @@ func TestDeleteAntreaNetworkPolicy(t *testing.T) {

networkPolicyStore.Create(initialNetworkPolicy)
statuses := []*controlplane.NetworkPolicyStatus{
newNetworkPolicyStatus("anp1", "node1", 1),
newNetworkPolicyStatus("anp1", "node2", 1),
newNetworkPolicyStatus("anp1", "node1", 1, ""),
newNetworkPolicyStatus("anp1", "node2", 1, ""),
}
for _, status := range statuses {
statusController.UpdateStatus(status)
Expand All @@ -348,7 +398,7 @@ func BenchmarkSyncHandler(b *testing.B) {

networkPolicyStore.Create(networkPolicy)
for _, node := range nodes {
statusController.UpdateStatus(newNetworkPolicyStatus("anp1", node, 1))
statusController.UpdateStatus(newNetworkPolicyStatus("anp1", node, 1, ""))
}

b.ReportAllocs()
Expand Down

0 comments on commit 6b51f29

Please sign in to comment.