Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Store NetworkPolicy in filesystem as fallback data source #5739

Merged
merged 1 commit into from
Nov 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cmd/antrea-agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"net"
"time"

"github.com/spf13/afero"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/util/sets"
Expand Down Expand Up @@ -459,6 +460,7 @@ func run(o *Options) error {
antreaClientProvider,
ofClient,
ifaceStore,
afero.NewOsFs(),
nodeKey,
podUpdateChannel,
externalEntityUpdateChannel,
Expand Down
32 changes: 26 additions & 6 deletions pkg/agent/controller/networkpolicy/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -551,13 +551,14 @@ func (c *ruleCache) addAddressGroupLocked(group *v1beta.AddressGroup) error {

// PatchAddressGroup updates a cached *v1beta.AddressGroup.
// The rules referencing it will be regarded as dirty.
func (c *ruleCache) PatchAddressGroup(patch *v1beta.AddressGroupPatch) error {
// It returns a copy of the patched AddressGroup, or an error if the AddressGroup doesn't exist.
func (c *ruleCache) PatchAddressGroup(patch *v1beta.AddressGroupPatch) (*v1beta.AddressGroup, error) {
c.addressSetLock.Lock()
defer c.addressSetLock.Unlock()

groupMemberSet, exists := c.addressSetByGroup[patch.Name]
if !exists {
return fmt.Errorf("AddressGroup %v doesn't exist in cache, can't be patched", patch.Name)
return nil, fmt.Errorf("AddressGroup %v doesn't exist in cache, can't be patched", patch.Name)
}
for i := range patch.AddedGroupMembers {
groupMemberSet.Insert(&patch.AddedGroupMembers[i])
Expand All @@ -567,7 +568,16 @@ func (c *ruleCache) PatchAddressGroup(patch *v1beta.AddressGroupPatch) error {
}

c.onAddressGroupUpdate(patch.Name)
return nil

members := make([]v1beta.GroupMember, 0, len(groupMemberSet))
for _, member := range groupMemberSet {
members = append(members, *member)
}
group := &v1beta.AddressGroup{
ObjectMeta: patch.ObjectMeta,
GroupMembers: members,
}
return group, nil
}

// DeleteAddressGroup deletes a cached *v1beta.AddressGroup.
Expand Down Expand Up @@ -639,13 +649,14 @@ func (c *ruleCache) addAppliedToGroupLocked(group *v1beta.AppliedToGroup) error

// PatchAppliedToGroup updates a cached *v1beta.AppliedToGroup with the given *v1beta.AppliedToGroupPatch.
// The rules referencing it will be regarded as dirty.
func (c *ruleCache) PatchAppliedToGroup(patch *v1beta.AppliedToGroupPatch) error {
// It returns a copy of the patched AppliedToGroup, or an error if the AppliedToGroup doesn't exist.
func (c *ruleCache) PatchAppliedToGroup(patch *v1beta.AppliedToGroupPatch) (*v1beta.AppliedToGroup, error) {
c.appliedToSetLock.Lock()
defer c.appliedToSetLock.Unlock()

memberSet, exists := c.appliedToSetByGroup[patch.Name]
if !exists {
return fmt.Errorf("AppliedToGroup %v doesn't exist in cache, can't be patched", patch.Name)
return nil, fmt.Errorf("AppliedToGroup %v doesn't exist in cache, can't be patched", patch.Name)
}
for i := range patch.AddedGroupMembers {
memberSet.Insert(&patch.AddedGroupMembers[i])
Expand All @@ -654,7 +665,16 @@ func (c *ruleCache) PatchAppliedToGroup(patch *v1beta.AppliedToGroupPatch) error
memberSet.Delete(&patch.RemovedGroupMembers[i])
}
c.onAppliedToGroupUpdate(patch.Name)
return nil

members := make([]v1beta.GroupMember, 0, len(memberSet))
for _, member := range memberSet {
members = append(members, *member)
}
group := &v1beta.AppliedToGroup{
ObjectMeta: patch.ObjectMeta,
GroupMembers: members,
}
return group, nil
}

// DeleteAppliedToGroup deletes a cached *v1beta.AppliedToGroup.
Expand Down
10 changes: 8 additions & 2 deletions pkg/agent/controller/networkpolicy/cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1039,7 +1039,7 @@ func TestRuleCachePatchAppliedToGroup(t *testing.T) {
for _, rule := range tt.rules {
c.rules.Add(rule)
}
err := c.PatchAppliedToGroup(tt.args)
ret, err := c.PatchAppliedToGroup(tt.args)
if (err == nil) == tt.expectedErr {
t.Fatalf("Got error %v, expected %t", err, tt.expectedErr)
}
Expand All @@ -1048,6 +1048,9 @@ func TestRuleCachePatchAppliedToGroup(t *testing.T) {
}
actualPods, _ := c.appliedToSetByGroup[tt.args.Name]
assert.ElementsMatch(t, tt.expectedPods, actualPods.Items(), "stored Pods not equal")
if !tt.expectedErr {
assert.Equal(t, len(ret.GroupMembers), len(actualPods))
}
})
}
}
Expand Down Expand Up @@ -1116,7 +1119,7 @@ func TestRuleCachePatchAddressGroup(t *testing.T) {
for _, rule := range tt.rules {
c.rules.Add(rule)
}
err := c.PatchAddressGroup(tt.args)
ret, err := c.PatchAddressGroup(tt.args)
if (err == nil) == tt.expectedErr {
t.Fatalf("Got error %v, expected %t", err, tt.expectedErr)
}
Expand All @@ -1125,6 +1128,9 @@ func TestRuleCachePatchAddressGroup(t *testing.T) {
}
actualAddresses, _ := c.addressSetByGroup[tt.args.Name]
assert.ElementsMatch(t, tt.expectedAddresses, actualAddresses.Items(), "stored addresses not equal")
if !tt.expectedErr {
assert.Equal(t, len(ret.GroupMembers), len(actualAddresses))
}
})
}
}
Expand Down
134 changes: 134 additions & 0 deletions pkg/agent/controller/networkpolicy/filestore.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
// Copyright 2023 Antrea Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package networkpolicy

import (
"fmt"
"io"
"io/fs"
"os"
"path/filepath"

"github.com/spf13/afero"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/klog/v2"
)

// fileStore encodes and stores runtime.Objects in files. Each object will be stored in a separate file, named after the
// object's UID, under the given directory.
type fileStore struct {
// fs is the filesystem through which all file and directory operations are performed.
fs afero.Fs
// The directory to store the files.
dir string
// serializer knows how to encode and decode the objects.
serializer runtime.Serializer
}

func newFileStore(fs afero.Fs, dir string, serializer runtime.Serializer) (*fileStore, error) {
s := &fileStore{
fs: fs,
dir: dir,
serializer: serializer,
}
Dyanngg marked this conversation as resolved.
Show resolved Hide resolved
klog.V(2).InfoS("Creating directory for NetworkPolicy cache", "dir", dir)
if err := s.fs.MkdirAll(dir, 0o600); err != nil {
return nil, err
}
return s, nil
}

// save stores the given object in file with the object's UID as the file name, overwriting any existing content if the
// file already exists. Note the method may update the object's GroupVersionKind in-place during serialization.
// It returns an error if the file cannot be opened, the object cannot be encoded, or the file cannot be closed
// cleanly after writing. item must implement metav1.Object.
func (s fileStore) save(item runtime.Object) (err error) {
	object := item.(metav1.Object)
	path := filepath.Join(s.dir, string(object.GetUID()))
	file, err := s.fs.OpenFile(path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o600)
	if err != nil {
		return fmt.Errorf("error opening file for writing object %v: %w", object.GetUID(), err)
	}
	// A failure to close a file opened for writing can mean the data never reached the file, so report it unless an
	// earlier error is already being returned.
	defer func() {
		if closeErr := file.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("error closing file for object %v: %w", object.GetUID(), closeErr)
		}
	}()
	// Encode may update the object's GroupVersionKind in-place during serialization.
	if err = s.serializer.Encode(item, file); err != nil {
		return fmt.Errorf("error writing object %v to file: %w", object.GetUID(), err)
	}
	return nil
}

// delete removes the file named after the object's UID from the store's directory. A file that doesn't exist is not
// treated as an error; any other removal failure is returned to the caller.
func (s fileStore) delete(item runtime.Object) error {
	uid := item.(metav1.Object).GetUID()
	target := filepath.Join(s.dir, string(uid))
	if removeErr := s.fs.Remove(target); removeErr != nil && !os.IsNotExist(removeErr) {
		return removeErr
	}
	return nil
}

// replaceAll replaces all files under the directory with the given objects. Existing files not in the given objects
// will be removed. Note the method may update the object's GroupVersionKind in-place during serialization.
// The directory is removed and recreated before the objects are saved one by one; the first error encountered is
// returned and the remaining objects are not saved.
func (s fileStore) replaceAll(items []runtime.Object) error {
	if err := s.fs.RemoveAll(s.dir); err != nil {
		return err
	}
	// A directory needs the execute (search) bit for its entries to be created or opened, so use 0o700 rather than
	// 0o600: still accessible by the owner only.
	if err := s.fs.MkdirAll(s.dir, 0o700); err != nil {
		return err
	}
	for _, item := range items {
		if err := s.save(item); err != nil {
			return err
		}
	}
	return nil
}

func (s fileStore) loadAll() ([]runtime.Object, error) {
var objects []runtime.Object
err := afero.Walk(s.fs, s.dir, func(path string, info fs.FileInfo, err error) error {
if info.IsDir() {
return nil
}
file, err2 := s.fs.Open(path)
if err2 != nil {
return err2
}
defer file.Close()
data, err2 := io.ReadAll(file)
if err2 != nil {
return err2
}

object, gkv, err2 := s.serializer.Decode(data, nil, nil)
// If the data is corrupted somehow, we still want to load other data and continue the process.
if err2 != nil {
klog.ErrorS(err2, "Failed to decode data from file, ignore it", "file", path)
return nil
}
// Note: we haven't stored a different version so far but version conversion should be performed when the used
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we add a note or todo in the apis/controlplane package then? But I'm also wondering if we make sure that the appliedToGroup etc. structs are backward compatible (we kind of have to at this point since they're v1b2 already), maybe a version conversion is not strictly needed, except for the downgrade case where the agent restarts with a lower version.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree this could be overlooked but I feel no much difference where the note is added. To ensure backwards compatability, I added an unit test case, "compatible with v1beta2": If a PR changes the used version but doesn't take care of converting the stored version to the used one, the test would fail.

There is no extra requirement on the conversion between the storage version and the used version compared with what we do for API versions. As long as antrea-controller can talk with old agent using N-1 API and new agent using N API, the new agent can also get N-1 version data from files and convert them to N version.

Whether it really needs to do conversion will depend on how the API evolves, the PR just ensures the version information is stored so we know how to convert them when required.

The cost of conversion wouldn't be a problem. In the worst case each agent just does the conversion once and only when the API version changes and the controller happens to be unavailable.

// version is upgraded in the future.
klog.V(2).InfoS("Loaded object from file", "gkv", gkv, "object", object)
objects = append(objects, object)
return nil
})
if err != nil {
return nil, err
}
return objects, nil
}
Loading
Loading