Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add master resync period configurability #1139

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion cmd/nfd-master/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"flag"
"fmt"
"os"
"time"

"k8s.io/klog/v2"

Expand Down Expand Up @@ -67,6 +68,8 @@ func main() {
args.Overrides.EnableTaints = overrides.EnableTaints
case "no-publish":
args.Overrides.NoPublish = overrides.NoPublish
case "-resync-period":
args.Overrides.ResyncPeriod = overrides.ResyncPeriod
}
})

Expand Down Expand Up @@ -131,6 +134,7 @@ func initFlags(flagset *flag.FlagSet) (*master.Args, *master.ConfigOverrideArgs)
DenyLabelNs: &utils.StringSetVal{},
ExtraLabelNs: &utils.StringSetVal{},
ResourceLabels: &utils.StringSetVal{},
ResyncPeriod: &utils.DurationVal{Duration: time.Duration(1) * time.Hour},
}
flagset.Var(overrides.ExtraLabelNs, "extra-label-ns",
"Comma separated list of allowed extra label namespaces")
Expand All @@ -145,6 +149,8 @@ func initFlags(flagset *flag.FlagSet) (*master.Args, *master.ConfigOverrideArgs)
"Comma separated list of denied label namespaces")
flagset.Var(overrides.ResourceLabels, "resource-labels",
"Comma separated list of labels to be exposed as extended resources. DEPRECATED: use NodeFeatureRule objects instead")

flagset.Var(overrides.ResyncPeriod, "resync-period",
"Specify the NFD API controller resync period."+
"It has an effect when the NodeFeature API has been enabled (with -enable-nodefeature-api).")
return args, overrides
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
# resourceLabels: ["vendor-1.com/feature-1","vendor-2.io/feature-2"]
# enableTaints: false
# labelWhiteList: "foo"
# resyncPeriod: "2h"
3 changes: 3 additions & 0 deletions deployment/helm/node-feature-discovery/templates/master.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,9 @@ spec:
{{- if .Values.master.featureRulesController | kindIs "invalid" | not }}
- "-featurerules-controller={{ .Values.master.featureRulesController }}"
{{- end }}
{{- if .Values.master.resyncPeriod }}
- "-resync-period={{ .Values.master.resyncPeriod }}"
{{- end }}
{{- if .Values.tls.enable }}
- "-ca-file=/etc/kubernetes/node-feature-discovery/certs/ca.crt"
- "-key-file=/etc/kubernetes/node-feature-discovery/certs/tls.key"
Expand Down
2 changes: 2 additions & 0 deletions deployment/helm/node-feature-discovery/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,13 @@ master:
# resourceLabels: ["vendor-1.com/feature-1","vendor-2.io/feature-2"]
# enableTaints: false
# labelWhiteList: "foo"
# resyncPeriod: "2h"
### <NFD-MASTER-CONF-END-DO-NOT-REMOVE>
# The TCP port that nfd-master listens for incoming requests. Default: 8080
port: 8080
instance:
featureApi:
resyncPeriod:
denyLabelNs: []
extraLabelNs: []
resourceLabels: []
Expand Down
1 change: 1 addition & 0 deletions docs/deployment/helm.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ We have introduced the following Chart parameters.
| `master.*` | dict | | NFD master deployment configuration |
| `master.port` | integer | | Specifies the TCP port that nfd-master listens for incoming requests. |
| `master.instance` | string | | Instance name. Used to separate annotation namespaces for multiple parallel deployments |
| `master.resyncPeriod` | string | | NFD API controller resync period. |
| `master.extraLabelNs` | array | [] | List of allowed extra label namespaces |
| `master.resourceLabels` | array | [] | List of labels to be registered as extended resources |
| `master.enableTaints` | bool | false | Specifies whether to enable or disable node tainting |
Expand Down
17 changes: 17 additions & 0 deletions docs/reference/master-commandline-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -376,3 +376,20 @@ Default: 0
Comma-separated list of `pattern=N` settings for file-filtered logging.

Default: *empty*

### -resync-period

The `-resync-period` flag specifies the NFD API controller resync period.
The resync means nfd-master replaying all NodeFeature and NodeFeatureRule objects,
thus effectively re-syncing all nodes in the cluster (i.e. ensuring labels, annotations,
extended resources and taints are in place).
Only has effect when the [NodeFeature](../usage/custom-resources.md#nodefeature)
CRD API has been enabled with [`-enable-nodefeature-api`](master-commandline-reference.md#-enable-nodefeature-api).

Default: 1 hour.

Example:

```bash
nfd-master -resync-period=2h
```
17 changes: 17 additions & 0 deletions docs/reference/master-configuration-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,3 +113,20 @@ Example:
```yaml
labelWhiteList: "foo"
```

### resyncPeriod

The `resyncPeriod` option specifies the NFD API controller resync period.
The resync means nfd-master replaying all NodeFeature and NodeFeatureRule objects,
thus effectively re-syncing all nodes in the cluster (i.e. ensuring labels, annotations,
extended resources and taints are in place).
Only has effect when the [NodeFeature](../usage/custom-resources.md#nodefeature)
CRD API has been enabled with [`-enable-nodefeature-api`](master-commandline-reference.md#-enable-nodefeature-api).

Default: 1 hour.

Example:

```yaml
resyncPeriod=2h
```
18 changes: 11 additions & 7 deletions pkg/nfd-master/nfd-api-controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,19 +41,23 @@ type nfdController struct {
updateOneNodeChan chan string
}

func newNfdController(config *restclient.Config, disableNodeFeature bool) (*nfdController, error) {
type nfdApiControllerOptions struct {
disableNodeFeature bool
resyncPeriod time.Duration
}

func newNfdController(config *restclient.Config, nfdApiControllerOptions nfdApiControllerOptions) (*nfdController, error) {
c := &nfdController{
stopChan: make(chan struct{}, 1),
updateAllNodesChan: make(chan struct{}, 1),
updateOneNodeChan: make(chan string),
}

nfdClient := nfdclientset.NewForConfigOrDie(config)

informerFactory := nfdinformers.NewSharedInformerFactory(nfdClient, 1*time.Hour)
informerFactory := nfdinformers.NewSharedInformerFactory(nfdClient, nfdApiControllerOptions.resyncPeriod)

// Add informer for NodeFeature objects
if !disableNodeFeature {
if !nfdApiControllerOptions.disableNodeFeature {
featureInformer := informerFactory.Nfd().V1alpha1().NodeFeatures()
if _, err := featureInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: func(obj interface{}) {
Expand Down Expand Up @@ -83,23 +87,23 @@ func newNfdController(config *restclient.Config, disableNodeFeature bool) (*nfdC
AddFunc: func(object interface{}) {
key, _ := cache.MetaNamespaceKeyFunc(object)
klog.V(2).Infof("NodeFeatureRule %v added", key)
if !disableNodeFeature {
if !nfdApiControllerOptions.disableNodeFeature {
c.updateAllNodes()
}
// else: rules will be processed only when gRPC requests are received
},
UpdateFunc: func(oldObject, newObject interface{}) {
key, _ := cache.MetaNamespaceKeyFunc(newObject)
klog.V(2).Infof("NodeFeatureRule %v updated", key)
if !disableNodeFeature {
if !nfdApiControllerOptions.disableNodeFeature {
c.updateAllNodes()
}
// else: rules will be processed only when gRPC requests are received
},
DeleteFunc: func(object interface{}) {
key, _ := cache.MetaNamespaceKeyFunc(object)
klog.V(2).Infof("NodeFeatureRule %v deleted", key)
if !disableNodeFeature {
if !nfdApiControllerOptions.disableNodeFeature {
c.updateAllNodes()
}
// else: rules will be processed only when gRPC requests are received
Expand Down
8 changes: 8 additions & 0 deletions pkg/nfd-master/nfd-master-internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -640,24 +640,32 @@ extraLabelNs: ["added.ns.io"]
// Update config and verify the effect
writeConfig(`
extraLabelNs: ["override.ns.io"]
resyncPeriod: '2h'
`)
So(func() interface{} { return master.config.ExtraLabelNs },
withTimeout, 2*time.Second, ShouldResemble, utils.StringSetVal{"override.ns.io": struct{}{}})
So(func() interface{} { return master.config.ResyncPeriod.Duration },
withTimeout, 2*time.Second, ShouldResemble, time.Duration(2)*time.Hour)

// Removing config file should get back our defaults
err = os.RemoveAll(tmpDir)
So(err, ShouldBeNil)
So(func() interface{} { return master.config.ExtraLabelNs },
withTimeout, 2*time.Second, ShouldResemble, utils.StringSetVal{})
So(func() interface{} { return master.config.ResyncPeriod.Duration },
withTimeout, 2*time.Second, ShouldResemble, time.Duration(1)*time.Hour)

// Re-creating config dir and file should change the config
err = os.MkdirAll(configDir, 0755)
So(err, ShouldBeNil)
writeConfig(`
extraLabelNs: ["another.override.ns"]
resyncPeriod: '3m'
`)
So(func() interface{} { return master.config.ExtraLabelNs },
withTimeout, 2*time.Second, ShouldResemble, utils.StringSetVal{"another.override.ns": struct{}{}})
So(func() interface{} { return master.config.ResyncPeriod.Duration },
withTimeout, 2*time.Second, ShouldResemble, time.Duration(3)*time.Minute)
})
})
}
Expand Down
12 changes: 11 additions & 1 deletion pkg/nfd-master/nfd-master.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ type NFDConfig struct {
NoPublish bool
ResourceLabels utils.StringSetVal
EnableTaints bool
ResyncPeriod utils.DurationVal
}

// ConfigOverrideArgs are args that override config file options
Expand All @@ -80,6 +81,7 @@ type ConfigOverrideArgs struct {
ResourceLabels *utils.StringSetVal
EnableTaints *bool
NoPublish *bool
ResyncPeriod *utils.DurationVal
}

// Args holds command line arguments
Expand All @@ -96,6 +98,7 @@ type Args struct {
Prune bool
VerifyNodeName bool
Options string
ResyncPeriod utils.DurationVal

Overrides ConfigOverrideArgs
}
Expand Down Expand Up @@ -172,6 +175,7 @@ func newDefaultConfig() *NFDConfig {
NoPublish: false,
ResourceLabels: utils.StringSetVal{},
EnableTaints: false,
ResyncPeriod: utils.DurationVal{Duration: time.Duration(1) * time.Hour},
}
}

Expand All @@ -195,7 +199,10 @@ func (m *nfdMaster) Run() error {
return err
}
klog.Info("starting nfd api controller")
m.nfdController, err = newNfdController(kubeconfig, !m.args.EnableNodeFeatureApi)
m.nfdController, err = newNfdController(kubeconfig, nfdApiControllerOptions{
disableNodeFeature: !m.args.EnableNodeFeatureApi,
resyncPeriod: m.args.ResyncPeriod.Duration,
})
if err != nil {
return fmt.Errorf("failed to initialize CRD controller: %w", err)
}
Expand Down Expand Up @@ -1112,6 +1119,9 @@ func (m *nfdMaster) configure(filepath string, overrides string) error {
if m.args.Overrides.LabelWhiteList != nil {
c.LabelWhiteList = *m.args.Overrides.LabelWhiteList
}
if m.args.Overrides.ResyncPeriod != nil {
c.ResyncPeriod = *m.args.Overrides.ResyncPeriod
}

m.config = c
if !c.NoPublish {
Expand Down
31 changes: 3 additions & 28 deletions pkg/nfd-worker/nfd-worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ type coreConfig struct {
FeatureSources []string
Sources *[]string
LabelSources []string
SleepInterval duration
SleepInterval utils.DurationVal
}

type sourcesConfig map[string]source.Config
Expand Down Expand Up @@ -127,10 +127,6 @@ type nfdWorker struct {
labelSources []source.LabelSource
}

type duration struct {
time.Duration
}

// This ticker can represent infinite and normal intervals.
type infiniteTicker struct {
*time.Ticker
Expand Down Expand Up @@ -169,7 +165,7 @@ func newDefaultConfig() *NFDConfig {
return &NFDConfig{
Core: coreConfig{
LabelWhiteList: utils.RegexpVal{Regexp: *regexp.MustCompile("")},
SleepInterval: duration{60 * time.Second},
SleepInterval: utils.DurationVal{Duration: 60 * time.Second},
marquiz marked this conversation as resolved.
Show resolved Hide resolved
FeatureSources: []string{"all"},
LabelSources: []string{"all"},
Klog: make(map[string]string),
Expand Down Expand Up @@ -369,7 +365,7 @@ func (c *coreConfig) sanitize() {
if c.SleepInterval.Duration > 0 && c.SleepInterval.Duration < time.Second {
klog.Warningf("too short sleep interval specified (%s), forcing to 1s",
c.SleepInterval.Duration.String())
c.SleepInterval = duration{time.Second}
c.SleepInterval = utils.DurationVal{Duration: time.Second}
}
}

Expand Down Expand Up @@ -756,27 +752,6 @@ func (m *nfdWorker) getNfdClient() (*nfdclient.Clientset, error) {
return c, nil
}

// UnmarshalJSON implements the Unmarshaler interface from "encoding/json"
func (d *duration) UnmarshalJSON(data []byte) error {
var v interface{}
if err := json.Unmarshal(data, &v); err != nil {
return err
}
switch val := v.(type) {
case float64:
d.Duration = time.Duration(val)
case string:
var err error
d.Duration, err = time.ParseDuration(val)
if err != nil {
return err
}
default:
return fmt.Errorf("invalid duration %s", data)
}
return nil
}

// UnmarshalJSON implements the Unmarshaler interface from "encoding/json"
func (c *sourcesConfig) UnmarshalJSON(data []byte) error {
// First do a raw parse to get the per-source data
Expand Down
37 changes: 37 additions & 0 deletions pkg/utils/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"regexp"
"sort"
"strings"
"time"
)

// RegexpVal is a wrapper for regexp command line flags
Expand Down Expand Up @@ -173,3 +174,39 @@ func NewKlogFlagVal(f *flag.Flag) *KlogFlagVal {
type boolFlag interface {
IsBoolFlag() bool
}

// DurationVal is a wrapper for handling time.Duration as a command line flag
type DurationVal struct {
AhmedGrati marked this conversation as resolved.
Show resolved Hide resolved
time.Duration
}

// UnmarshalJSON implements the Unmarshaler interface from "encoding/json"
func (d *DurationVal) UnmarshalJSON(data []byte) error {
var v interface{}
if err := json.Unmarshal(data, &v); err != nil {
return err
}
switch val := v.(type) {
case float64:
d.Duration = time.Duration(val)
case string:
var err error
d.Duration, err = time.ParseDuration(val)
if err != nil {
return err
}
default:
return fmt.Errorf("invalid duration %s", data)
}
return nil
}

// Set implements the flag.Value interface
func (d *DurationVal) Set(val string) error {
m, err := time.ParseDuration(val)
if err != nil {
return err
}
*d = DurationVal{m}
return nil
}