From a31f695426822d08acf1a14fc56fb931d4177072 Mon Sep 17 00:00:00 2001 From: lminzhw Date: Thu, 30 May 2019 10:50:38 +0800 Subject: [PATCH 1/6] limit the panic in resource_info --- cmd/kube-batch/main.go | 3 +++ pkg/scheduler/api/resource_info.go | 28 +++++++++----------- pkg/scheduler/util/assert/assert.go | 41 +++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 15 deletions(-) create mode 100644 pkg/scheduler/util/assert/assert.go diff --git a/cmd/kube-batch/main.go b/cmd/kube-batch/main.go index cf419ebfb6..60bfabd286 100644 --- a/cmd/kube-batch/main.go +++ b/cmd/kube-batch/main.go @@ -36,6 +36,9 @@ import ( // Import default actions/plugins. _ "github.com/kubernetes-sigs/kube-batch/pkg/scheduler/actions" _ "github.com/kubernetes-sigs/kube-batch/pkg/scheduler/plugins" + + // init assert + _ "github.com/kubernetes-sigs/kube-batch/pkg/scheduler/util/assert" ) var logFlushFreq = pflag.Duration("log-flush-frequency", 5*time.Second, "Maximum number of seconds between log flushes") diff --git a/pkg/scheduler/api/resource_info.go b/pkg/scheduler/api/resource_info.go index b575e49174..27ec0b94ce 100644 --- a/pkg/scheduler/api/resource_info.go +++ b/pkg/scheduler/api/resource_info.go @@ -22,6 +22,8 @@ import ( v1 "k8s.io/api/core/v1" v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" + + "github.com/kubernetes-sigs/kube-batch/pkg/scheduler/util/assert" ) // Resource struct defines all the resource type @@ -117,9 +119,8 @@ func (r *Resource) IsZero(rn v1.ResourceName) bool { return true } - if _, ok := r.ScalarResources[rn]; !ok { - panic("unknown resource") - } + _, found := r.ScalarResources[rn] + assert.Assertf(found, "unknown resource %s", rn) return r.ScalarResources[rn] < minMilliScalarResources } @@ -142,22 +143,19 @@ func (r *Resource) Add(rr *Resource) *Resource { //Sub subtracts two Resource objects. 
func (r *Resource) Sub(rr *Resource) *Resource { - if rr.LessEqual(r) { - r.MilliCPU -= rr.MilliCPU - r.Memory -= rr.Memory + assert.Assertf(rr.LessEqual(r), "resource is not sufficient to do operation: <%v> sub <%v>", r, rr) - for rrName, rrQuant := range rr.ScalarResources { - if r.ScalarResources == nil { - return r - } - r.ScalarResources[rrName] -= rrQuant - } + r.MilliCPU -= rr.MilliCPU + r.Memory -= rr.Memory - return r + for rrName, rrQuant := range rr.ScalarResources { + if r.ScalarResources == nil { + return r + } + r.ScalarResources[rrName] -= rrQuant } - panic(fmt.Errorf("resource is not sufficient to do operation: <%v> sub <%v>", - r, rr)) + return r } // SetMaxResource compares with ResourceList and takes max value for each Resource. diff --git a/pkg/scheduler/util/assert/assert.go b/pkg/scheduler/util/assert/assert.go new file mode 100644 index 0000000000..9593b00fb3 --- /dev/null +++ b/pkg/scheduler/util/assert/assert.go @@ -0,0 +1,41 @@ +package assert + +import ( + "fmt" + "os" + "runtime/debug" + + "github.com/golang/glog" +) + +const ( + EnvPanicOnError = "PANIC_ON_ERROR" +) + +var ( + panicOnError = true +) + +func init() { + env := os.Getenv(EnvPanicOnError) + if env == "false" { + panicOnError = false + } +} + +func Assert(condition bool, message string) { + if condition { + return + } + if panicOnError { + panic(message) + } + glog.Errorf("%s, %s", message, debug.Stack()) +} + +func Assertf(condition bool, format string, args ...interface{}) { + if condition { + return + } + Assert(condition, fmt.Sprintf(format, args...)) +} From f41ba10799bc4d7a340849ebfd0e530c34088570 Mon Sep 17 00:00:00 2001 From: sivanzcw Date: Fri, 31 May 2019 16:21:22 +0800 Subject: [PATCH 2/6] allow job to Inqueue if queue capability of job was not set --- pkg/scheduler/plugins/proportion/proportion.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pkg/scheduler/plugins/proportion/proportion.go b/pkg/scheduler/plugins/proportion/proportion.go index 
06f4c1c4cc..1f570ce01b 100644 --- a/pkg/scheduler/plugins/proportion/proportion.go +++ b/pkg/scheduler/plugins/proportion/proportion.go @@ -220,6 +220,11 @@ func (pp *proportionPlugin) OnSessionOpen(ssn *framework.Session) { } pgResource := api.NewResource(*job.PodGroup.Spec.MinResources) + if len(queue.Queue.Spec.Capability) == 0 { + glog.V(4).Infof("Capability of queue <%s> was not set, allow job <%s/%s> to Inqueue.", + queue.Name, job.Namespace, job.Name) + return true + } // The queue resource quota limit has not reached if pgResource.Clone().Add(attr.allocated).LessEqual(api.NewResource(queue.Queue.Spec.Capability)) { return true From c941e03086c9fb4bb17f40eb5e7735777857ba61 Mon Sep 17 00:00:00 2001 From: lminzhw Date: Mon, 3 Jun 2019 14:22:46 +0800 Subject: [PATCH 3/6] add performance code to master --- cmd/kube-batch/app/server.go | 1 + 1 file changed, 1 insertion(+) diff --git a/cmd/kube-batch/app/server.go b/cmd/kube-batch/app/server.go index a2d7d493f3..600d7fd0a1 100644 --- a/cmd/kube-batch/app/server.go +++ b/cmd/kube-batch/app/server.go @@ -20,6 +20,7 @@ import ( "context" "fmt" "net/http" + _ "net/http/pprof" "os" "time" From ead014d2fad42f526ac65a9827edf09ef332b325 Mon Sep 17 00:00:00 2001 From: dingtsh1 Date: Fri, 14 Jun 2019 17:48:45 +0800 Subject: [PATCH 4/6] add pod.annotations in binding --- pkg/scheduler/cache/cache.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/scheduler/cache/cache.go b/pkg/scheduler/cache/cache.go index 474b00f5a8..5726f629be 100644 --- a/pkg/scheduler/cache/cache.go +++ b/pkg/scheduler/cache/cache.go @@ -114,7 +114,7 @@ type defaultBinder struct { //Bind will send bind request to api server func (db *defaultBinder) Bind(p *v1.Pod, hostname string) error { if err := db.kubeclient.CoreV1().Pods(p.Namespace).Bind(&v1.Binding{ - ObjectMeta: metav1.ObjectMeta{Namespace: p.Namespace, Name: p.Name, UID: p.UID}, + ObjectMeta: metav1.ObjectMeta{Namespace: p.Namespace, Name: p.Name, UID: p.UID, Annotations: 
p.Annotations}, Target: v1.ObjectReference{ Kind: "Node", Name: hostname, From 00fb1ceef480c95a3797e3b4c87a8de3286454b2 Mon Sep 17 00:00:00 2001 From: wangyuqing4 Date: Wed, 19 Jun 2019 14:29:59 +0800 Subject: [PATCH 5/6] change nodeInfo Other to Others --- pkg/scheduler/api/node_info.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/scheduler/api/node_info.go b/pkg/scheduler/api/node_info.go index 679818db51..77625a41ef 100644 --- a/pkg/scheduler/api/node_info.go +++ b/pkg/scheduler/api/node_info.go @@ -46,7 +46,7 @@ type NodeInfo struct { Tasks map[TaskID]*TaskInfo // Used to store custom information - Other interface{} + Others map[string]interface{} } // NodeState defines the current state of node. @@ -98,7 +98,7 @@ func (ni *NodeInfo) Clone() *NodeInfo { for _, p := range ni.Tasks { res.AddTask(p) } - res.Other = ni.Other + res.Others = ni.Others return res } From 4de758b93ecf5a8769b91a930c245f30d7327c08 Mon Sep 17 00:00:00 2001 From: lminzhw Date: Wed, 26 Jun 2019 21:10:14 +0800 Subject: [PATCH 6/6] fix golint / UT --- cmd/kube-batch/app/server.go | 1 - pkg/scheduler/util/assert/assert.go | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/cmd/kube-batch/app/server.go b/cmd/kube-batch/app/server.go index 600d7fd0a1..a2d7d493f3 100644 --- a/cmd/kube-batch/app/server.go +++ b/cmd/kube-batch/app/server.go @@ -20,7 +20,6 @@ import ( "context" "fmt" "net/http" - _ "net/http/pprof" "os" "time" diff --git a/pkg/scheduler/util/assert/assert.go b/pkg/scheduler/util/assert/assert.go index 9593b00fb3..0ff84f0db0 100644 --- a/pkg/scheduler/util/assert/assert.go +++ b/pkg/scheduler/util/assert/assert.go @@ -9,6 +9,7 @@ import ( ) const ( + // EnvPanicOnError is the env name to determine panic on assertion failed or not EnvPanicOnError = "PANIC_ON_ERROR" ) @@ -23,6 +24,7 @@ func init() { } } +// Assert check condition, if condition is false, print message by log or panic func Assert(condition bool, message string) { if condition 
{ return @@ -33,6 +35,7 @@ func Assert(condition bool, message string) { glog.Errorf("%s, %s", message, debug.Stack()) } +// Assertf checks condition; if it is false, it formats the message and reports it via Assert func Assertf(condition bool, format string, args ...interface{}) { if condition { return