Skip to content

Commit

Permalink
feat(operator): Introduce Prometheus evaluation (#183)
Browse files Browse the repository at this point in the history
Co-authored-by: RealAnna <anna.reale@dynatrace.com>
  • Loading branch information
odubajDT and RealAnna committed Oct 18, 2022
1 parent 1ce044a commit c2ab773
Show file tree
Hide file tree
Showing 7 changed files with 111 additions and 38 deletions.
5 changes: 3 additions & 2 deletions operator/api/v1alpha1/keptnevaluation_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,9 @@ type KeptnEvaluationStatus struct {
}

// EvaluationStatusItem holds the outcome of evaluating a single objective.
type EvaluationStatusItem struct {
	// Value is the evaluated value in its string form.
	Value string `json:"value"`
	// Status is the state of this individual evaluation.
	Status common.KeptnState `json:"status"`
	// Message optionally carries additional detail about the result, such as
	// an error or warning text. It is omitted from the JSON output when empty.
	Message string `json:"message,omitempty"`
}

//+kubebuilder:object:root=true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ spec:
evaluationStatus:
additionalProperties:
properties:
message:
type: string
status:
type: string
value:
Expand Down
2 changes: 1 addition & 1 deletion operator/config/manager/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ kind: Kustomization
images:
- name: controller
newName: docker.io/annadreal/keptn-lifecycle-operator
newTag: "202210171665998375"
newTag: "202210171665999134"
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ metadata:
spec:
source: prometheus
objectives:
- name: query-1 #string
query: "xxxx" #string: promQL query
evaluationTarget: <20 #string: can only be starting with < or >
- name: query-2
query: "yyyy"
evaluationTarget: >4
- name: prometheus
query: "sum(prometheus_engine_query_duration_seconds_count)"
evaluationTarget: ">1000" #string: must start with < or >
- name: prometheus2
query: "sum(prometheus_engine_query_duration_seconds_count)"
evaluationTarget: "<1000" #string: must start with < or >

124 changes: 96 additions & 28 deletions operator/controllers/keptnevaluation/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,13 @@ import (
"fmt"
"time"

"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"math"
"net/http"
"strconv"

promapi "github.com/prometheus/client_golang/api"
prometheus "github.com/prometheus/client_golang/api/prometheus/v1"
"github.com/prometheus/common/model"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/codes"
"go.opentelemetry.io/otel/propagation"
Expand All @@ -33,7 +37,9 @@ import (
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/record"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/predicate"

"github.com/go-logr/logr"
klcv1alpha1 "github.com/keptn-sandbox/lifecycle-controller/operator/api/v1alpha1"
Expand Down Expand Up @@ -96,12 +102,11 @@ func (r *KeptnEvaluationReconciler) Reconcile(ctx context.Context, req ctrl.Requ

if evaluation.Status.RetryCount >= evaluation.Spec.Retries {
r.recordEvent("Warning", evaluation, "ReconcileTimeOut", "retryCount exceeded")
err := fmt.Errorf("RetryCount for evaluation exceeded")
err := fmt.Errorf("retryCount for evaluation exceeded")
span.SetStatus(codes.Error, err.Error())
evaluation.Status.OverallStatus = common.StateFailed
r.updateFinishedEvaluationMetrics(ctx, evaluation, span)

return ctrl.Result{}, err
return ctrl.Result{}, nil
}

if !evaluation.Status.OverallStatus.IsSucceeded() {
Expand All @@ -111,10 +116,11 @@ func (r *KeptnEvaluationReconciler) Reconcile(ctx context.Context, req ctrl.Requ
}
evaluationDefinition, evaluationProvider, err := r.fetchDefinitionAndProvider(ctx, namespacedDefinition)
if err != nil {
return ctrl.Result{Requeue: true, RequeueAfter: 30 * time.Second}, nil
}

if evaluationDefinition == nil || evaluationProvider == nil {
if errors.IsNotFound(err) {
r.Log.Info(err.Error() + ", ignoring error since object must be deleted")
return ctrl.Result{Requeue: true, RequeueAfter: 30 * time.Second}, nil
}
r.Log.Error(err, "Failed to retrieve a resource")
return ctrl.Result{}, nil
}

Expand Down Expand Up @@ -213,12 +219,6 @@ func (r *KeptnEvaluationReconciler) fetchDefinitionAndProvider(ctx context.Conte
evaluationDefinition := &klcv1alpha1.KeptnEvaluationDefinition{}

if err := r.Client.Get(ctx, namespacedDefinition, evaluationDefinition); err != nil {
if errors.IsNotFound(err) {
// taking down all associated K8s resources is handled by K8s
r.Log.Info("KeptnEvaluationDefinition resource not found. Ignoring since object must be deleted")
return nil, nil, nil
}
r.Log.Error(err, "Failed to get the KeptnEvaluationDefinition")
return nil, nil, err
}

Expand All @@ -230,12 +230,6 @@ func (r *KeptnEvaluationReconciler) fetchDefinitionAndProvider(ctx context.Conte
evaluationProvider := &klcv1alpha1.KeptnEvaluationProvider{}

if err := r.Client.Get(ctx, namespacedProvider, evaluationProvider); err != nil {
if errors.IsNotFound(err) {
// taking down all associated K8s resources is handled by K8s
r.Log.Info("KeptnEvaluationProvider resource not found. Ignoring since object must be deleted")
return nil, nil, nil
}
r.Log.Error(err, "Failed to get the KeptnEvaluationProvider")
return nil, nil, err
}

Expand All @@ -245,19 +239,93 @@ func (r *KeptnEvaluationReconciler) fetchDefinitionAndProvider(ctx context.Conte
// queryEvaluation runs the objective's query against the provider's target
// server through the Prometheus HTTP API and checks the single returned sample
// against the objective's evaluation target.
//
// The returned item starts out as failed and is only marked as succeeded when
// the query yields exactly one sample whose value passes checkValue. Whenever
// the evaluation cannot be completed, Message carries the reason.
func (r *KeptnEvaluationReconciler) queryEvaluation(objective klcv1alpha1.Objective, provider klcv1alpha1.KeptnEvaluationProvider) *klcv1alpha1.EvaluationStatusItem {
	query := &klcv1alpha1.EvaluationStatusItem{
		Value:  "",
		Status: common.StateFailed, // failed by default; only a passing check flips it to succeeded
	}

	queryTime := time.Now().UTC()
	r.Log.Info("Running query: /api/v1/query?query=" + objective.Query + "&time=" + queryTime.String())

	// NOTE(review): the HTTP client has no timeout and the query below runs on
	// context.Background(), so an unresponsive server blocks this call — confirm
	// whether a deadline should be applied here.
	client, err := promapi.NewClient(promapi.Config{Address: provider.Spec.TargetServer, Client: &http.Client{}})
	if err != nil {
		// Surface client construction failures instead of carrying on with an
		// unusable client.
		query.Message = err.Error()
		return query
	}

	api := prometheus.NewAPI(client)
	result, w, err := api.Query(
		context.Background(),
		objective.Query,
		queryTime,
		[]prometheus.Option{}...,
	)
	if err != nil {
		query.Message = err.Error()
		return query
	}

	// Warnings do not abort the evaluation; the first one is kept as the
	// message unless a later step overwrites it.
	if len(w) != 0 {
		query.Message = w[0]
		r.Log.Info("Prometheus API returned warnings: " + w[0])
	}

	// check if we can cast the result to a vector, it might be another data struct which we can't process
	resultVector, ok := result.(model.Vector)
	if !ok {
		query.Message = "could not cast result"
		return query
	}

	// We are only allowed to return one value, if not the query may be malformed
	// we are using two different errors to give the user more information about the result
	switch {
	case len(resultVector) == 0:
		r.Log.Info("No values in query result")
		query.Message = "No values in query result"
		return query
	case len(resultVector) > 1:
		r.Log.Info("Too many values in the query result")
		query.Message = "Too many values in the query result"
		return query
	}

	query.Value = resultVector[0].Value.String()
	check, err := r.checkValue(objective, query)
	if err != nil {
		query.Message = err.Error()
		r.Log.Error(err, "Could not check query result")
	}
	if check {
		query.Status = common.StateSucceeded
	}
	return query
}

// checkValue compares the numeric value stored in query against the
// objective's evaluation target.
//
// The evaluation target is read as a one-character comparison operator
// (">" or "<") followed by a floating point threshold, for example ">1000".
// It returns true when the value satisfies the comparison. An error is
// returned when either input is empty, when either number cannot be parsed or
// is NaN, or when the operator is not supported.
func (r *KeptnEvaluationReconciler) checkValue(objective klcv1alpha1.Objective, query *klcv1alpha1.EvaluationStatusItem) (bool, error) {
	if len(query.Value) == 0 || len(objective.EvaluationTarget) == 0 {
		return false, fmt.Errorf("no values")
	}

	sign := objective.EvaluationTarget[:1]
	eval := objective.EvaluationTarget[1:]

	resultValue, err := strconv.ParseFloat(query.Value, 64)
	if err != nil {
		return false, fmt.Errorf("parsing query value %q: %w", query.Value, err)
	}
	// ParseFloat accepts "NaN" without an error, so it has to be rejected
	// explicitly; otherwise the caller would see a failed check with no reason.
	if math.IsNaN(resultValue) {
		return false, fmt.Errorf("query value %q is not a number", query.Value)
	}

	compareValue, err := strconv.ParseFloat(eval, 64)
	if err != nil {
		return false, fmt.Errorf("parsing evaluation target %q: %w", objective.EvaluationTarget, err)
	}
	if math.IsNaN(compareValue) {
		return false, fmt.Errorf("evaluation target %q is not a number", objective.EvaluationTarget)
	}

	// choose comparator
	switch sign {
	case ">":
		return resultValue > compareValue, nil
	case "<":
		return resultValue < compareValue, nil
	default:
		return false, fmt.Errorf("invalid operator")
	}
}

// recordEvent emits an event of the given type on the evaluation through the
// reconciler's recorder. The event message is longReason followed by the
// evaluation's namespace, name and workload version.
func (r *KeptnEvaluationReconciler) recordEvent(eventType string, evaluation *klcv1alpha1.KeptnEvaluation, shortReason string, longReason string) {
	message := fmt.Sprintf(
		"%s / Namespace: %s, Name: %s, WorkloadVersion: %s ",
		longReason,
		evaluation.Namespace,
		evaluation.Name,
		evaluation.Spec.WorkloadVersion,
	)
	r.Recorder.Event(evaluation, eventType, shortReason, message)
}
2 changes: 1 addition & 1 deletion operator/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ require (
github.com/onsi/ginkgo v1.16.5
github.com/onsi/gomega v1.18.1
github.com/prometheus/client_golang v1.13.0
github.com/prometheus/common v0.37.0
github.com/stretchr/testify v1.7.1
go.opentelemetry.io/otel v1.10.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.10.0
Expand Down Expand Up @@ -67,7 +68,6 @@ require (
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_model v0.2.0 // indirect
github.com/prometheus/common v0.37.0 // indirect
github.com/prometheus/procfs v0.8.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.10.0 // indirect
Expand Down
2 changes: 2 additions & 0 deletions operator/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,7 @@ github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22
github.com/jonboulle/clockwork v0.2.2/go.mod h1:Pkfl5aHPm1nk2H9h0bjmnJD/BcgbGXUBGnn1kMkgxc8=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA=
github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
Expand Down Expand Up @@ -376,6 +377,7 @@ github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8m
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs=
Expand Down

0 comments on commit c2ab773

Please sign in to comment.