From a036337c9bfeb50dcf8da6ec31fa86e81d622011 Mon Sep 17 00:00:00 2001 From: Max Leonard Inden Date: Wed, 11 Apr 2018 11:38:46 +0200 Subject: [PATCH] crd: Enrich Prometheus operator CRD registration error handling If a user: 1. deploys the Prometheus operator without CRD validation 2. creates an invalid CRD instance (e.g. ServiceMonitor) 3. restarts the Prometheus operator, then it will crash-loop with a difficult-to-understand error message, failing to parse the invalid CRD instance. This patch improves the error feedback given to the user, making it easier to identify the invalid CRD instance. The Prometheus operator will keep crash-looping to enforce a valid CRD state on startup. --- pkg/alertmanager/operator.go | 5 ++++- pkg/k8sutil/k8sutil.go | 4 ++-- pkg/prometheus/operator.go | 21 ++++++++++++++++----- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/pkg/alertmanager/operator.go b/pkg/alertmanager/operator.go index 266daf4a2..552ca6cdf 100644 --- a/pkg/alertmanager/operator.go +++ b/pkg/alertmanager/operator.go @@ -554,5 +554,8 @@ func (c *Operator) createCRDs() error { } // We have to wait for the CRDs to be ready. Otherwise the initial watch may fail. - return k8sutil.WaitForCRDReady(c.mclient.MonitoringV1().Alertmanagers(c.config.Namespace).List) + return errors.Wrap( + k8sutil.WaitForCRDReady(c.mclient.MonitoringV1().Alertmanagers(c.config.Namespace).List), + "waiting for Alertmanager crd failed", + ) } diff --git a/pkg/k8sutil/k8sutil.go b/pkg/k8sutil/k8sutil.go index 4e51b0072..9829eb7f6 100644 --- a/pkg/k8sutil/k8sutil.go +++ b/pkg/k8sutil/k8sutil.go @@ -41,7 +41,7 @@ var CustomResourceDefinitionTypeMeta metav1.TypeMeta = metav1.TypeMeta{ APIVersion: "apiextensions.k8s.io/v1beta1", } -// WaitForCRDReady waits for a third party resource to be available for use. +// WaitForCRDReady waits for a custom resource definition to be available for use. 
func WaitForCRDReady(listFunc func(opts metav1.ListOptions) (runtime.Object, error)) error { err := wait.Poll(3*time.Second, 10*time.Minute, func() (bool, error) { _, err := listFunc(metav1.ListOptions{}) @@ -51,7 +51,7 @@ func WaitForCRDReady(listFunc func(opts metav1.ListOptions) (runtime.Object, err return false, nil } } - return false, err + return false, errors.Wrap(err, "failed to list CRD") } return true, nil }) diff --git a/pkg/prometheus/operator.go b/pkg/prometheus/operator.go index 6c643fef9..27eee2bf2 100644 --- a/pkg/prometheus/operator.go +++ b/pkg/prometheus/operator.go @@ -37,6 +37,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" @@ -1079,10 +1080,20 @@ func (c *Operator) createCRDs() error { c.logger.Log("msg", "CRD created", "crd", crd.Spec.Names.Kind) } - // We have to wait for the CRDs to be ready. Otherwise the initial watch may fail. - err := k8sutil.WaitForCRDReady(c.mclient.MonitoringV1().Prometheuses(c.config.Namespace).List) - if err != nil { - return err + crdListFuncs := []struct { + name string + listFunc func(opts metav1.ListOptions) (runtime.Object, error) + }{ + {"Prometheus", c.mclient.MonitoringV1().Prometheuses(c.config.Namespace).List}, + {"ServiceMonitor", c.mclient.MonitoringV1().ServiceMonitors(c.config.Namespace).List}, } - return k8sutil.WaitForCRDReady(c.mclient.MonitoringV1().ServiceMonitors(c.config.Namespace).List) + + for _, crdListFunc := range crdListFuncs { + err := k8sutil.WaitForCRDReady(crdListFunc.listFunc) + if err != nil { + return errors.Wrapf(err, "waiting for %v crd failed", crdListFunc.name) + } + } + + return nil }