mirror of
https://github.com/coreos/prometheus-operator.git
synced 2026-02-05 06:45:27 +01:00
fix: update status even without statefulset
The controllers aren't able to create the statefulsets if the input spec contains invalid fields (like missing secret/configmap key references). In this case, they should still update the object's status to reflect the issue to the end-users. Signed-off-by: Simon Pasquier <spasquie@redhat.com>
This commit is contained in:
@@ -28,6 +28,7 @@ import (
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/stretchr/testify/require"
|
||||
"golang.org/x/net/http2"
|
||||
appsv1 "k8s.io/api/apps/v1"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
@@ -68,6 +69,70 @@ func testAMCreateDeleteCluster(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func testAlertmanagerWithStatefulsetCreationFailure(t *testing.T) {
|
||||
// Don't run Alertmanager tests in parallel. See
|
||||
// https://github.com/prometheus/alertmanager/issues/1835 for details.
|
||||
ctx := context.Background()
|
||||
testCtx := framework.NewTestCtx(t)
|
||||
defer testCtx.Cleanup(t)
|
||||
|
||||
ns := framework.CreateNamespace(context.Background(), t, testCtx)
|
||||
framework.SetupPrometheusRBAC(context.Background(), t, testCtx, ns)
|
||||
|
||||
a := framework.MakeBasicAlertmanager(ns, "test", 1)
|
||||
// Invalid spec which prevents the creation of the statefulset
|
||||
a.Spec.Web = &monitoringv1.AlertmanagerWebSpec{
|
||||
WebConfigFileFields: monitoringv1.WebConfigFileFields{
|
||||
TLSConfig: &monitoringv1.WebTLSConfig{
|
||||
Cert: monitoringv1.SecretOrConfigMap{
|
||||
ConfigMap: &v1.ConfigMapKeySelector{},
|
||||
Secret: &v1.SecretKeySelector{
|
||||
LocalObjectReference: v1.LocalObjectReference{
|
||||
Name: "tls-cert",
|
||||
},
|
||||
Key: "tls.crt",
|
||||
},
|
||||
},
|
||||
KeySecret: v1.SecretKeySelector{
|
||||
LocalObjectReference: v1.LocalObjectReference{
|
||||
Name: "tls-cert",
|
||||
},
|
||||
Key: "tls.key",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
_, err := framework.MonClientV1.Alertmanagers(a.Namespace).Create(ctx, a, metav1.CreateOptions{})
|
||||
require.NoError(t, err)
|
||||
|
||||
var loopError error
|
||||
err = wait.PollUntilContextTimeout(ctx, time.Second, framework.DefaultTimeout, true, func(ctx context.Context) (bool, error) {
|
||||
current, err := framework.MonClientV1.Alertmanagers(ns).Get(ctx, "test", metav1.GetOptions{})
|
||||
if err != nil {
|
||||
loopError = fmt.Errorf("failed to get object: %w", err)
|
||||
return false, nil
|
||||
}
|
||||
|
||||
if err := framework.AssertCondition(current.Status.Conditions, monitoringv1.Reconciled, monitoringv1.ConditionFalse); err != nil {
|
||||
loopError = err
|
||||
return false, nil
|
||||
}
|
||||
|
||||
if err := framework.AssertCondition(current.Status.Conditions, monitoringv1.Available, monitoringv1.ConditionFalse); err != nil {
|
||||
loopError = err
|
||||
return false, nil
|
||||
}
|
||||
|
||||
return true, nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("%v: %v", err, loopError)
|
||||
}
|
||||
|
||||
require.NoError(t, framework.DeleteAlertmanagerAndWaitUntilGone(context.Background(), ns, "test"))
|
||||
}
|
||||
|
||||
func testAMScaling(t *testing.T) {
|
||||
// Don't run Alertmanager tests in parallel. See
|
||||
// https://github.com/prometheus/alertmanager/issues/1835 for details.
|
||||
|
||||
@@ -234,6 +234,7 @@ func testAllNSAlertmanager(t *testing.T) {
|
||||
testFuncs := map[string]func(t *testing.T){
|
||||
"AlertmanagerCRD": testAlertmanagerCRDValidation,
|
||||
"AMCreateDeleteCluster": testAMCreateDeleteCluster,
|
||||
"AMWithStatefulsetCreationFailure": testAlertmanagerWithStatefulsetCreationFailure,
|
||||
"AMScaling": testAMScaling,
|
||||
"AMVersionMigration": testAMVersionMigration,
|
||||
"AMStorageUpdate": testAMStorageUpdate,
|
||||
@@ -311,6 +312,7 @@ func testAllNSPrometheus(t *testing.T) {
|
||||
"PrometheusAgentAndServerNameColision": testAgentAndServerNameColision,
|
||||
"ScrapeConfigKubeNode": testScrapeConfigKubernetesNodeRole,
|
||||
"ScrapeConfigDNSSD": testScrapeConfigDNSSDConfig,
|
||||
"PrometheusWithStatefulsetCreationFailure": testPrometheusWithStatefulsetCreationFailure,
|
||||
}
|
||||
|
||||
for name, f := range testFuncs {
|
||||
@@ -322,6 +324,7 @@ func testAllNSThanosRuler(t *testing.T) {
|
||||
skipThanosRulerTests(t)
|
||||
testFuncs := map[string]func(t *testing.T){
|
||||
"ThanosRulerCreateDeleteCluster": testThanosRulerCreateDeleteCluster,
|
||||
"ThanosRulerWithStatefulsetCreationFailure": testThanosRulerWithStatefulsetCreationFailure,
|
||||
"ThanosRulerPrometheusRuleInDifferentNamespace": testThanosRulerPrometheusRuleInDifferentNamespace,
|
||||
"ThanosRulerPreserveUserAddedMetadata": testTRPreserveUserAddedMetadata,
|
||||
"ThanosRulerMinReadySeconds": testTRMinReadySeconds,
|
||||
|
||||
@@ -4935,6 +4935,68 @@ func testPromStrategicMergePatch(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func testPrometheusWithStatefulsetCreationFailure(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
testCtx := framework.NewTestCtx(t)
|
||||
defer testCtx.Cleanup(t)
|
||||
|
||||
ns := framework.CreateNamespace(context.Background(), t, testCtx)
|
||||
framework.SetupPrometheusRBAC(context.Background(), t, testCtx, ns)
|
||||
|
||||
p := framework.MakeBasicPrometheus(ns, "test", "", 1)
|
||||
// Invalid spec which prevents the creation of the statefulset
|
||||
p.Spec.Web = &monitoringv1.PrometheusWebSpec{
|
||||
WebConfigFileFields: monitoringv1.WebConfigFileFields{
|
||||
TLSConfig: &monitoringv1.WebTLSConfig{
|
||||
Cert: monitoringv1.SecretOrConfigMap{
|
||||
ConfigMap: &v1.ConfigMapKeySelector{},
|
||||
Secret: &v1.SecretKeySelector{
|
||||
LocalObjectReference: v1.LocalObjectReference{
|
||||
Name: "tls-cert",
|
||||
},
|
||||
Key: "tls.crt",
|
||||
},
|
||||
},
|
||||
KeySecret: v1.SecretKeySelector{
|
||||
LocalObjectReference: v1.LocalObjectReference{
|
||||
Name: "tls-cert",
|
||||
},
|
||||
Key: "tls.key",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
_, err := framework.MonClientV1.Prometheuses(p.Namespace).Create(ctx, p, metav1.CreateOptions{})
|
||||
require.NoError(t, err)
|
||||
|
||||
var loopError error
|
||||
err = wait.PollUntilContextTimeout(ctx, time.Second, framework.DefaultTimeout, true, func(ctx context.Context) (bool, error) {
|
||||
current, err := framework.MonClientV1.Prometheuses(ns).Get(ctx, "test", metav1.GetOptions{})
|
||||
if err != nil {
|
||||
loopError = fmt.Errorf("failed to get object: %w", err)
|
||||
return false, nil
|
||||
}
|
||||
|
||||
if err := framework.AssertCondition(current.Status.Conditions, monitoringv1.Reconciled, monitoringv1.ConditionFalse); err != nil {
|
||||
loopError = err
|
||||
return false, nil
|
||||
}
|
||||
|
||||
if err := framework.AssertCondition(current.Status.Conditions, monitoringv1.Available, monitoringv1.ConditionFalse); err != nil {
|
||||
loopError = err
|
||||
return false, nil
|
||||
}
|
||||
|
||||
return true, nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("%v: %v", err, loopError)
|
||||
}
|
||||
|
||||
require.NoError(t, framework.DeletePrometheusAndWaitUntilGone(context.Background(), ns, "test"))
|
||||
}
|
||||
|
||||
func isAlertmanagerDiscoveryWorking(ns, promSVCName, alertmanagerName string) func(ctx context.Context) (bool, error) {
|
||||
return func(ctx context.Context) (bool, error) {
|
||||
pods, err := framework.KubeClient.CoreV1().Pods(ns).List(ctx, alertmanager.ListOptions(alertmanagerName))
|
||||
|
||||
@@ -20,11 +20,13 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"google.golang.org/protobuf/proto"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/util/wait"
|
||||
|
||||
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
|
||||
)
|
||||
|
||||
func testThanosRulerCreateDeleteCluster(t *testing.T) {
|
||||
@@ -45,6 +47,50 @@ func testThanosRulerCreateDeleteCluster(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func testThanosRulerWithStatefulsetCreationFailure(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
testCtx := framework.NewTestCtx(t)
|
||||
defer testCtx.Cleanup(t)
|
||||
|
||||
ns := framework.CreateNamespace(ctx, t, testCtx)
|
||||
framework.SetupPrometheusRBAC(ctx, t, testCtx, ns)
|
||||
|
||||
tr := framework.MakeBasicThanosRuler("test", 1, "")
|
||||
// Empty queryEndpoints and queryConfigFile prevent the controller from
|
||||
// creating the statefulset.
|
||||
tr.Spec.QueryEndpoints = []string{}
|
||||
|
||||
_, err := framework.MonClientV1.ThanosRulers(ns).Create(ctx, tr, metav1.CreateOptions{})
|
||||
require.NoError(t, err)
|
||||
|
||||
var loopError error
|
||||
err = wait.PollUntilContextTimeout(ctx, time.Second, framework.DefaultTimeout, true, func(ctx context.Context) (bool, error) {
|
||||
current, err := framework.MonClientV1.ThanosRulers(ns).Get(ctx, "test", metav1.GetOptions{})
|
||||
if err != nil {
|
||||
loopError = fmt.Errorf("failed to get object: %w", err)
|
||||
return false, nil
|
||||
}
|
||||
|
||||
if err := framework.AssertCondition(current.Status.Conditions, monitoringv1.Reconciled, monitoringv1.ConditionFalse); err != nil {
|
||||
loopError = err
|
||||
return false, nil
|
||||
}
|
||||
|
||||
if err := framework.AssertCondition(current.Status.Conditions, monitoringv1.Available, monitoringv1.ConditionFalse); err != nil {
|
||||
loopError = err
|
||||
return false, nil
|
||||
}
|
||||
|
||||
return true, nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("%v: %v", err, loopError)
|
||||
}
|
||||
|
||||
require.NoError(t, framework.DeleteThanosRulerAndWaitUntilGone(ctx, ns, "test"))
|
||||
}
|
||||
|
||||
func testThanosRulerPrometheusRuleInDifferentNamespace(t *testing.T) {
|
||||
testCtx := framework.NewTestCtx(t)
|
||||
defer testCtx.Cleanup(t)
|
||||
@@ -278,19 +324,19 @@ func testTRAlertmanagerConfig(t *testing.T) {
|
||||
|
||||
// Create Alertmanager resource and service
|
||||
alertmanager, err := framework.CreateAlertmanagerAndWaitUntilReady(context.Background(), framework.MakeBasicAlertmanager(ns, name, 1))
|
||||
assert.NoError(t, err)
|
||||
require.NoError(t, err)
|
||||
|
||||
amSVC := framework.MakeAlertmanagerService(alertmanager.Name, group, v1.ServiceTypeClusterIP)
|
||||
_, err = framework.CreateOrUpdateServiceAndWaitUntilReady(context.Background(), ns, amSVC)
|
||||
assert.NoError(t, err)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Create a Prometheus resource because Thanos ruler needs a query API.
|
||||
prometheus, err := framework.CreatePrometheusAndWaitUntilReady(context.Background(), ns, framework.MakeBasicPrometheus(ns, name, name, 1))
|
||||
assert.NoError(t, err)
|
||||
require.NoError(t, err)
|
||||
|
||||
svc := framework.MakePrometheusService(prometheus.Name, name, v1.ServiceTypeClusterIP)
|
||||
_, err = framework.CreateOrUpdateServiceAndWaitUntilReady(context.Background(), ns, svc)
|
||||
assert.NoError(t, err)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Create Secret with Alertmanager config,
|
||||
trAmConfigSecret := &v1.Secret{
|
||||
@@ -308,7 +354,7 @@ alertmanagers:
|
||||
},
|
||||
}
|
||||
_, err = framework.KubeClient.CoreV1().Secrets(ns).Create(context.Background(), trAmConfigSecret, metav1.CreateOptions{})
|
||||
assert.NoError(t, err)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Create Thanos ruler resource and service
|
||||
thanos := framework.MakeBasicThanosRuler(name, 1, fmt.Sprintf("http://%s:%d/", svc.Name, svc.Spec.Ports[0].Port))
|
||||
@@ -321,17 +367,17 @@ alertmanagers:
|
||||
}
|
||||
|
||||
_, err = framework.CreateThanosRulerAndWaitUntilReady(context.Background(), ns, thanos)
|
||||
assert.NoError(t, err)
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err = framework.CreateOrUpdateServiceAndWaitUntilReady(context.Background(), ns, framework.MakeThanosRulerService(thanos.Name, group, v1.ServiceTypeClusterIP))
|
||||
assert.NoError(t, err)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Create firing rule
|
||||
_, err = framework.MakeAndCreateFiringRule(context.Background(), ns, "rule1", testAlert)
|
||||
assert.NoError(t, err)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = framework.WaitForAlertmanagerFiringAlert(context.Background(), ns, amSVC.Name, testAlert)
|
||||
assert.NoError(t, err)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
// Tests Thanos ruler query Config
|
||||
@@ -354,11 +400,11 @@ func testTRQueryConfig(t *testing.T) {
|
||||
|
||||
// Create a Prometheus resource because Thanos ruler needs a query API.
|
||||
prometheus, err := framework.CreatePrometheusAndWaitUntilReady(context.Background(), ns, framework.MakeBasicPrometheus(ns, name, name, 1))
|
||||
assert.NoError(t, err)
|
||||
require.NoError(t, err)
|
||||
|
||||
promSVC := framework.MakePrometheusService(prometheus.Name, name, v1.ServiceTypeClusterIP)
|
||||
_, err = framework.CreateOrUpdateServiceAndWaitUntilReady(context.Background(), ns, promSVC)
|
||||
assert.NoError(t, err)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Create Secret with query config,
|
||||
trQueryConfSecret := &v1.Secret{
|
||||
@@ -374,7 +420,7 @@ func testTRQueryConfig(t *testing.T) {
|
||||
},
|
||||
}
|
||||
_, err = framework.KubeClient.CoreV1().Secrets(ns).Create(context.Background(), trQueryConfSecret, metav1.CreateOptions{})
|
||||
assert.NoError(t, err)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Create Thanos ruler resource and service
|
||||
// setting queryEndpoint to "" as it will be ignored because we set QueryConfig
|
||||
@@ -388,15 +434,15 @@ func testTRQueryConfig(t *testing.T) {
|
||||
}
|
||||
|
||||
_, err = framework.CreateThanosRulerAndWaitUntilReady(context.Background(), ns, thanos)
|
||||
assert.NoError(t, err)
|
||||
require.NoError(t, err)
|
||||
|
||||
svc := framework.MakeThanosRulerService(thanos.Name, group, v1.ServiceTypeClusterIP)
|
||||
_, err = framework.CreateOrUpdateServiceAndWaitUntilReady(context.Background(), ns, svc)
|
||||
assert.NoError(t, err)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Create firing rule
|
||||
_, err = framework.MakeAndCreateFiringRule(context.Background(), ns, "rule1", testAlert)
|
||||
assert.NoError(t, err)
|
||||
require.NoError(t, err)
|
||||
|
||||
if err := framework.WaitForThanosFiringAlert(context.Background(), ns, svc.Name, testAlert); err != nil {
|
||||
t.Fatal(err)
|
||||
|
||||
@@ -29,6 +29,7 @@ import (
|
||||
"github.com/prometheus/alertmanager/api/v2/models"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
extv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime/schema"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
@@ -321,7 +322,12 @@ func (f *Framework) DeleteAlertmanagerAndWaitUntilGone(ctx context.Context, ns,
|
||||
return errors.Wrap(err, fmt.Sprintf("waiting for Alertmanager tpr (%s) to vanish timed out", name))
|
||||
}
|
||||
|
||||
return f.KubeClient.CoreV1().Secrets(ns).Delete(ctx, fmt.Sprintf("alertmanager-%s", name), metav1.DeleteOptions{})
|
||||
err = f.KubeClient.CoreV1().Secrets(ns).Delete(ctx, fmt.Sprintf("alertmanager-%s", name), metav1.DeleteOptions{})
|
||||
if err != nil && !apierrors.IsNotFound(err) {
|
||||
return fmt.Errorf("failed to delete Alertmanager secret: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f *Framework) WaitForAlertmanagerPodInitialized(ctx context.Context, ns, name string, amountPeers int, forceEnableClusterMode, https bool) error {
|
||||
|
||||
@@ -16,6 +16,7 @@ package framework
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
@@ -31,6 +32,22 @@ type resourceStatus struct {
|
||||
conditions []monitoringv1.Condition
|
||||
}
|
||||
|
||||
func (f *Framework) AssertCondition(conds []monitoringv1.Condition, expectedType monitoringv1.ConditionType, expectedStatus monitoringv1.ConditionStatus) error {
|
||||
for _, c := range conds {
|
||||
if c.Type != expectedType {
|
||||
continue
|
||||
}
|
||||
|
||||
if c.Status != expectedStatus {
|
||||
return fmt.Errorf("expected condition %q to be %q but got %q", c.Type, expectedStatus, c.Status)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
return fmt.Errorf("condition %q not found", expectedType)
|
||||
}
|
||||
|
||||
// WaitForResourceAvailable waits for a monitoring resource to report itself as being reconciled & available.
|
||||
// If the resource isn't available within the given timeout, it returns an error.
|
||||
func (f *Framework) WaitForResourceAvailable(ctx context.Context, getResourceStatus func(context.Context) (resourceStatus, error), timeout time.Duration) error {
|
||||
|
||||
Reference in New Issue
Block a user