1
0
mirror of https://github.com/coreos/prometheus-operator.git synced 2026-02-05 06:45:27 +01:00

fix: update status even without statefulset

The controllers aren't able to create the statefulsets if the input spec
contains invalid fields (like missing secret/configmap key references).

In this case, they should still update the object's status to reflect
the issue to the end-users.

Signed-off-by: Simon Pasquier <spasquie@redhat.com>
This commit is contained in:
Simon Pasquier
2023-09-05 11:47:28 +02:00
parent a6d3bd8529
commit cd748b1483
10 changed files with 274 additions and 40 deletions

View File

@@ -28,6 +28,7 @@ import (
"github.com/google/go-cmp/cmp"
"github.com/pkg/errors"
"github.com/stretchr/testify/require"
"golang.org/x/net/http2"
appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
@@ -68,6 +69,70 @@ func testAMCreateDeleteCluster(t *testing.T) {
}
}
// testAlertmanagerWithStatefulsetCreationFailure verifies that the operator
// still updates the Alertmanager object's status conditions (Reconciled and
// Available both reported False) when the statefulset can't be created
// because the input spec is invalid.
func testAlertmanagerWithStatefulsetCreationFailure(t *testing.T) {
	// Don't run Alertmanager tests in parallel. See
	// https://github.com/prometheus/alertmanager/issues/1835 for details.
	ctx := context.Background()
	testCtx := framework.NewTestCtx(t)
	defer testCtx.Cleanup(t)

	// Reuse the test's ctx for all API calls instead of creating fresh
	// background contexts.
	ns := framework.CreateNamespace(ctx, t, testCtx)
	framework.SetupPrometheusRBAC(ctx, t, testCtx, ns)

	a := framework.MakeBasicAlertmanager(ns, "test", 1)
	// Invalid spec which prevents the creation of the statefulset (the TLS
	// cert references both a configmap and a secret at the same time —
	// presumably the conflict rejected by the controller).
	a.Spec.Web = &monitoringv1.AlertmanagerWebSpec{
		WebConfigFileFields: monitoringv1.WebConfigFileFields{
			TLSConfig: &monitoringv1.WebTLSConfig{
				Cert: monitoringv1.SecretOrConfigMap{
					ConfigMap: &v1.ConfigMapKeySelector{},
					Secret: &v1.SecretKeySelector{
						LocalObjectReference: v1.LocalObjectReference{
							Name: "tls-cert",
						},
						Key: "tls.crt",
					},
				},
				KeySecret: v1.SecretKeySelector{
					LocalObjectReference: v1.LocalObjectReference{
						Name: "tls-cert",
					},
					Key: "tls.key",
				},
			},
		},
	}

	_, err := framework.MonClientV1.Alertmanagers(a.Namespace).Create(ctx, a, metav1.CreateOptions{})
	require.NoError(t, err)

	// Poll until the operator reflects the failure in the status conditions.
	// loopError keeps the most recent reason so the final t.Fatalf is useful.
	var loopError error
	err = wait.PollUntilContextTimeout(ctx, time.Second, framework.DefaultTimeout, true, func(ctx context.Context) (bool, error) {
		current, err := framework.MonClientV1.Alertmanagers(ns).Get(ctx, "test", metav1.GetOptions{})
		if err != nil {
			loopError = fmt.Errorf("failed to get object: %w", err)
			return false, nil
		}

		if err := framework.AssertCondition(current.Status.Conditions, monitoringv1.Reconciled, monitoringv1.ConditionFalse); err != nil {
			loopError = err
			return false, nil
		}

		if err := framework.AssertCondition(current.Status.Conditions, monitoringv1.Available, monitoringv1.ConditionFalse); err != nil {
			loopError = err
			return false, nil
		}

		return true, nil
	})
	if err != nil {
		t.Fatalf("%v: %v", err, loopError)
	}

	require.NoError(t, framework.DeleteAlertmanagerAndWaitUntilGone(ctx, ns, "test"))
}
func testAMScaling(t *testing.T) {
// Don't run Alertmanager tests in parallel. See
// https://github.com/prometheus/alertmanager/issues/1835 for details.

View File

@@ -234,6 +234,7 @@ func testAllNSAlertmanager(t *testing.T) {
testFuncs := map[string]func(t *testing.T){
"AlertmanagerCRD": testAlertmanagerCRDValidation,
"AMCreateDeleteCluster": testAMCreateDeleteCluster,
"AMWithStatefulsetCreationFailure": testAlertmanagerWithStatefulsetCreationFailure,
"AMScaling": testAMScaling,
"AMVersionMigration": testAMVersionMigration,
"AMStorageUpdate": testAMStorageUpdate,
@@ -311,6 +312,7 @@ func testAllNSPrometheus(t *testing.T) {
"PrometheusAgentAndServerNameColision": testAgentAndServerNameColision,
"ScrapeConfigKubeNode": testScrapeConfigKubernetesNodeRole,
"ScrapeConfigDNSSD": testScrapeConfigDNSSDConfig,
"PrometheusWithStatefulsetCreationFailure": testPrometheusWithStatefulsetCreationFailure,
}
for name, f := range testFuncs {
@@ -322,6 +324,7 @@ func testAllNSThanosRuler(t *testing.T) {
skipThanosRulerTests(t)
testFuncs := map[string]func(t *testing.T){
"ThanosRulerCreateDeleteCluster": testThanosRulerCreateDeleteCluster,
"ThanosRulerWithStatefulsetCreationFailure": testThanosRulerWithStatefulsetCreationFailure,
"ThanosRulerPrometheusRuleInDifferentNamespace": testThanosRulerPrometheusRuleInDifferentNamespace,
"ThanosRulerPreserveUserAddedMetadata": testTRPreserveUserAddedMetadata,
"ThanosRulerMinReadySeconds": testTRMinReadySeconds,

View File

@@ -4935,6 +4935,68 @@ func testPromStrategicMergePatch(t *testing.T) {
}
}
// testPrometheusWithStatefulsetCreationFailure verifies that the operator
// still updates the Prometheus object's status conditions (Reconciled and
// Available both reported False) when the statefulset can't be created
// because the input spec is invalid.
func testPrometheusWithStatefulsetCreationFailure(t *testing.T) {
	ctx := context.Background()
	testCtx := framework.NewTestCtx(t)
	defer testCtx.Cleanup(t)

	// Reuse the test's ctx for all API calls instead of creating fresh
	// background contexts.
	ns := framework.CreateNamespace(ctx, t, testCtx)
	framework.SetupPrometheusRBAC(ctx, t, testCtx, ns)

	p := framework.MakeBasicPrometheus(ns, "test", "", 1)
	// Invalid spec which prevents the creation of the statefulset (the TLS
	// cert references both a configmap and a secret at the same time —
	// presumably the conflict rejected by the controller).
	p.Spec.Web = &monitoringv1.PrometheusWebSpec{
		WebConfigFileFields: monitoringv1.WebConfigFileFields{
			TLSConfig: &monitoringv1.WebTLSConfig{
				Cert: monitoringv1.SecretOrConfigMap{
					ConfigMap: &v1.ConfigMapKeySelector{},
					Secret: &v1.SecretKeySelector{
						LocalObjectReference: v1.LocalObjectReference{
							Name: "tls-cert",
						},
						Key: "tls.crt",
					},
				},
				KeySecret: v1.SecretKeySelector{
					LocalObjectReference: v1.LocalObjectReference{
						Name: "tls-cert",
					},
					Key: "tls.key",
				},
			},
		},
	}

	_, err := framework.MonClientV1.Prometheuses(p.Namespace).Create(ctx, p, metav1.CreateOptions{})
	require.NoError(t, err)

	// Poll until the operator reflects the failure in the status conditions.
	// loopError keeps the most recent reason so the final t.Fatalf is useful.
	var loopError error
	err = wait.PollUntilContextTimeout(ctx, time.Second, framework.DefaultTimeout, true, func(ctx context.Context) (bool, error) {
		current, err := framework.MonClientV1.Prometheuses(ns).Get(ctx, "test", metav1.GetOptions{})
		if err != nil {
			loopError = fmt.Errorf("failed to get object: %w", err)
			return false, nil
		}

		if err := framework.AssertCondition(current.Status.Conditions, monitoringv1.Reconciled, monitoringv1.ConditionFalse); err != nil {
			loopError = err
			return false, nil
		}

		if err := framework.AssertCondition(current.Status.Conditions, monitoringv1.Available, monitoringv1.ConditionFalse); err != nil {
			loopError = err
			return false, nil
		}

		return true, nil
	})
	if err != nil {
		t.Fatalf("%v: %v", err, loopError)
	}

	require.NoError(t, framework.DeletePrometheusAndWaitUntilGone(ctx, ns, "test"))
}
func isAlertmanagerDiscoveryWorking(ns, promSVCName, alertmanagerName string) func(ctx context.Context) (bool, error) {
return func(ctx context.Context) (bool, error) {
pods, err := framework.KubeClient.CoreV1().Pods(ns).List(ctx, alertmanager.ListOptions(alertmanagerName))

View File

@@ -20,11 +20,13 @@ import (
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"google.golang.org/protobuf/proto"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
)
func testThanosRulerCreateDeleteCluster(t *testing.T) {
@@ -45,6 +47,50 @@ func testThanosRulerCreateDeleteCluster(t *testing.T) {
}
}
// testThanosRulerWithStatefulsetCreationFailure checks that the ThanosRuler
// status conditions report Reconciled=False and Available=False when the
// controller can't create the statefulset from the spec.
func testThanosRulerWithStatefulsetCreationFailure(t *testing.T) {
	ctx := context.Background()
	testCtx := framework.NewTestCtx(t)
	defer testCtx.Cleanup(t)

	ns := framework.CreateNamespace(ctx, t, testCtx)
	framework.SetupPrometheusRBAC(ctx, t, testCtx, ns)

	tr := framework.MakeBasicThanosRuler("test", 1, "")
	// Empty queryEndpoints and queryConfigFile prevent the controller from
	// creating the statefulset.
	tr.Spec.QueryEndpoints = []string{}

	_, err := framework.MonClientV1.ThanosRulers(ns).Create(ctx, tr, metav1.CreateOptions{})
	require.NoError(t, err)

	// lastErr records why the most recent poll attempt failed so the final
	// t.Fatalf message is actionable.
	var lastErr error
	expected := []monitoringv1.ConditionType{monitoringv1.Reconciled, monitoringv1.Available}
	err = wait.PollUntilContextTimeout(ctx, time.Second, framework.DefaultTimeout, true, func(ctx context.Context) (bool, error) {
		current, getErr := framework.MonClientV1.ThanosRulers(ns).Get(ctx, "test", metav1.GetOptions{})
		if getErr != nil {
			lastErr = fmt.Errorf("failed to get object: %w", getErr)
			return false, nil
		}

		// Both conditions must be False before the poll succeeds.
		for _, condType := range expected {
			if condErr := framework.AssertCondition(current.Status.Conditions, condType, monitoringv1.ConditionFalse); condErr != nil {
				lastErr = condErr
				return false, nil
			}
		}

		return true, nil
	})
	if err != nil {
		t.Fatalf("%v: %v", err, lastErr)
	}

	require.NoError(t, framework.DeleteThanosRulerAndWaitUntilGone(ctx, ns, "test"))
}
func testThanosRulerPrometheusRuleInDifferentNamespace(t *testing.T) {
testCtx := framework.NewTestCtx(t)
defer testCtx.Cleanup(t)
@@ -278,19 +324,19 @@ func testTRAlertmanagerConfig(t *testing.T) {
// Create Alertmanager resource and service
alertmanager, err := framework.CreateAlertmanagerAndWaitUntilReady(context.Background(), framework.MakeBasicAlertmanager(ns, name, 1))
assert.NoError(t, err)
require.NoError(t, err)
amSVC := framework.MakeAlertmanagerService(alertmanager.Name, group, v1.ServiceTypeClusterIP)
_, err = framework.CreateOrUpdateServiceAndWaitUntilReady(context.Background(), ns, amSVC)
assert.NoError(t, err)
require.NoError(t, err)
// Create a Prometheus resource because Thanos ruler needs a query API.
prometheus, err := framework.CreatePrometheusAndWaitUntilReady(context.Background(), ns, framework.MakeBasicPrometheus(ns, name, name, 1))
assert.NoError(t, err)
require.NoError(t, err)
svc := framework.MakePrometheusService(prometheus.Name, name, v1.ServiceTypeClusterIP)
_, err = framework.CreateOrUpdateServiceAndWaitUntilReady(context.Background(), ns, svc)
assert.NoError(t, err)
require.NoError(t, err)
// Create Secret with Alermanager config,
trAmConfigSecret := &v1.Secret{
@@ -308,7 +354,7 @@ alertmanagers:
},
}
_, err = framework.KubeClient.CoreV1().Secrets(ns).Create(context.Background(), trAmConfigSecret, metav1.CreateOptions{})
assert.NoError(t, err)
require.NoError(t, err)
// Create Thanos ruler resource and service
thanos := framework.MakeBasicThanosRuler(name, 1, fmt.Sprintf("http://%s:%d/", svc.Name, svc.Spec.Ports[0].Port))
@@ -321,17 +367,17 @@ alertmanagers:
}
_, err = framework.CreateThanosRulerAndWaitUntilReady(context.Background(), ns, thanos)
assert.NoError(t, err)
require.NoError(t, err)
_, err = framework.CreateOrUpdateServiceAndWaitUntilReady(context.Background(), ns, framework.MakeThanosRulerService(thanos.Name, group, v1.ServiceTypeClusterIP))
assert.NoError(t, err)
require.NoError(t, err)
// Create firing rule
_, err = framework.MakeAndCreateFiringRule(context.Background(), ns, "rule1", testAlert)
assert.NoError(t, err)
require.NoError(t, err)
err = framework.WaitForAlertmanagerFiringAlert(context.Background(), ns, amSVC.Name, testAlert)
assert.NoError(t, err)
require.NoError(t, err)
}
// Tests Thanos ruler query Config
@@ -354,11 +400,11 @@ func testTRQueryConfig(t *testing.T) {
// Create a Prometheus resource because Thanos ruler needs a query API.
prometheus, err := framework.CreatePrometheusAndWaitUntilReady(context.Background(), ns, framework.MakeBasicPrometheus(ns, name, name, 1))
assert.NoError(t, err)
require.NoError(t, err)
promSVC := framework.MakePrometheusService(prometheus.Name, name, v1.ServiceTypeClusterIP)
_, err = framework.CreateOrUpdateServiceAndWaitUntilReady(context.Background(), ns, promSVC)
assert.NoError(t, err)
require.NoError(t, err)
// Create Secret with query config,
trQueryConfSecret := &v1.Secret{
@@ -374,7 +420,7 @@ func testTRQueryConfig(t *testing.T) {
},
}
_, err = framework.KubeClient.CoreV1().Secrets(ns).Create(context.Background(), trQueryConfSecret, metav1.CreateOptions{})
assert.NoError(t, err)
require.NoError(t, err)
// Create Thanos ruler resource and service
// setting queryEndpoint to "" as it will be ignored because we set QueryConfig
@@ -388,15 +434,15 @@ func testTRQueryConfig(t *testing.T) {
}
_, err = framework.CreateThanosRulerAndWaitUntilReady(context.Background(), ns, thanos)
assert.NoError(t, err)
require.NoError(t, err)
svc := framework.MakeThanosRulerService(thanos.Name, group, v1.ServiceTypeClusterIP)
_, err = framework.CreateOrUpdateServiceAndWaitUntilReady(context.Background(), ns, svc)
assert.NoError(t, err)
require.NoError(t, err)
// Create firing rule
_, err = framework.MakeAndCreateFiringRule(context.Background(), ns, "rule1", testAlert)
assert.NoError(t, err)
require.NoError(t, err)
if err := framework.WaitForThanosFiringAlert(context.Background(), ns, svc.Name, testAlert); err != nil {
t.Fatal(err)

View File

@@ -29,6 +29,7 @@ import (
"github.com/prometheus/alertmanager/api/v2/models"
v1 "k8s.io/api/core/v1"
extv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
@@ -321,7 +322,12 @@ func (f *Framework) DeleteAlertmanagerAndWaitUntilGone(ctx context.Context, ns,
return errors.Wrap(err, fmt.Sprintf("waiting for Alertmanager tpr (%s) to vanish timed out", name))
}
return f.KubeClient.CoreV1().Secrets(ns).Delete(ctx, fmt.Sprintf("alertmanager-%s", name), metav1.DeleteOptions{})
err = f.KubeClient.CoreV1().Secrets(ns).Delete(ctx, fmt.Sprintf("alertmanager-%s", name), metav1.DeleteOptions{})
if err != nil && !apierrors.IsNotFound(err) {
return fmt.Errorf("failed to delete Alertmanager secret: %w", err)
}
return nil
}
func (f *Framework) WaitForAlertmanagerPodInitialized(ctx context.Context, ns, name string, amountPeers int, forceEnableClusterMode, https bool) error {

View File

@@ -16,6 +16,7 @@ package framework
import (
"context"
"fmt"
"time"
"github.com/pkg/errors"
@@ -31,6 +32,22 @@ type resourceStatus struct {
conditions []monitoringv1.Condition
}
// AssertCondition returns nil when the first condition of expectedType in
// conds has status expectedStatus. It returns an error when the status
// differs or when no condition of that type exists.
func (f *Framework) AssertCondition(conds []monitoringv1.Condition, expectedType monitoringv1.ConditionType, expectedStatus monitoringv1.ConditionStatus) error {
	for _, cond := range conds {
		if cond.Type == expectedType {
			if cond.Status == expectedStatus {
				return nil
			}
			return fmt.Errorf("expected condition %q to be %q but got %q", cond.Type, expectedStatus, cond.Status)
		}
	}

	return fmt.Errorf("condition %q not found", expectedType)
}
// WaitForResourceAvailable waits for a monitoring resource to report itself as being reconciled & available.
// If the resource isn't available within the given timeout, it returns an error.
func (f *Framework) WaitForResourceAvailable(ctx context.Context, getResourceStatus func(context.Context) (resourceStatus, error), timeout time.Duration) error {