// Copyright 2016 The prometheus-operator Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package framework

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"os"
	"strconv"
	"strings"
	"time"

	"github.com/prometheus/alertmanager/api/v2/client/silence"
	"github.com/prometheus/alertmanager/api/v2/models"
	v1 "k8s.io/api/core/v1"
	extv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/fields"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/intstr"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/apimachinery/pkg/util/yaml"
	"k8s.io/utils/ptr"

	"github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring"
	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
	monitoringv1alpha1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1alpha1"
	"github.com/prometheus-operator/prometheus-operator/pkg/operator"
)

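// ValidAlertmanagerConfig is a minimal, valid Alertmanager configuration that
// routes all alerts to a single webhook receiver.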
var ValidAlertmanagerConfig = `global:
  resolve_timeout: 5m
route:
  group_by: ['job']
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 12h
  receiver: 'webhook'
receivers:
- name: 'webhook'
  webhook_configs:
  - url: 'http://alertmanagerwh:30500/'
`

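// MakeBasicAlertmanager returns an Alertmanager object with the given name,
// namespace and replica count, and with debug logging enabled.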
func (f *Framework) MakeBasicAlertmanager(ns, name string, replicas int32) *monitoringv1.Alertmanager {
	return &monitoringv1.Alertmanager{
		ObjectMeta: metav1.ObjectMeta{
			Name:        name,
			Namespace:   ns,
			Annotations: map[string]string{},
		},
		Spec: monitoringv1.AlertmanagerSpec{
			Replicas: &replicas,
			LogLevel: "debug",
		},
	}
}

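// CreateAlertmanagerConfig creates an AlertmanagerConfig object in the given
// namespace with a "null" receiver, a sub-route and an inhibition rule.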
func (f *Framework) CreateAlertmanagerConfig(ctx context.Context, ns, name string) (*monitoringv1alpha1.AlertmanagerConfig, error) {
	subRoute := monitoringv1alpha1.Route{
		Receiver: "null",
		Matchers: []monitoringv1alpha1.Matcher{
			{
				Name:  "mykey",
				Value: "myvalue-1",
				Regex: false,
			},
		},
	}
	subRouteJSON, err := json.Marshal(subRoute)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal subroute: %w", err)
	}

	amConfig := &monitoringv1alpha1.AlertmanagerConfig{
		ObjectMeta: metav1.ObjectMeta{
			Namespace: ns,
			Name:      name,
		},
		Spec: monitoringv1alpha1.AlertmanagerConfigSpec{
			InhibitRules: []monitoringv1alpha1.InhibitRule{
				{
					SourceMatch: []monitoringv1alpha1.Matcher{
						{
							Name:  "mykey",
							Value: "myvalue-1",
							Regex: false,
						},
					},
					TargetMatch: []monitoringv1alpha1.Matcher{
						{
							Name:  "mykey",
							Value: "myvalue-2",
							Regex: false,
						},
					},
					Equal: []string{"equalkey"},
				},
			},
			Receivers: []monitoringv1alpha1.Receiver{
				{
					Name: "null",
				},
			},
			Route: &monitoringv1alpha1.Route{
				Receiver: "null",
				Routes: []extv1.JSON{
					{
						Raw: subRouteJSON,
					},
				},
			},
		},
	}

	return f.MonClientV1alpha1.AlertmanagerConfigs(ns).Create(ctx, amConfig, metav1.CreateOptions{})
}

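// MakeAlertmanagerService returns a Service of the given type exposing the web
// port of the Alertmanager pods with the given name, labeled with the given group.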
func (f *Framework) MakeAlertmanagerService(name, group string, serviceType v1.ServiceType) *v1.Service {
	service := &v1.Service{
		ObjectMeta: metav1.ObjectMeta{
			Name: fmt.Sprintf("alertmanager-%s", name),
			Labels: map[string]string{
				"group": group,
			},
		},
		Spec: v1.ServiceSpec{
			Type: serviceType,
			Ports: []v1.ServicePort{
				{
					Name:       "web",
					Port:       9093,
					TargetPort: intstr.FromString("web"),
				},
			},
			Selector: map[string]string{
				"alertmanager": name,
			},
		},
	}

	return service
}

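// SecretFromYaml reads a Secret manifest (YAML or JSON) from the given file path.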
func (f *Framework) SecretFromYaml(filepath string) (*v1.Secret, error) {
	manifest, err := os.Open(filepath)
	if err != nil {
		return nil, err
	}

	s := v1.Secret{}
	err = yaml.NewYAMLOrJSONDecoder(manifest, 100).Decode(&s)
	if err != nil {
		return nil, err
	}

	return &s, nil
}

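// AlertmanagerConfigSecret returns the default Alertmanager configuration
// secret from the test resources, renamed to the given namespace and name.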
func (f *Framework) AlertmanagerConfigSecret(ns, name string) (*v1.Secret, error) {
	s, err := f.SecretFromYaml("../../test/framework/resources/alertmanager-main-secret.yaml")
	if err != nil {
		return nil, err
	}

	s.Name = name
	s.Namespace = ns
	return s, nil
}

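// CreateAlertmanagerAndWaitUntilReady creates the configuration secret and the
// Alertmanager object, then waits until all its pods report readiness.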
func (f *Framework) CreateAlertmanagerAndWaitUntilReady(ctx context.Context, a *monitoringv1.Alertmanager) (*monitoringv1.Alertmanager, error) {
	amConfigSecretName := fmt.Sprintf("alertmanager-%s", a.Name)

	s, err := f.AlertmanagerConfigSecret(a.Namespace, amConfigSecretName)
	if err != nil {
		return nil, fmt.Errorf("making alertmanager config secret %v failed: %w", amConfigSecretName, err)
	}

	_, err = f.KubeClient.CoreV1().Secrets(a.Namespace).Create(ctx, s, metav1.CreateOptions{})
	if err != nil {
		return nil, fmt.Errorf("creating alertmanager config secret %v failed: %w", s.Name, err)
	}

	a, err = f.MonClientV1.Alertmanagers(a.Namespace).Create(ctx, a, metav1.CreateOptions{})
	if err != nil {
		return nil, fmt.Errorf("creating alertmanager %v failed: %w", a.Name, err)
	}

	a, err = f.WaitForAlertmanagerReady(ctx, a)
	if err != nil {
		return nil, err
	}

	return a, nil
}

// WaitForAlertmanagerReady waits for each individual pod as well as the
// cluster as a whole to be ready.
func (f *Framework) WaitForAlertmanagerReady(ctx context.Context, a *monitoringv1.Alertmanager) (*monitoringv1.Alertmanager, error) {
	replicas := int(*a.Spec.Replicas)

	var current *monitoringv1.Alertmanager
	var getErr error
	if err := f.WaitForResourceAvailable(
		ctx,
		func(context.Context) (resourceStatus, error) {
			current, getErr = f.MonClientV1.Alertmanagers(a.Namespace).Get(ctx, a.Name, metav1.GetOptions{})
			if getErr != nil {
				return resourceStatus{}, getErr
			}
			return resourceStatus{
				expectedReplicas: int32(replicas),
				generation:       current.Generation,
				replicas:         current.Status.UpdatedReplicas,
				conditions:       current.Status.Conditions,
			}, nil
		},
		5*time.Minute,
	); err != nil {
		return nil, fmt.Errorf("alertmanager %v/%v failed to become available: %w", a.Namespace, a.Name, err)
	}

	// Check that all pods report the expected number of peers.
	isAMHTTPS := a.Spec.Web != nil && a.Spec.Web.TLSConfig != nil
	for i := range replicas {
		name := fmt.Sprintf("alertmanager-%v-%v", a.Name, strconv.Itoa(i))
		if err := f.WaitForAlertmanagerPodInitialized(ctx, a.Namespace, name, replicas, a.Spec.ForceEnableClusterMode, isAMHTTPS); err != nil {
			return nil, fmt.Errorf(
				"failed to wait for an Alertmanager cluster (%s) with %d instances to become ready: %w",
				name, replicas, err,
			)
		}
	}

	return current, nil
}

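// PatchAlertmanagerAndWaitUntilReady applies the given spec to the Alertmanager
// object and waits until the change has been rolled out.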
func (f *Framework) PatchAlertmanagerAndWaitUntilReady(ctx context.Context, name, ns string, spec monitoringv1.AlertmanagerSpec) (*monitoringv1.Alertmanager, error) {
	a, err := f.PatchAlertmanager(ctx, name, ns, spec)
	if err != nil {
		return nil, fmt.Errorf("failed to patch Alertmanager %s/%s: %w", ns, name, err)
	}

	a, err = f.WaitForAlertmanagerReady(ctx, a)
	if err != nil {
		return nil, fmt.Errorf("failed to wait for patched Alertmanager %s/%s to become ready: %w", ns, name, err)
	}

	return a, nil
}

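// PatchAlertmanager applies the given spec to the Alertmanager object using a
// forced server-side apply patch.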
func (f *Framework) PatchAlertmanager(ctx context.Context, name, ns string, spec monitoringv1.AlertmanagerSpec) (*monitoringv1.Alertmanager, error) {
	b, err := json.Marshal(
		&monitoringv1.Alertmanager{
			TypeMeta: metav1.TypeMeta{
				Kind:       monitoringv1.AlertmanagersKind,
				APIVersion: schema.GroupVersion{Group: monitoring.GroupName, Version: monitoringv1.Version}.String(),
			},
			Spec: spec,
		},
	)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal Alertmanager spec: %w", err)
	}

	p, err := f.MonClientV1.Alertmanagers(ns).Patch(
		ctx,
		name,
		types.ApplyPatchType,
		b,
		metav1.PatchOptions{
			Force:        ptr.To(true),
			FieldManager: "e2e-test",
		},
	)
	if err != nil {
		return nil, err
	}

	return p, nil
}

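// UpdateAlertmanagerReplicasAndWaitUntilReady patches the replica count of the
// Alertmanager object and waits until the new replicas are ready.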
func (f *Framework) UpdateAlertmanagerReplicasAndWaitUntilReady(ctx context.Context, name, ns string, replicas int32) (*monitoringv1.Alertmanager, error) {
	return f.PatchAlertmanagerAndWaitUntilReady(
		ctx,
		name,
		ns,
		monitoringv1.AlertmanagerSpec{
			Replicas: ptr.To(replicas),
		},
	)
}

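// ScaleAlertmanagerAndWaitUntilReady updates the replica count through the
// scale subresource and waits until the Alertmanager is ready.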
func (f *Framework) ScaleAlertmanagerAndWaitUntilReady(ctx context.Context, name, ns string, replicas int32) (*monitoringv1.Alertmanager, error) {
	aclient := f.MonClientV1.Alertmanagers(ns)
	scale, err := aclient.GetScale(ctx, name, metav1.GetOptions{})
	if err != nil {
		return nil, fmt.Errorf("failed to get Alertmanager scale: %w", err)
	}

	scale.Spec.Replicas = replicas
	_, err = aclient.UpdateScale(ctx, name, scale, metav1.UpdateOptions{})
	if err != nil {
		return nil, fmt.Errorf("failed to update Alertmanager scale: %w", err)
	}

	a, err := aclient.Get(ctx, name, metav1.GetOptions{})
	if err != nil {
		return nil, fmt.Errorf("failed to get Alertmanager: %w", err)
	}

	a, err = f.WaitForAlertmanagerReady(ctx, a)
	if err != nil {
		return nil, err
	}

	return a, nil
}

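// DeleteAlertmanagerAndWaitUntilGone deletes the Alertmanager object, waits for
// its pods to disappear and removes the associated configuration secret.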
func (f *Framework) DeleteAlertmanagerAndWaitUntilGone(ctx context.Context, ns, name string) error {
	_, err := f.MonClientV1.Alertmanagers(ns).Get(ctx, name, metav1.GetOptions{})
	if err != nil {
		return fmt.Errorf("requesting Alertmanager custom resource %v failed: %w", name, err)
	}

	if err := f.MonClientV1.Alertmanagers(ns).Delete(ctx, name, metav1.DeleteOptions{}); err != nil {
		return fmt.Errorf("deleting Alertmanager custom resource %v failed: %w", name, err)
	}

	if err := f.WaitForPodsReady(
		ctx,
		ns,
		f.DefaultTimeout,
		0,
		metav1.ListOptions{
			LabelSelector: fields.SelectorFromSet(fields.Set(map[string]string{
				operator.ApplicationNameLabelKey: "alertmanager",
				"alertmanager":                   name,
			})).String(),
		}); err != nil {
		return fmt.Errorf("waiting for Alertmanager custom resource (%s) to vanish timed out: %w", name, err)
	}

	err = f.KubeClient.CoreV1().Secrets(ns).Delete(ctx, fmt.Sprintf("alertmanager-%s", name), metav1.DeleteOptions{})
	if err != nil && !apierrors.IsNotFound(err) {
		return fmt.Errorf("failed to delete Alertmanager secret: %w", err)
	}

	return nil
}

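// WaitForAlertmanagerPodInitialized waits until the given Alertmanager pod
// reports a ready cluster status with the expected number of peers.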
func (f *Framework) WaitForAlertmanagerPodInitialized(ctx context.Context, ns, name string, amountPeers int, forceEnableClusterMode, https bool) error {
	var pollError error
	err := wait.PollUntilContextTimeout(ctx, time.Second, time.Minute*5, false, func(ctx context.Context) (bool, error) {
		amStatus, err := f.GetAlertmanagerPodStatus(ctx, ns, name, https)
		if err != nil {
			pollError = fmt.Errorf("failed to query Alertmanager: %w", err)
			return false, nil
		}

		isAlertmanagerInClusterMode := amountPeers > 1 || forceEnableClusterMode
		if !isAlertmanagerInClusterMode {
			return true, nil
		}

		if amStatus.Cluster == nil {
			pollError = fmt.Errorf("expected a cluster status but got none")
			return false, nil
		}

		if *amStatus.Cluster.Status != "ready" {
			pollError = fmt.Errorf("expected cluster status %q, got %q", "ready", *amStatus.Cluster.Status)
			return false, nil
		}

		if len(amStatus.Cluster.Peers) != amountPeers {
			var addrs = make([]string, len(amStatus.Cluster.Peers))
			for i := range amStatus.Cluster.Peers {
				addrs[i] = *amStatus.Cluster.Peers[i].Name
			}
			pollError = fmt.Errorf("expected %d peers, got %d (addresses: %v)", amountPeers, len(amStatus.Cluster.Peers), addrs)
			return false, nil
		}

		return true, nil
	})
	if err != nil {
		return fmt.Errorf("failed to wait for initialized alertmanager cluster: %v: %v", err, pollError)
	}

	return nil
}

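// GetAlertmanagerPodStatus queries the /api/v2/status endpoint of the given
// Alertmanager pod, over HTTPS if requested.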
func (f *Framework) GetAlertmanagerPodStatus(ctx context.Context, ns, pod string, https bool) (models.AlertmanagerStatus, error) {
	var amStatus models.AlertmanagerStatus
	var scheme string
	if https {
		scheme = "https"
	}

	b, err := f.ProxyGetPod(ctx, scheme, ns, pod, "/api/v2/status")
	if err != nil {
		return amStatus, err
	}

	if err := json.Unmarshal(b, &amStatus); err != nil {
		return amStatus, err
	}

	return amStatus, nil
}

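// CreateSilence creates a silence via the API of the given Alertmanager pod
// and returns the silence ID.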
func (f *Framework) CreateSilence(ctx context.Context, ns, n string) (string, error) {
	var createSilenceResponse silence.PostSilencesOKBody

	request := f.ProxyPostPod(
		ns, n,
		"/api/v2/silences",
		`{"createdBy":"Max Mustermann","comment":"1234","startsAt":"2030-04-09T09:16:15.114Z","endsAt":"2031-04-09T11:16:15.114Z","matchers":[{"name":"test","value":"123","isRegex":false}]}`,
	)
	resp, err := request.DoRaw(ctx)
	if err != nil {
		return "", err
	}

	if err := json.Unmarshal(resp, &createSilenceResponse); err != nil {
		return "", err
	}

	return createSilenceResponse.SilenceID, nil
}

// SendAlertToAlertmanager sends an alert to the alertmanager in the given
// namespace (ns) with the given name (n).
func (f *Framework) SendAlertToAlertmanager(ctx context.Context, ns, n string) error {
	alerts := models.PostableAlerts{{
		Alert: models.Alert{
			GeneratorURL: "http://prometheus-test-0:9090/graph?g0.expr=vector%281%29\u0026g0.tab=1",
			Labels: map[string]string{
				"alertname": "ExampleAlert", "prometheus": "my-prometheus",
			},
		},
	}}

	b, err := json.Marshal(alerts)
	if err != nil {
		return err
	}

	request := f.ProxyPostPod(ns, n, "api/v2/alerts", string(b))
	_, err = request.DoRaw(ctx)
	if err != nil {
		return err
	}

	return nil
}

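// GetSilences returns the list of silences from the API of the given
// Alertmanager pod.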
func (f *Framework) GetSilences(ctx context.Context, ns, pod string) (models.GettableSilences, error) {
	var getSilencesResponse models.GettableSilences

	b, err := f.ProxyGetPod(ctx, "", ns, pod, "/api/v2/silences")
	if err != nil {
		return getSilencesResponse, err
	}

	if err := json.Unmarshal(b, &getSilencesResponse); err != nil {
		return getSilencesResponse, err
	}

	return getSilencesResponse, nil
}

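// WaitForAlertmanagerFiringAlert waits until the Alertmanager service reports
// exactly one active alert with the given alert name.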
func (f *Framework) WaitForAlertmanagerFiringAlert(ctx context.Context, ns, svcName, alertName string) error {
	err := wait.PollUntilContextTimeout(ctx, 10*time.Second, time.Minute*5, false, func(ctx context.Context) (bool, error) {
		alerts := models.GettableAlerts{}
		resp, err := f.AlertmanagerSVCGetRequest(ctx, ns, svcName, "/api/v2/alerts", map[string]string{
			"state":  "active",
			"filter": "alertname=" + alertName,
		})
		if err != nil {
			return false, err
		}

		if err := json.NewDecoder(bytes.NewBuffer(resp)).Decode(&alerts); err != nil {
			return false, fmt.Errorf("failed to decode alerts from Alertmanager API: %w", err)
		}

		if len(alerts) != 1 {
			return false, nil
		}

		for _, alert := range alerts {
			// The Alertmanager API reports firing alerts with the "active" state.
			if alert.Labels["alertname"] == alertName && alert.Status.State != nil && *alert.Status.State == "active" {
				return true, nil
			}
		}

		return false, nil
	})
	if err != nil {
		return fmt.Errorf("failed to wait for alert %s to fire: %v", alertName, err)
	}

	return nil
}

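// AlertmanagerSVCGetRequest performs an HTTP GET against the given Alertmanager
// service endpoint through the Kubernetes API server proxy.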
func (f *Framework) AlertmanagerSVCGetRequest(ctx context.Context, ns, svcName, endpoint string, query map[string]string) ([]byte, error) {
	ProxyGet := f.KubeClient.CoreV1().Services(ns).ProxyGet
	request := ProxyGet("", svcName, "web", endpoint, query)
	return request.DoRaw(ctx)
}

// PollAlertmanagerConfiguration retrieves the Alertmanager configuration via
// the Alertmanager's API and checks that all conditions return without error.
// It will retry every 10 seconds for 5 minutes before giving up.
func (f *Framework) PollAlertmanagerConfiguration(ctx context.Context, ns, amName string, conditions ...func(config string) error) error {
	var pollError error
	err := wait.PollUntilContextTimeout(ctx, 10*time.Second, time.Minute*5, false, func(ctx context.Context) (bool, error) {
		amStatus, err := f.GetAlertmanagerPodStatus(ctx, ns, "alertmanager-"+amName+"-0", false)
		if err != nil {
			pollError = fmt.Errorf("failed to query Alertmanager: %w", err)
			return false, nil
		}

		for _, c := range conditions {
			pollError = c(*amStatus.Config.Original)
			if pollError != nil {
				return false, nil
			}
		}

		return true, nil
	})
	if err != nil {
		return fmt.Errorf("failed to wait for alertmanager config: %v: %v", err, pollError)
	}

	return nil
}

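// WaitForAlertmanagerConfigToContainString waits until the loaded Alertmanager
// configuration contains the expected string.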
func (f *Framework) WaitForAlertmanagerConfigToContainString(ctx context.Context, ns, amName, expected string) error {
	return f.PollAlertmanagerConfiguration(ctx, ns, amName, func(config string) error {
		if !strings.Contains(config, expected) {
			return fmt.Errorf("expected config to contain %q, got %q", expected, config)
		}

		return nil
	})
}

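// WaitForAlertmanagerConfigToBeReloaded waits until the Alertmanager reports a
// successful configuration reload more recent than the given timestamp.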
func (f *Framework) WaitForAlertmanagerConfigToBeReloaded(ctx context.Context, ns, amName string, previousReloadTimestamp time.Time) error {
	const configReloadMetricName = "alertmanager_config_last_reload_success_timestamp_seconds"
	err := wait.PollUntilContextTimeout(ctx, 10*time.Second, time.Minute*5, false, func(ctx context.Context) (bool, error) {
		timestampSec, err := f.GetMetricValueFromPod(ctx, "", ns, "alertmanager-"+amName+"-0", "", configReloadMetricName)
		if err != nil {
			return false, err
		}

		timestamp := time.Unix(int64(timestampSec), 0)
		return timestamp.After(previousReloadTimestamp), nil
	})
	if err != nil {
		return fmt.Errorf("failed to wait for alertmanager config to have been reloaded after %v: %v", previousReloadTimestamp, err)
	}

	return nil
}