1
0
mirror of https://github.com/coreos/prometheus-operator.git synced 2026-02-05 06:45:27 +01:00

test: improve e2e test troubleshooting

This change gathers and dumps useful information when an end-to-end test
fails.

Signed-off-by: Simon Pasquier <spasquie@redhat.com>
This commit is contained in:
Simon Pasquier
2024-09-23 17:09:20 +02:00
parent 6d1d405e0b
commit 255238205b
10 changed files with 202 additions and 57 deletions

View File

@@ -330,8 +330,13 @@ func testAMClusterInitialization(t *testing.T) {
}
for i := 0; i < amClusterSize; i++ {
err := framework.PrintPodLogs(context.Background(), ns, fmt.Sprintf("alertmanager-test-%v", strconv.Itoa(i)))
require.NoError(t, err)
b := &bytes.Buffer{}
err := framework.WritePodLogs(context.Background(), b, ns, fmt.Sprintf("alertmanager-test-%v", strconv.Itoa(i)), testFramework.LogOptions{})
if err != nil {
t.Logf("failed to get logs: %v", err)
}
t.Log(b.String())
}
}()
@@ -822,10 +827,11 @@ inhibit_rules:
require.Len(t, pl.Items, 1)
podName := pl.Items[0].Name
logs, err := framework.GetLogs(context.Background(), ns, podName, "webhook-server")
b := &bytes.Buffer{}
err = framework.WritePodLogs(context.Background(), b, ns, podName, testFramework.LogOptions{Container: "webhook-server"})
require.NoError(t, err)
c := strings.Count(logs, "Alertmanager Notification Payload Received")
c := strings.Count(b.String(), "Alertmanager Notification Payload Received")
require.Equal(t, 1, c)
// We need to force a rolling update, e.g. by changing one of the command
@@ -845,10 +851,11 @@ inhibit_rules:
time.Sleep(time.Minute)
logs, err = framework.GetLogs(context.Background(), ns, podName, "webhook-server")
b.Reset()
err = framework.WritePodLogs(context.Background(), b, ns, podName, testFramework.LogOptions{Container: "webhook-server"})
require.NoError(t, err)
c = strings.Count(logs, "Alertmanager Notification Payload Received")
c = strings.Count(b.String(), "Alertmanager Notification Payload Received")
require.Equal(t, 1, c)
}

View File

@@ -1553,11 +1553,15 @@ func testPromRulesExceedingConfigMapLimit(t *testing.T) {
}
defer func() {
if t.Failed() {
if err := framework.PrintPodLogs(context.Background(), ns, "prometheus-"+p.Name+"-0"); err != nil {
t.Fatal(err)
}
if !t.Failed() {
return
}
b := &bytes.Buffer{}
if err := framework.WritePodLogs(context.Background(), b, ns, "prometheus-"+p.Name+"-0", testFramework.LogOptions{}); err != nil {
t.Logf("failed to get logs: %v", err)
}
t.Log(b.String())
}()
pSVC := framework.MakePrometheusService(p.Name, "not-relevant", v1.ServiceTypeClusterIP)

View File

@@ -15,16 +15,24 @@
package framework
import (
"bytes"
"context"
"fmt"
"io"
"strconv"
"strings"
"testing"
"time"
"golang.org/x/sync/errgroup"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
)
// TestCtx carries the per-test state used by the framework helpers.
type TestCtx struct {
// id identifies the test context. NewTestCtx builds it from a prefix
// followed by "-" and the current Unix time encoded in base 36.
id string
// namespaces lists the namespaces registered on this context
// (CreateNamespace appends each namespace it creates). The collect*
// helpers iterate over it to gather troubleshooting data.
namespaces []string
// cleanUpFns holds the finalizer functions attached to this context.
// NewTestCtx seeds it with a function that dumps diagnostics when the
// test has failed.
// NOTE(review): presumably run when the test finishes; the code that
// invokes them is not visible here.
cleanUpFns []FinalizerFn
}
@@ -42,9 +50,122 @@ func (f *Framework) NewTestCtx(t *testing.T) *TestCtx {
"test",
)
id := prefix + "-" + strconv.FormatInt(time.Now().Unix(), 36)
return &TestCtx{
id: id,
tc := &TestCtx{
id: prefix + "-" + strconv.FormatInt(time.Now().Unix(), 36),
}
tc.cleanUpFns = []FinalizerFn{
func() error {
t.Helper()
if !t.Failed() {
return nil
}
// We can collect more information as we see fit over time.
b := &bytes.Buffer{}
tc.collectAlertmanagers(b, f)
tc.collectPrometheuses(b, f)
tc.collectThanosRulers(b, f)
tc.collectLogs(b, f)
tc.collectEvents(b, f)
t.Logf("=== %s (start)", t.Name())
t.Log("")
t.Log(b.String())
t.Logf("=== %s (end)", t.Name())
return nil
},
}
return tc
}
// collectLogs writes to w the logs of every pod found in the namespaces
// registered on the test context.
//
// Collection is best-effort: a failure to list the pods of a namespace or to
// fetch the logs of a pod is reported inline in w and the remaining
// namespaces/pods are still processed.
func (ctx *TestCtx) collectLogs(w io.Writer, f *Framework) {
	bg := context.Background()

	for _, namespace := range ctx.namespaces {
		podList, err := f.KubeClient.CoreV1().Pods(namespace).List(bg, metav1.ListOptions{})
		if err != nil {
			fmt.Fprintf(w, "%s: failed to get pods: %v\n", namespace, err)
			continue
		}

		for i := range podList.Items {
			podName := podList.Items[i].Name
			if err := f.WritePodLogs(bg, w, namespace, podName, LogOptions{}); err != nil {
				fmt.Fprintf(w, "%s: failed to get pod logs: %v\n", namespace, err)
			}
		}
	}
}
// collectEvents writes to w an "=== Events" header followed by the Kubernetes
// events of every namespace registered on the test context.
//
// Events are written straight into w. The previous implementation wrote them
// into a scratch buffer that was never copied to w, so the events were
// silently dropped from the troubleshooting output.
//
// Collection is best-effort: a failure to retrieve the events of a namespace
// is reported inline in w and the remaining namespaces are still processed.
func (ctx *TestCtx) collectEvents(w io.Writer, f *Framework) {
	fmt.Fprintln(w, "=== Events")

	for _, ns := range ctx.namespaces {
		if err := f.WriteEvents(context.Background(), w, ns); err != nil {
			fmt.Fprintf(w, "%s: failed to get events: %v\n", ns, err)
		}
	}
}
// collectConditions writes one line per status condition to w. Each line
// starts with prefix and is followed by the type, status, reason and message
// of the condition, all rendered as quoted values.
func collectConditions(w io.Writer, prefix string, conditions []monitoringv1.Condition) {
	const layout = "%s: condition type=%q status=%q reason=%q message=%q\n"

	for i := range conditions {
		cond := conditions[i]
		fmt.Fprintf(w, layout, prefix, cond.Type, cond.Status, cond.Reason, cond.Message)
	}
}
// collectAlertmanagers writes to w an "=== Alertmanagers" header followed by
// the status conditions of every Alertmanager object found in the namespaces
// registered on the test context.
//
// A failure to list the objects of a namespace is reported inline in w and
// the remaining namespaces are still processed.
func (ctx *TestCtx) collectAlertmanagers(w io.Writer, f *Framework) {
	fmt.Fprintln(w, "=== Alertmanagers")

	for _, namespace := range ctx.namespaces {
		list, err := f.MonClientV1.Alertmanagers(namespace).List(context.Background(), metav1.ListOptions{})
		if err != nil {
			fmt.Fprintf(w, "%s: failed to get alertmanagers: %v\n", namespace, err)
			continue
		}

		for i := range list.Items {
			item := list.Items[i]
			label := fmt.Sprintf("Alertmanager=%s/%s", item.Namespace, item.Name)
			collectConditions(w, label, item.Status.Conditions)
		}
	}
}
// collectPrometheuses writes to w an "=== Prometheuses" header followed by
// the status conditions of every Prometheus object found in the namespaces
// registered on the test context.
//
// A failure to list the objects of a namespace is reported inline in w and
// the remaining namespaces are still processed.
func (ctx *TestCtx) collectPrometheuses(w io.Writer, f *Framework) {
	fmt.Fprintln(w, "=== Prometheuses")

	for _, namespace := range ctx.namespaces {
		list, err := f.MonClientV1.Prometheuses(namespace).List(context.Background(), metav1.ListOptions{})
		if err != nil {
			fmt.Fprintf(w, "%s: failed to get prometheuses: %v\n", namespace, err)
			continue
		}

		for i := range list.Items {
			item := list.Items[i]
			label := fmt.Sprintf("Prometheus=%s/%s", item.Namespace, item.Name)
			collectConditions(w, label, item.Status.Conditions)
		}
	}
}
// collectThanosRulers writes to w an "=== ThanosRulers" header followed by
// the status conditions of every ThanosRuler object found in the namespaces
// registered on the test context.
//
// A failure to list the objects of a namespace is reported inline in w and
// the remaining namespaces are still processed.
func (ctx *TestCtx) collectThanosRulers(w io.Writer, f *Framework) {
	fmt.Fprintln(w, "=== ThanosRulers")

	for _, namespace := range ctx.namespaces {
		list, err := f.MonClientV1.ThanosRulers(namespace).List(context.Background(), metav1.ListOptions{})
		if err != nil {
			fmt.Fprintf(w, "%s: failed to get thanosrulers: %v\n", namespace, err)
			continue
		}

		for i := range list.Items {
			item := list.Items[i]
			label := fmt.Sprintf("ThanosRuler=%s/%s", item.Namespace, item.Name)
			collectConditions(w, label, item.Status.Conditions)
		}
	}
}

View File

@@ -17,21 +17,21 @@ package framework
import (
"context"
"fmt"
"io"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
// PrintEvents prints the Kubernetes events to standard out.
func (f *Framework) PrintEvents(ctx context.Context) error {
events, err := f.KubeClient.CoreV1().Events("").List(ctx, metav1.ListOptions{})
// WriteEvents writes the Kubernetes events for the given namespace.
// If the namespace is empty, all events are written.
func (f *Framework) WriteEvents(ctx context.Context, w io.Writer, ns string) error {
events, err := f.KubeClient.CoreV1().Events(ns).List(ctx, metav1.ListOptions{})
if err != nil {
return err
}
if events != nil {
fmt.Println("=== Kubernetes events:")
for _, e := range events.Items {
fmt.Printf("FirstTimestamp: '%v', Reason: '%v', Message: '%v'\n", e.FirstTimestamp, e.Reason, e.Message)
}
for _, e := range events.Items {
fmt.Fprintf(w, "timestamp='%v' namespace=%q reason=%q message=%q\n", e.FirstTimestamp, e.Namespace, e.Reason, e.Message)
}
return nil

View File

@@ -161,20 +161,6 @@ func podRunsImage(p v1.Pod, image string) bool {
return false
}
// GetLogs fetches the "log" subresource of the given pod for the named
// container and returns the raw response as a string. On failure it returns
// an empty string together with the request error.
func (f *Framework) GetLogs(ctx context.Context, namespace string, podName, containerName string) (string, error) {
	req := f.KubeClient.CoreV1().RESTClient().Get().
		Resource("pods").
		Namespace(namespace).
		Name(podName).
		SubResource("log").
		Param("container", containerName)

	raw, err := req.Do(ctx).Raw()
	if err != nil {
		return "", err
	}

	return string(raw), nil
}
// ProxyGetPod expects resourceName as "[protocol:]podName[:portNameOrNumber]".
// protocol is optional and the valid values are "http" and "https".
// Without specifying protocol, "http" will be used.

View File

@@ -17,7 +17,6 @@ package framework
import (
"context"
"encoding/json"
"fmt"
"testing"
v1 "k8s.io/api/core/v1"
@@ -32,7 +31,7 @@ func (f *Framework) CreateNamespace(ctx context.Context, t *testing.T, testCtx *
rn := k8sutil.ResourceNamer{}
name, err := rn.UniqueDNS1123Label(name)
if err != nil {
t.Fatal(fmt.Errorf("failed to generate a namespace name %v: %w", name, err))
t.Fatalf("failed to generate namespace %v: %v", name, err)
}
_, err = f.KubeClient.CoreV1().Namespaces().Create(ctx, &v1.Namespace{
@@ -40,11 +39,12 @@ func (f *Framework) CreateNamespace(ctx context.Context, t *testing.T, testCtx *
Name: name,
},
}, metav1.CreateOptions{})
if err != nil {
t.Fatal(fmt.Errorf("failed to create namespace with name %v: %w", name, err))
t.Fatalf("failed to create namespace %q: %v", name, err)
}
testCtx.namespaces = append(testCtx.namespaces, name)
namespaceFinalizerFn := func() error {
return f.DeleteNamespace(ctx, name)
}

View File

@@ -32,22 +32,48 @@ import (
"k8s.io/client-go/transport/spdy"
)
// LogOptions controls which pod logs WritePodLogs retrieves.
type LogOptions struct {
// Container restricts the output to the container with this name. When
// empty, the logs of every container listed in the pod spec are written.
Container string
// TailLines is passed on as the TailLines pod log option when it is
// greater than zero; otherwise the option is left unset.
TailLines int64
// SinceSeconds is passed on as the SinceSeconds pod log option when it is
// greater than zero; otherwise the option is left unset.
SinceSeconds int64
}
// WritePodLogs writes the logs of the specified Pod's containers to w.
func (f *Framework) PrintPodLogs(ctx context.Context, ns, p string) error {
pod, err := f.KubeClient.CoreV1().Pods(ns).Get(ctx, p, metav1.GetOptions{})
func (f *Framework) WritePodLogs(ctx context.Context, w io.Writer, ns, pod string, opts LogOptions) error {
p, err := f.KubeClient.CoreV1().Pods(ns).Get(ctx, pod, metav1.GetOptions{})
if err != nil {
return fmt.Errorf("failed to print logs of pod '%v': failed to get pod: %w", p, err)
return fmt.Errorf("failed to get pod %s/%s: %w", ns, pod, err)
}
for _, c := range pod.Spec.Containers {
req := f.KubeClient.CoreV1().Pods(ns).GetLogs(p, &v1.PodLogOptions{Container: c.Name})
var containers []string
for _, c := range p.Spec.Containers {
if opts.Container != "" && c.Name != opts.Container {
continue
}
containers = append(containers, c.Name)
}
plo := v1.PodLogOptions{}
if opts.TailLines > 0 {
plo.TailLines = &opts.TailLines
}
if opts.SinceSeconds > 0 {
plo.SinceSeconds = &opts.SinceSeconds
}
for _, c := range containers {
plo.Container = c
req := f.KubeClient.CoreV1().Pods(ns).GetLogs(pod, &plo)
resp, err := req.DoRaw(ctx)
if err != nil {
return fmt.Errorf("failed to retrieve logs of pod '%v': %w", p, err)
return fmt.Errorf("failed to retrieve logs of container %q (pod %s/%s): %w", c, ns, pod, err)
}
fmt.Printf("=== Logs of %v/%v/%v:", ns, p, c.Name)
fmt.Println(string(resp))
fmt.Fprintf(w, "=== Logs (pod=%s/%s container=%s)\n", ns, pod, c)
_, err = w.Write(resp)
if err != nil {
return fmt.Errorf("failed to write logs: %w", err)
}
}
return nil

View File

@@ -804,13 +804,14 @@ func (f *Framework) PrintPrometheusLogs(ctx context.Context, t *testing.T, p *mo
replicas := int(*p.Spec.Replicas)
for i := 0; i < replicas; i++ {
l, err := f.GetLogs(ctx, p.Namespace, fmt.Sprintf("prometheus-%s-%d", p.Name, i), "prometheus")
b := &bytes.Buffer{}
err := f.WritePodLogs(ctx, b, p.Namespace, fmt.Sprintf("prometheus-%s-%d", p.Name, i), LogOptions{Container: "prometheus"})
if err != nil {
t.Logf("failed to retrieve logs for replica[%d]: %v", i, err)
continue
}
t.Logf("Prometheus %q/%q (replica #%d) logs:", p.Namespace, p.Name, i)
t.Logf("%s", l)
t.Log(b.String())
}
}