mirror of
https://github.com/coreos/prometheus-operator.git
synced 2026-02-05 06:45:27 +01:00
test: improve e2e test troubleshooting
This change gathers and dumps useful information when an end-to-end test fails. Signed-off-by: Simon Pasquier <spasquie@redhat.com>
This commit is contained in:
@@ -330,8 +330,13 @@ func testAMClusterInitialization(t *testing.T) {
|
||||
}
|
||||
|
||||
for i := 0; i < amClusterSize; i++ {
|
||||
err := framework.PrintPodLogs(context.Background(), ns, fmt.Sprintf("alertmanager-test-%v", strconv.Itoa(i)))
|
||||
require.NoError(t, err)
|
||||
b := &bytes.Buffer{}
|
||||
err := framework.WritePodLogs(context.Background(), b, ns, fmt.Sprintf("alertmanager-test-%v", strconv.Itoa(i)), testFramework.LogOptions{})
|
||||
if err != nil {
|
||||
t.Logf("failed to get logs: %v", err)
|
||||
}
|
||||
|
||||
t.Log(b.String())
|
||||
}
|
||||
}()
|
||||
|
||||
@@ -822,10 +827,11 @@ inhibit_rules:
|
||||
require.Len(t, pl.Items, 1)
|
||||
|
||||
podName := pl.Items[0].Name
|
||||
logs, err := framework.GetLogs(context.Background(), ns, podName, "webhook-server")
|
||||
b := &bytes.Buffer{}
|
||||
err = framework.WritePodLogs(context.Background(), b, ns, podName, testFramework.LogOptions{Container: "webhook-server"})
|
||||
require.NoError(t, err)
|
||||
|
||||
c := strings.Count(logs, "Alertmanager Notification Payload Received")
|
||||
c := strings.Count(b.String(), "Alertmanager Notification Payload Received")
|
||||
require.Equal(t, 1, c)
|
||||
|
||||
// We need to force a rolling update, e.g. by changing one of the command
|
||||
@@ -845,10 +851,11 @@ inhibit_rules:
|
||||
|
||||
time.Sleep(time.Minute)
|
||||
|
||||
logs, err = framework.GetLogs(context.Background(), ns, podName, "webhook-server")
|
||||
b.Reset()
|
||||
err = framework.WritePodLogs(context.Background(), b, ns, podName, testFramework.LogOptions{Container: "webhook-server"})
|
||||
require.NoError(t, err)
|
||||
|
||||
c = strings.Count(logs, "Alertmanager Notification Payload Received")
|
||||
c = strings.Count(b.String(), "Alertmanager Notification Payload Received")
|
||||
require.Equal(t, 1, c)
|
||||
}
|
||||
|
||||
|
||||
@@ -1553,11 +1553,15 @@ func testPromRulesExceedingConfigMapLimit(t *testing.T) {
|
||||
}
|
||||
|
||||
defer func() {
|
||||
if t.Failed() {
|
||||
if err := framework.PrintPodLogs(context.Background(), ns, "prometheus-"+p.Name+"-0"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if !t.Failed() {
|
||||
return
|
||||
}
|
||||
|
||||
b := &bytes.Buffer{}
|
||||
if err := framework.WritePodLogs(context.Background(), b, ns, "prometheus-"+p.Name+"-0", testFramework.LogOptions{}); err != nil {
|
||||
t.Logf("failed to get logs: %v", err)
|
||||
}
|
||||
t.Log(b.String())
|
||||
}()
|
||||
|
||||
pSVC := framework.MakePrometheusService(p.Name, "not-relevant", v1.ServiceTypeClusterIP)
|
||||
|
||||
@@ -15,16 +15,24 @@
|
||||
package framework
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"golang.org/x/sync/errgroup"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
|
||||
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
|
||||
)
|
||||
|
||||
type TestCtx struct {
|
||||
id string
|
||||
namespaces []string
|
||||
cleanUpFns []FinalizerFn
|
||||
}
|
||||
|
||||
@@ -42,9 +50,122 @@ func (f *Framework) NewTestCtx(t *testing.T) *TestCtx {
|
||||
"test",
|
||||
)
|
||||
|
||||
id := prefix + "-" + strconv.FormatInt(time.Now().Unix(), 36)
|
||||
return &TestCtx{
|
||||
id: id,
|
||||
tc := &TestCtx{
|
||||
id: prefix + "-" + strconv.FormatInt(time.Now().Unix(), 36),
|
||||
}
|
||||
|
||||
tc.cleanUpFns = []FinalizerFn{
|
||||
func() error {
|
||||
t.Helper()
|
||||
if !t.Failed() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// We can collect more information as we see fit over time.
|
||||
b := &bytes.Buffer{}
|
||||
tc.collectAlertmanagers(b, f)
|
||||
tc.collectPrometheuses(b, f)
|
||||
tc.collectThanosRulers(b, f)
|
||||
tc.collectLogs(b, f)
|
||||
tc.collectEvents(b, f)
|
||||
|
||||
t.Logf("=== %s (start)", t.Name())
|
||||
t.Log("")
|
||||
t.Log(b.String())
|
||||
t.Logf("=== %s (end)", t.Name())
|
||||
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
return tc
|
||||
}
|
||||
|
||||
func (ctx *TestCtx) collectLogs(w io.Writer, f *Framework) {
|
||||
for _, ns := range ctx.namespaces {
|
||||
pods, err := f.KubeClient.CoreV1().Pods(ns).List(context.Background(), metav1.ListOptions{})
|
||||
if err != nil {
|
||||
fmt.Fprintf(w, "%s: failed to get pods: %v\n", ns, err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, pod := range pods.Items {
|
||||
err := f.WritePodLogs(context.Background(), w, ns, pod.Name, LogOptions{})
|
||||
if err != nil {
|
||||
fmt.Fprintf(w, "%s: failed to get pod logs: %v\n", ns, err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (ctx *TestCtx) collectEvents(w io.Writer, f *Framework) {
|
||||
fmt.Fprintln(w, "=== Events")
|
||||
for _, ns := range ctx.namespaces {
|
||||
b := &bytes.Buffer{}
|
||||
err := f.WriteEvents(context.Background(), b, ns)
|
||||
if err != nil {
|
||||
fmt.Fprintf(w, "%s: failed to get events: %v\n", ns, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func collectConditions(w io.Writer, prefix string, conditions []monitoringv1.Condition) {
|
||||
for _, c := range conditions {
|
||||
fmt.Fprintf(
|
||||
w,
|
||||
"%s: condition type=%q status=%q reason=%q message=%q\n",
|
||||
prefix,
|
||||
c.Type,
|
||||
c.Status,
|
||||
c.Reason,
|
||||
c.Message,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
func (ctx *TestCtx) collectAlertmanagers(w io.Writer, f *Framework) {
|
||||
fmt.Fprintln(w, "=== Alertmanagers")
|
||||
for _, ns := range ctx.namespaces {
|
||||
ams, err := f.MonClientV1.Alertmanagers(ns).List(context.Background(), metav1.ListOptions{})
|
||||
if err != nil {
|
||||
fmt.Fprintf(w, "%s: failed to get alertmanagers: %v\n", ns, err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, am := range ams.Items {
|
||||
collectConditions(w, fmt.Sprintf("Alertmanager=%s/%s", am.Namespace, am.Name), am.Status.Conditions)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (ctx *TestCtx) collectPrometheuses(w io.Writer, f *Framework) {
|
||||
fmt.Fprintln(w, "=== Prometheuses")
|
||||
for _, ns := range ctx.namespaces {
|
||||
ps, err := f.MonClientV1.Prometheuses(ns).List(context.Background(), metav1.ListOptions{})
|
||||
if err != nil {
|
||||
fmt.Fprintf(w, "%s: failed to get prometheuses: %v\n", ns, err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, p := range ps.Items {
|
||||
collectConditions(w, fmt.Sprintf("Prometheus=%s/%s", p.Namespace, p.Name), p.Status.Conditions)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (ctx *TestCtx) collectThanosRulers(w io.Writer, f *Framework) {
|
||||
fmt.Fprintln(w, "=== ThanosRulers")
|
||||
for _, ns := range ctx.namespaces {
|
||||
trs, err := f.MonClientV1.ThanosRulers(ns).List(context.Background(), metav1.ListOptions{})
|
||||
if err != nil {
|
||||
fmt.Fprintf(w, "%s: failed to get thanosrulers: %v\n", ns, err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, tr := range trs.Items {
|
||||
collectConditions(w, fmt.Sprintf("ThanosRuler=%s/%s", tr.Namespace, tr.Name), tr.Status.Conditions)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -17,21 +17,21 @@ package framework
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
)
|
||||
|
||||
// PrintEvents prints the Kubernetes events to standard out.
|
||||
func (f *Framework) PrintEvents(ctx context.Context) error {
|
||||
events, err := f.KubeClient.CoreV1().Events("").List(ctx, metav1.ListOptions{})
|
||||
// WriteEvents writes the Kubernetes events for the given namespace.
|
||||
// If the namespace is empty, all events are written.
|
||||
func (f *Framework) WriteEvents(ctx context.Context, w io.Writer, ns string) error {
|
||||
events, err := f.KubeClient.CoreV1().Events(ns).List(ctx, metav1.ListOptions{})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if events != nil {
|
||||
fmt.Println("=== Kubernetes events:")
|
||||
for _, e := range events.Items {
|
||||
fmt.Printf("FirstTimestamp: '%v', Reason: '%v', Message: '%v'\n", e.FirstTimestamp, e.Reason, e.Message)
|
||||
}
|
||||
|
||||
for _, e := range events.Items {
|
||||
fmt.Fprintf(w, "timestamp='%v' namespace=%q reason=%q message=%q\n", e.FirstTimestamp, e.Namespace, e.Reason, e.Message)
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
@@ -161,20 +161,6 @@ func podRunsImage(p v1.Pod, image string) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func (f *Framework) GetLogs(ctx context.Context, namespace string, podName, containerName string) (string, error) {
|
||||
logs, err := f.KubeClient.CoreV1().RESTClient().Get().
|
||||
Resource("pods").
|
||||
Namespace(namespace).
|
||||
Name(podName).SubResource("log").
|
||||
Param("container", containerName).
|
||||
Do(ctx).
|
||||
Raw()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(logs), err
|
||||
}
|
||||
|
||||
// ProxyGetPod expects resourceName as "[protocol:]podName[:portNameOrNumber]".
|
||||
// protocol is optional and the valid values are "http" and "https".
|
||||
// Without specifying protocol, "http" will be used.
|
||||
|
||||
@@ -17,7 +17,6 @@ package framework
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
@@ -32,7 +31,7 @@ func (f *Framework) CreateNamespace(ctx context.Context, t *testing.T, testCtx *
|
||||
rn := k8sutil.ResourceNamer{}
|
||||
name, err := rn.UniqueDNS1123Label(name)
|
||||
if err != nil {
|
||||
t.Fatal(fmt.Errorf("failed to generate a namespace name %v: %w", name, err))
|
||||
t.Fatalf("failed to generate namespace %v: %v", name, err)
|
||||
}
|
||||
|
||||
_, err = f.KubeClient.CoreV1().Namespaces().Create(ctx, &v1.Namespace{
|
||||
@@ -40,11 +39,12 @@ func (f *Framework) CreateNamespace(ctx context.Context, t *testing.T, testCtx *
|
||||
Name: name,
|
||||
},
|
||||
}, metav1.CreateOptions{})
|
||||
|
||||
if err != nil {
|
||||
t.Fatal(fmt.Errorf("failed to create namespace with name %v: %w", name, err))
|
||||
t.Fatalf("failed to create namespace %q: %v", name, err)
|
||||
}
|
||||
|
||||
testCtx.namespaces = append(testCtx.namespaces, name)
|
||||
|
||||
namespaceFinalizerFn := func() error {
|
||||
return f.DeleteNamespace(ctx, name)
|
||||
}
|
||||
|
||||
@@ -32,22 +32,48 @@ import (
|
||||
"k8s.io/client-go/transport/spdy"
|
||||
)
|
||||
|
||||
// LogOptions controls which logs WritePodLogs retrieves.
type LogOptions struct {
	// Container restricts the output to the container with this name.
	// When empty, the logs of all the pod's containers are written.
	Container string

	// TailLines is forwarded to the log request only when greater than zero.
	TailLines int64

	// SinceSeconds is forwarded to the log request only when greater than zero.
	SinceSeconds int64
}
|
||||
|
||||
// PrintPodLogs prints the logs of a specified Pod.
|
||||
func (f *Framework) PrintPodLogs(ctx context.Context, ns, p string) error {
|
||||
pod, err := f.KubeClient.CoreV1().Pods(ns).Get(ctx, p, metav1.GetOptions{})
|
||||
func (f *Framework) WritePodLogs(ctx context.Context, w io.Writer, ns, pod string, opts LogOptions) error {
|
||||
p, err := f.KubeClient.CoreV1().Pods(ns).Get(ctx, pod, metav1.GetOptions{})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to print logs of pod '%v': failed to get pod: %w", p, err)
|
||||
return fmt.Errorf("failed to get pod %s/%s: %w", ns, pod, err)
|
||||
}
|
||||
|
||||
for _, c := range pod.Spec.Containers {
|
||||
req := f.KubeClient.CoreV1().Pods(ns).GetLogs(p, &v1.PodLogOptions{Container: c.Name})
|
||||
var containers []string
|
||||
for _, c := range p.Spec.Containers {
|
||||
if opts.Container != "" && c.Name != opts.Container {
|
||||
continue
|
||||
}
|
||||
containers = append(containers, c.Name)
|
||||
}
|
||||
|
||||
plo := v1.PodLogOptions{}
|
||||
if opts.TailLines > 0 {
|
||||
plo.TailLines = &opts.TailLines
|
||||
}
|
||||
if opts.SinceSeconds > 0 {
|
||||
plo.SinceSeconds = &opts.SinceSeconds
|
||||
}
|
||||
|
||||
for _, c := range containers {
|
||||
plo.Container = c
|
||||
req := f.KubeClient.CoreV1().Pods(ns).GetLogs(pod, &plo)
|
||||
resp, err := req.DoRaw(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to retrieve logs of pod '%v': %w", p, err)
|
||||
return fmt.Errorf("failed to retrieve logs of container %q (pod %s/%s): %w", c, ns, pod, err)
|
||||
}
|
||||
|
||||
fmt.Printf("=== Logs of %v/%v/%v:", ns, p, c.Name)
|
||||
fmt.Println(string(resp))
|
||||
fmt.Fprintf(w, "=== Logs (pod=%s/%s container=%s)\n", ns, pod, c)
|
||||
_, err = w.Write(resp)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write logs: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
@@ -804,13 +804,14 @@ func (f *Framework) PrintPrometheusLogs(ctx context.Context, t *testing.T, p *mo
|
||||
|
||||
replicas := int(*p.Spec.Replicas)
|
||||
for i := 0; i < replicas; i++ {
|
||||
l, err := f.GetLogs(ctx, p.Namespace, fmt.Sprintf("prometheus-%s-%d", p.Name, i), "prometheus")
|
||||
b := &bytes.Buffer{}
|
||||
err := f.WritePodLogs(ctx, b, p.Namespace, fmt.Sprintf("prometheus-%s-%d", p.Name, i), LogOptions{Container: "prometheus"})
|
||||
if err != nil {
|
||||
t.Logf("failed to retrieve logs for replica[%d]: %v", i, err)
|
||||
continue
|
||||
}
|
||||
t.Logf("Prometheus %q/%q (replica #%d) logs:", p.Namespace, p.Name, i)
|
||||
t.Logf("%s", l)
|
||||
t.Log(b.String())
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user