diff --git a/pkg/prometheus/server/operator.go b/pkg/prometheus/server/operator.go
index fb96ac1ab..a32f57db8 100644
--- a/pkg/prometheus/server/operator.go
+++ b/pkg/prometheus/server/operator.go
@@ -1055,7 +1055,10 @@ func (c *Operator) updateConfigResourcesStatus(ctx context.Context, p *monitorin
 		return nil
 	}
 
-	configResourceSyncer := prompkg.NewConfigResourceSyncer(p, c.dclient)
+	var (
+		configResourceSyncer = prompkg.NewConfigResourceSyncer(p, c.dclient)
+		getErr               error
+	)
 
 	// Update the status of selected serviceMonitors.
 	for key, configResource := range resources.sMons {
@@ -1071,9 +1074,8 @@ func (c *Operator) updateConfigResourcesStatus(ctx context.Context, p *monitorin
 		}
 	}
 
-	// Remove bindings from configuration resources which reference the
+	// Remove bindings from serviceMonitors which reference the
 	// workload but aren't selected anymore.
-	var getErr error
 	if err := c.smonInfs.ListAll(labels.Everything(), func(obj any) {
 		if getErr != nil {
 			// Skip all subsequent updates after the first error.
@@ -1105,6 +1107,43 @@ func (c *Operator) updateConfigResourcesStatus(ctx context.Context, p *monitorin
 	}); err != nil {
 		return fmt.Errorf("listing all ServiceMonitors from cache failed: %w", err)
 	}
+	if getErr != nil {
+		return getErr
+	}
+
+	// Remove bindings from podMonitors which reference the
+	// workload but aren't selected anymore.
+	if err := c.pmonInfs.ListAll(labels.Everything(), func(obj any) {
+		if getErr != nil {
+			// Skip all subsequent updates after the first error.
+			return
+		}
+
+		k, ok := c.accessor.MetaNamespaceKey(obj)
+		if !ok {
+			return
+		}
+
+		if _, ok = resources.pMons[k]; ok {
+			return
+		}
+
+		pm, ok := obj.(*monitoringv1.PodMonitor)
+		if !ok {
+			return
+		}
+
+		if err := k8sutil.AddTypeInformationToObject(pm); err != nil {
+			getErr = fmt.Errorf("failed to add type information to PodMonitor %s: %w", k, err)
+			return
+		}
+
+		if err := configResourceSyncer.RemoveBinding(ctx, pm); err != nil {
+			getErr = fmt.Errorf("failed to remove Prometheus binding from PodMonitor %s status: %w", k, err)
+		}
+	}); err != nil {
+		return fmt.Errorf("listing all PodMonitors from cache failed: %w", err)
+	}
 
 	return getErr
 }
@@ -1118,6 +1157,8 @@ func (c *Operator) configResStatusCleanup(ctx context.Context, p *monitoringv1.P
 		configResourceSyncer = prompkg.NewConfigResourceSyncer(p, c.dclient)
 		getErr               error
 	)
+
+	// Remove bindings from all serviceMonitors which reference the workload.
 	if err := c.smonInfs.ListAll(labels.Everything(), func(obj any) {
 		if getErr != nil {
 			// Skip all subsequent updates after the first error.
@@ -1138,7 +1179,31 @@ func (c *Operator) configResStatusCleanup(ctx context.Context, p *monitoringv1.P
 	}); err != nil {
 		return fmt.Errorf("listing all ServiceMonitors from cache failed: %w", err)
 	}
+	if getErr != nil {
+		return getErr
+	}
 
+	// Remove bindings from all podMonitors which reference the workload.
+	if err := c.pmonInfs.ListAll(labels.Everything(), func(obj any) {
+		if getErr != nil {
+			// Skip all subsequent updates after the first error.
+			return
+		}
+
+		pm, ok := obj.(*monitoringv1.PodMonitor)
+		if !ok {
+			return
+		}
+
+		if err := k8sutil.AddTypeInformationToObject(pm); err != nil {
+			getErr = fmt.Errorf("failed to add type information to PodMonitor: %w", err)
+			return
+		}
+
+		getErr = configResourceSyncer.RemoveBinding(ctx, pm)
+	}); err != nil {
+		return fmt.Errorf("listing all PodMonitors from cache failed: %w", err)
+	}
 
 	return getErr
 }
diff --git a/test/e2e/main_test.go b/test/e2e/main_test.go
index 8cabc167f..0663ff5f9 100644
--- a/test/e2e/main_test.go
+++ b/test/e2e/main_test.go
@@ -439,6 +439,8 @@ func TestGatedFeatures(t *testing.T) {
 		"GarbageCollectionOfServiceMonitorBinding":           testGarbageCollectionOfServiceMonitorBinding,
 		"RmServiceMonitorBindingDuringWorkloadDelete":        testRmServiceMonitorBindingDuringWorkloadDelete,
 		"PodMonitorStatusSubresource":                        testPodMonitorStatusSubresource,
+		"GarbageCollectionOfPodMonitorBinding":               testGarbageCollectionOfPodMonitorBinding,
+		"RmPodMonitorBindingDuringWorkloadDelete":            testRmPodMonitorBindingDuringWorkloadDelete,
 		"FinalizerForPromAgentWhenStatusForConfigResEnabled": testFinalizerForPromAgentWhenStatusForConfigResEnabled,
 	}
 
diff --git a/test/e2e/status_subresource_test.go b/test/e2e/status_subresource_test.go
index e3037f59f..2906f8e01 100644
--- a/test/e2e/status_subresource_test.go
+++ b/test/e2e/status_subresource_test.go
@@ -343,6 +343,84 @@ func testPodMonitorStatusSubresource(t *testing.T) {
 	require.Equal(t, ts, cond.LastTransitionTime.String())
 }
 
+// testGarbageCollectionOfPodMonitorBinding validates that the operator removes the reference to the Prometheus resource when the PodMonitor isn't selected anymore by the workload.
+func testGarbageCollectionOfPodMonitorBinding(t *testing.T) {
+	t.Parallel()
+	ctx := context.Background()
+	testCtx := framework.NewTestCtx(t)
+	defer testCtx.Cleanup(t)
+
+	ns := framework.CreateNamespace(ctx, t, testCtx)
+	framework.SetupPrometheusRBAC(ctx, t, testCtx, ns)
+	_, err := framework.CreateOrUpdatePrometheusOperatorWithOpts(
+		ctx, testFramework.PrometheusOperatorOpts{
+			Namespace:           ns,
+			AllowedNamespaces:   []string{ns},
+			EnabledFeatureGates: []operator.FeatureGateName{operator.StatusForConfigurationResourcesFeature},
+		},
+	)
+	require.NoError(t, err)
+
+	name := "pmon-status-binding-cleanup-test"
+	p := framework.MakeBasicPrometheus(ns, name, name, 1)
+
+	_, err = framework.CreatePrometheusAndWaitUntilReady(ctx, ns, p)
+	require.NoError(t, err)
+
+	pm := framework.MakeBasicPodMonitor(name)
+	pm, err = framework.MonClientV1.PodMonitors(ns).Create(ctx, pm, v1.CreateOptions{})
+	require.NoError(t, err)
+
+	pm, err = framework.WaitForPodMonitorCondition(ctx, pm, p, monitoringv1.PrometheusName, monitoringv1.Accepted, monitoringv1.ConditionTrue, 1*time.Minute)
+	require.NoError(t, err)
+
+	// Update the PodMonitor's labels, Prometheus doesn't select the resource anymore.
+	pm.Labels = map[string]string{}
+	pm, err = framework.MonClientV1.PodMonitors(ns).Update(ctx, pm, v1.UpdateOptions{})
+	require.NoError(t, err)
+
+	_, err = framework.WaitForPodMonitorWorkloadBindingCleanup(ctx, pm, p, monitoringv1.PrometheusName, 1*time.Minute)
+	require.NoError(t, err)
+}
+
+// testRmPodMonitorBindingDuringWorkloadDelete validates that the operator removes the reference to the Prometheus resource from PodMonitor's status when workload is deleted.
+func testRmPodMonitorBindingDuringWorkloadDelete(t *testing.T) {
+	t.Parallel()
+	ctx := context.Background()
+	testCtx := framework.NewTestCtx(t)
+	defer testCtx.Cleanup(t)
+
+	ns := framework.CreateNamespace(ctx, t, testCtx)
+	framework.SetupPrometheusRBAC(ctx, t, testCtx, ns)
+	_, err := framework.CreateOrUpdatePrometheusOperatorWithOpts(
+		ctx, testFramework.PrometheusOperatorOpts{
+			Namespace:           ns,
+			AllowedNamespaces:   []string{ns},
+			EnabledFeatureGates: []operator.FeatureGateName{operator.StatusForConfigurationResourcesFeature},
+		},
+	)
+	require.NoError(t, err)
+
+	name := "workload-del-pmon-test"
+	p := framework.MakeBasicPrometheus(ns, name, name, 1)
+
+	_, err = framework.CreatePrometheusAndWaitUntilReady(ctx, ns, p)
+	require.NoError(t, err, "failed to create Prometheus")
+	pmon := framework.MakeBasicPodMonitor(name)
+
+	pm, err := framework.MonClientV1.PodMonitors(ns).Create(ctx, pmon, v1.CreateOptions{})
+	require.NoError(t, err)
+
+	pm, err = framework.WaitForPodMonitorCondition(ctx, pm, p, monitoringv1.PrometheusName, monitoringv1.Accepted, monitoringv1.ConditionTrue, 1*time.Minute)
+	require.NoError(t, err)
+
+	err = framework.DeletePrometheusAndWaitUntilGone(ctx, ns, name)
+	require.NoError(t, err)
+
+	_, err = framework.WaitForPodMonitorWorkloadBindingCleanup(ctx, pm, p, monitoringv1.PrometheusName, 1*time.Minute)
+	require.NoError(t, err)
+}
+
 // testFinalizerForPromAgentWhenStatusForConfigResEnabled tests the adding/removing of status-cleanup finalizer for PrometheusAgent when StatusForConfigurationResourcesFeature is enabled.
 func testFinalizerForPromAgentWhenStatusForConfigResEnabled(t *testing.T) {
 	t.Parallel()
diff --git a/test/framework/pod_monitor.go b/test/framework/pod_monitor.go
index cc427d2b0..37fbef432 100644
--- a/test/framework/pod_monitor.go
+++ b/test/framework/pod_monitor.go
@@ -47,3 +47,25 @@ func (f *Framework) WaitForPodMonitorCondition(ctx context.Context, pm *monitori
 	}
 	return current, nil
 }
+
+func (f *Framework) WaitForPodMonitorWorkloadBindingCleanup(ctx context.Context, pm *monitoringv1.PodMonitor, workload metav1.Object, resource string, timeout time.Duration) (*monitoringv1.PodMonitor, error) {
+	var current *monitoringv1.PodMonitor
+
+	if err := f.WaitForConfigResWorkloadBindingCleanup(
+		ctx,
+		func(ctx context.Context) ([]monitoringv1.WorkloadBinding, error) {
+			var err error
+			current, err = f.MonClientV1.PodMonitors(pm.Namespace).Get(ctx, pm.Name, metav1.GetOptions{})
+			if err != nil {
+				return nil, err
+			}
+			return current.Status.Bindings, nil
+		},
+		workload,
+		resource,
+		timeout,
+	); err != nil {
+		return nil, fmt.Errorf("podMonitor status %v/%v failed to reach expected condition: %w", pm.Namespace, pm.Name, err)
+	}
+	return current, nil
+}