From 0ecdf4786290c4a06dcfb85cec972a995d465ccc Mon Sep 17 00:00:00 2001 From: "Y@&h" <121890726+yp969803@users.noreply.github.com> Date: Fri, 17 Oct 2025 20:55:49 +0530 Subject: [PATCH] Cleanup of bindings for PrometheusRules in prometheus (#8024) --- pkg/prometheus/server/operator.go | 11 +++ test/e2e/main_test.go | 2 + test/e2e/status_subresource_test.go | 105 ++++++++++++++++++++++++++++ test/framework/prometheus_rule.go | 22 ++++++ 4 files changed, 140 insertions(+) diff --git a/pkg/prometheus/server/operator.go b/pkg/prometheus/server/operator.go index a4c872969..4d0f6f12b 100644 --- a/pkg/prometheus/server/operator.go +++ b/pkg/prometheus/server/operator.go @@ -1127,6 +1127,12 @@ func (c *Operator) updateConfigResourcesStatus(ctx context.Context, p *monitorin if err := operator.CleanupBindings(ctx, c.probeInfs.ListAll, resources.bMons, configResourceSyncer); err != nil { return fmt.Errorf("failed to remove bindings for probes: %w", err) } + + // Remove bindings from prometheusRules which reference the + // workload but aren't selected anymore. + if err := operator.CleanupBindings(ctx, c.ruleInfs.ListAll, resources.rules.Selected(c.accessor), configResourceSyncer); err != nil { + return fmt.Errorf("failed to remove bindings for prometheusRules: %w", err) + } return nil } @@ -1157,6 +1163,11 @@ func (c *Operator) configResStatusCleanup(ctx context.Context, p *monitoringv1.P if err := operator.CleanupBindings(ctx, c.probeInfs.ListAll, operator.TypedResourcesSelection[*monitoringv1.Probe]{}, configResourceSyncer); err != nil { return fmt.Errorf("failed to remove bindings for probes: %w", err) } + + // Remove bindings from all prometheusRules which reference the workload. + if err := operator.CleanupBindings(ctx, c.ruleInfs.ListAll, operator.TypedResourcesSelection[*monitoringv1.PrometheusRule]{}, configResourceSyncer); err != nil { + return fmt.Errorf("failed to remove bindings for prometheusRule: %w", err) + } return nil } diff --git a/test/e2e/main_test.go b/test/e2e/main_test.go index 223a3f228..130c788db 100644 --- a/test/e2e/main_test.go +++ b/test/e2e/main_test.go @@ -451,6 +451,8 @@ func TestGatedFeatures(t *testing.T) { "RmScrapeConfigBindingDuringWorkloadDelete": testRmScrapeConfigBindingDuringWorkloadDelete, "PrometheusRuleStatusSubresource": testPrometheusRuleStatusSubresource, "FinalizerForPromAgentWhenStatusForConfigResEnabled": testFinalizerForPromAgentWhenStatusForConfigResEnabled, + "GarbageCollectionOfPrometheusRuleBinding": testGarbageCollectionOfPrometheusRuleBinding, + "RmPrometheusRuleBindingDuringWorkloadDelete": testRmPrometheusRuleBindingDuringWorkloadDelete, } for name, f := range testFuncs { diff --git a/test/e2e/status_subresource_test.go b/test/e2e/status_subresource_test.go index 315239727..d52333722 100644 --- a/test/e2e/status_subresource_test.go +++ b/test/e2e/status_subresource_test.go @@ -948,3 +948,108 @@ func testPrometheusRuleStatusSubresource(t *testing.T) { require.NoError(t, err) require.Equal(t, ts, cond.LastTransitionTime.String()) } + +// testGarbageCollectionOfPrometheusRuleBinding validates that the operator removes the reference to the Prometheus resource when the PrometheusRule isn't selected anymore by the workload. +func testGarbageCollectionOfPrometheusRuleBinding(t *testing.T) { + t.Parallel() + ctx := context.Background() + testCtx := framework.NewTestCtx(t) + defer testCtx.Cleanup(t) + + ns := framework.CreateNamespace(ctx, t, testCtx) + framework.SetupPrometheusRBAC(ctx, t, testCtx, ns) + _, err := framework.CreateOrUpdatePrometheusOperatorWithOpts( + ctx, testFramework.PrometheusOperatorOpts{ + Namespace: ns, + AllowedNamespaces: []string{ns}, + EnabledFeatureGates: []operator.FeatureGateName{operator.StatusForConfigurationResourcesFeature}, + }, + ) + require.NoError(t, err) + + name := "prom-rule-status-binding-cleanup-test" + + p := framework.MakeBasicPrometheus(ns, name, name, 1) + p.Spec.RuleSelector = &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "group": name, + }, + } + + _, err = framework.CreatePrometheusAndWaitUntilReady(ctx, ns, p) + require.NoError(t, err) + + pr1 := framework.MakeBasicRule(ns, "rule1", []monitoringv1.RuleGroup{ + { + Name: "TestAlert1", + Rules: []monitoringv1.Rule{ + { + Alert: "TestAlert1", + Expr: intstr.FromString("vector(1)"), + }, + }, + }, + }) + pr1.Labels["group"] = name + pr1, err = framework.MonClientV1.PrometheusRules(ns).Create(ctx, pr1, v1.CreateOptions{}) + require.NoError(t, err) + + pr1.Labels = map[string]string{} + pr1, err = framework.MonClientV1.PrometheusRules(ns).Update(ctx, pr1, v1.UpdateOptions{}) + require.NoError(t, err) + + _, err = framework.WaitForRuleWorkloadBindingCleanup(ctx, pr1, p, monitoringv1.PrometheusName, 1*time.Minute) + require.NoError(t, err) +} + +// testRmPrometheusRuleBindingDuringWorkloadDelete validates that the operator removes the reference to the Prometheus resource when workload is deleted. +func testRmPrometheusRuleBindingDuringWorkloadDelete(t *testing.T) { + t.Parallel() + ctx := context.Background() + testCtx := framework.NewTestCtx(t) + defer testCtx.Cleanup(t) + + ns := framework.CreateNamespace(ctx, t, testCtx) + framework.SetupPrometheusRBAC(ctx, t, testCtx, ns) + _, err := framework.CreateOrUpdatePrometheusOperatorWithOpts( + ctx, testFramework.PrometheusOperatorOpts{ + Namespace: ns, + AllowedNamespaces: []string{ns}, + EnabledFeatureGates: []operator.FeatureGateName{operator.StatusForConfigurationResourcesFeature}, + }, + ) + require.NoError(t, err) + + name := "prom-rule-status-binding-cleanup-test" + + p := framework.MakeBasicPrometheus(ns, name, name, 1) + p.Spec.RuleSelector = &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "group": name, + }, + } + + _, err = framework.CreatePrometheusAndWaitUntilReady(ctx, ns, p) + require.NoError(t, err) + + pr1 := framework.MakeBasicRule(ns, "rule1", []monitoringv1.RuleGroup{ + { + Name: "TestAlert1", + Rules: []monitoringv1.Rule{ + { + Alert: "TestAlert1", + Expr: intstr.FromString("vector(1)"), + }, + }, + }, + }) + pr1.Labels["group"] = name + pr1, err = framework.MonClientV1.PrometheusRules(ns).Create(ctx, pr1, v1.CreateOptions{}) + require.NoError(t, err) + + err = framework.DeletePrometheusAndWaitUntilGone(ctx, ns, name) + require.NoError(t, err) + + _, err = framework.WaitForRuleWorkloadBindingCleanup(ctx, pr1, p, monitoringv1.PrometheusName, 1*time.Minute) + require.NoError(t, err) +} diff --git a/test/framework/prometheus_rule.go b/test/framework/prometheus_rule.go index 6d5ecc8c1..790aea5c5 100644 --- a/test/framework/prometheus_rule.go +++ b/test/framework/prometheus_rule.go @@ -179,3 +179,25 @@ func (f *Framework) WaitForRuleCondition(ctx context.Context, pr *monitoringv1.P } return current, nil } + +func (f *Framework) WaitForRuleWorkloadBindingCleanup(ctx context.Context, pm *monitoringv1.PrometheusRule, workload metav1.Object, resource string, timeout time.Duration) (*monitoringv1.PrometheusRule, error) { + var current *monitoringv1.PrometheusRule + + if err := f.WaitForConfigResWorkloadBindingCleanup( + ctx, + func(ctx context.Context) ([]monitoringv1.WorkloadBinding, error) { + var err error + current, err = f.MonClientV1.PrometheusRules(pm.Namespace).Get(ctx, pm.Name, metav1.GetOptions{}) + if err != nil { + return nil, err + } + return current.Status.Bindings, nil + }, + workload, + resource, + timeout, + ); err != nil { + return nil, fmt.Errorf("podMonitor status %v/%v failed to reach expected condition: %w", pm.Namespace, pm.Name, err) + } + return current, nil +}