1
0
mirror of https://github.com/coreos/prometheus-operator.git synced 2026-02-05 06:45:27 +01:00

Remove binding from PodMonitor's status when workload deletd or invalid Ref (#7936)

* feat:  PodMonitor binding removal

* feat: tests for binding removals from podMon status
This commit is contained in:
Y@&h
2025-09-23 13:11:58 +05:30
committed by GitHub
parent 0470c780a4
commit 7a2bb75866
4 changed files with 170 additions and 3 deletions

View File

@@ -1055,7 +1055,10 @@ func (c *Operator) updateConfigResourcesStatus(ctx context.Context, p *monitorin
return nil
}
configResourceSyncer := prompkg.NewConfigResourceSyncer(p, c.dclient)
var (
configResourceSyncer = prompkg.NewConfigResourceSyncer(p, c.dclient)
getErr error
)
// Update the status of selected serviceMonitors.
for key, configResource := range resources.sMons {
@@ -1071,9 +1074,8 @@ func (c *Operator) updateConfigResourcesStatus(ctx context.Context, p *monitorin
}
}
// Remove bindings from configuration resources which reference the
// Remove bindings from serviceMonitors which reference the
// workload but aren't selected anymore.
var getErr error
if err := c.smonInfs.ListAll(labels.Everything(), func(obj any) {
if getErr != nil {
// Skip all subsequent updates after the first error.
@@ -1105,6 +1107,43 @@ func (c *Operator) updateConfigResourcesStatus(ctx context.Context, p *monitorin
}); err != nil {
return fmt.Errorf("listing all ServiceMonitors from cache failed: %w", err)
}
if getErr != nil {
return getErr
}
// Remove bindings from podMonitors which reference the
// workload but aren't selected anymore.
if err := c.pmonInfs.ListAll(labels.Everything(), func(obj any) {
if getErr != nil {
// Skip all subsequent updates after the first error.
return
}
k, ok := c.accessor.MetaNamespaceKey(obj)
if !ok {
return
}
if _, ok = resources.pMons[k]; ok {
return
}
pm, ok := obj.(*monitoringv1.PodMonitor)
if !ok {
return
}
if err := k8sutil.AddTypeInformationToObject(pm); err != nil {
getErr = fmt.Errorf("failed to add type information to PodMonitor %s: %w", k, err)
return
}
if err := configResourceSyncer.RemoveBinding(ctx, pm); err != nil {
getErr = fmt.Errorf("failed to remove Prometheus binding from PodMonitor %s status: %w", k, err)
}
}); err != nil {
return fmt.Errorf("listing all PodMonitors from cache failed: %w", err)
}
return getErr
}
@@ -1118,6 +1157,8 @@ func (c *Operator) configResStatusCleanup(ctx context.Context, p *monitoringv1.P
configResourceSyncer = prompkg.NewConfigResourceSyncer(p, c.dclient)
getErr error
)
// Remove bindings from all serviceMonitors which reference the workload.
if err := c.smonInfs.ListAll(labels.Everything(), func(obj any) {
if getErr != nil {
// Skip all subsequent updates after the first error.
@@ -1138,7 +1179,31 @@ func (c *Operator) configResStatusCleanup(ctx context.Context, p *monitoringv1.P
}); err != nil {
return fmt.Errorf("listing all ServiceMonitors from cache failed: %w", err)
}
if getErr != nil {
return getErr
}
// Remove bindings from all podMonitors which reference the workload.
if err := c.pmonInfs.ListAll(labels.Everything(), func(obj any) {
if getErr != nil {
// Skip all subsequent updates after the first error.
return
}
pm, ok := obj.(*monitoringv1.PodMonitor)
if !ok {
return
}
if err := k8sutil.AddTypeInformationToObject(pm); err != nil {
getErr = fmt.Errorf("failed to add type information to PodMonitor: %w", err)
return
}
getErr = configResourceSyncer.RemoveBinding(ctx, pm)
}); err != nil {
return fmt.Errorf("listing all PodMonitors from cache failed: %w", err)
}
return getErr
}

View File

@@ -439,6 +439,8 @@ func TestGatedFeatures(t *testing.T) {
"GarbageCollectionOfServiceMonitorBinding": testGarbageCollectionOfServiceMonitorBinding,
"RmServiceMonitorBindingDuringWorkloadDelete": testRmServiceMonitorBindingDuringWorkloadDelete,
"PodMonitorStatusSubresource": testPodMonitorStatusSubresource,
"GarbageCollectionOfPodMonitorBinding": testGarbageCollectionOfPodMonitorBinding,
"RmPodMonitorBindingDuringWorkloadDelete": testRmPodMonitorBindingDuringWorkloadDelete,
"FinalizerForPromAgentWhenStatusForConfigResEnabled": testFinalizerForPromAgentWhenStatusForConfigResEnabled,
}

View File

@@ -343,6 +343,84 @@ func testPodMonitorStatusSubresource(t *testing.T) {
require.Equal(t, ts, cond.LastTransitionTime.String())
}
// testGarbageCollectionOfPodMonitorBinding validates that the operator removes the reference to the Prometheus resource when the PodMonitor isn't selected anymore by the workload.
func testGarbageCollectionOfPodMonitorBinding(t *testing.T) {
t.Parallel()
ctx := context.Background()
testCtx := framework.NewTestCtx(t)
defer testCtx.Cleanup(t)
ns := framework.CreateNamespace(ctx, t, testCtx)
framework.SetupPrometheusRBAC(ctx, t, testCtx, ns)
_, err := framework.CreateOrUpdatePrometheusOperatorWithOpts(
ctx, testFramework.PrometheusOperatorOpts{
Namespace: ns,
AllowedNamespaces: []string{ns},
EnabledFeatureGates: []operator.FeatureGateName{operator.StatusForConfigurationResourcesFeature},
},
)
require.NoError(t, err)
name := "pmon-status-binding-cleanup-test"
p := framework.MakeBasicPrometheus(ns, name, name, 1)
_, err = framework.CreatePrometheusAndWaitUntilReady(ctx, ns, p)
require.NoError(t, err)
pm := framework.MakeBasicPodMonitor(name)
pm, err = framework.MonClientV1.PodMonitors(ns).Create(ctx, pm, v1.CreateOptions{})
require.NoError(t, err)
pm, err = framework.WaitForPodMonitorCondition(ctx, pm, p, monitoringv1.PrometheusName, monitoringv1.Accepted, monitoringv1.ConditionTrue, 1*time.Minute)
require.NoError(t, err)
// Update the PodMonitor's labels, Prometheus doesn't select the resource anymore.
pm.Labels = map[string]string{}
pm, err = framework.MonClientV1.PodMonitors(ns).Update(ctx, pm, v1.UpdateOptions{})
require.NoError(t, err)
_, err = framework.WaitForPodMonitorWorkloadBindingCleanup(ctx, pm, p, monitoringv1.PrometheusName, 1*time.Minute)
require.NoError(t, err)
}
// testRmPodMonitorBindingDuringWorkloadDelete validates that the operator removes the reference to the Prometheus resource from PodMonitor's status when workload is deleted.
func testRmPodMonitorBindingDuringWorkloadDelete(t *testing.T) {
t.Parallel()
ctx := context.Background()
testCtx := framework.NewTestCtx(t)
defer testCtx.Cleanup(t)
ns := framework.CreateNamespace(ctx, t, testCtx)
framework.SetupPrometheusRBAC(ctx, t, testCtx, ns)
_, err := framework.CreateOrUpdatePrometheusOperatorWithOpts(
ctx, testFramework.PrometheusOperatorOpts{
Namespace: ns,
AllowedNamespaces: []string{ns},
EnabledFeatureGates: []operator.FeatureGateName{operator.StatusForConfigurationResourcesFeature},
},
)
require.NoError(t, err)
name := "workload-del-pmon-test"
p := framework.MakeBasicPrometheus(ns, name, name, 1)
_, err = framework.CreatePrometheusAndWaitUntilReady(ctx, ns, p)
require.NoError(t, err, "failed to create Prometheus")
pmon := framework.MakeBasicPodMonitor(name)
pm, err := framework.MonClientV1.PodMonitors(ns).Create(ctx, pmon, v1.CreateOptions{})
require.NoError(t, err)
pm, err = framework.WaitForPodMonitorCondition(ctx, pm, p, monitoringv1.PrometheusName, monitoringv1.Accepted, monitoringv1.ConditionTrue, 1*time.Minute)
require.NoError(t, err)
err = framework.DeletePrometheusAndWaitUntilGone(ctx, ns, name)
require.NoError(t, err)
_, err = framework.WaitForPodMonitorWorkloadBindingCleanup(ctx, pm, p, monitoringv1.PrometheusName, 1*time.Minute)
require.NoError(t, err)
}
// testFinalizerForPromAgentWhenStatusForConfigResEnabled tests the adding/removing of status-cleanup finalizer for PrometheusAgent when StatusForConfigurationResourcesFeature is enabled.
func testFinalizerForPromAgentWhenStatusForConfigResEnabled(t *testing.T) {
t.Parallel()

View File

@@ -47,3 +47,25 @@ func (f *Framework) WaitForPodMonitorCondition(ctx context.Context, pm *monitori
}
return current, nil
}
func (f *Framework) WaitForPodMonitorWorkloadBindingCleanup(ctx context.Context, pm *monitoringv1.PodMonitor, workload metav1.Object, resource string, timeout time.Duration) (*monitoringv1.PodMonitor, error) {
var current *monitoringv1.PodMonitor
if err := f.WaitForConfigResWorkloadBindingCleanup(
ctx,
func(ctx context.Context) ([]monitoringv1.WorkloadBinding, error) {
var err error
current, err = f.MonClientV1.PodMonitors(pm.Namespace).Get(ctx, pm.Name, metav1.GetOptions{})
if err != nil {
return nil, err
}
return current.Status.Bindings, nil
},
workload,
resource,
timeout,
); err != nil {
return nil, fmt.Errorf("podMonitor status %v/%v failed to reach expected condition: %w", pm.Namespace, pm.Name, err)
}
return current, nil
}