Remove binding from PodMonitor's status when the workload is deleted or the Ref is invalid (#7936)
* feat: PodMonitor binding removal
* feat: tests for binding removals from podMon status
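In short, when the Prometheus workload that selected a PodMonitor is deleted, or when the PodMonitor is no longer selected (a stale/invalid reference), the operator is now expected to prune the corresponding entry from the PodMonitor's .status.bindings. The snippet below is a rough, illustrative way to observe that with the generated monitoring client; the kubeconfig handling, namespace and PodMonitor name are placeholders and not part of this commit.

package main

import (
    "context"
    "fmt"

    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/client-go/tools/clientcmd"

    monitoringclient "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned"
)

func main() {
    // Placeholder kubeconfig loading; any rest.Config would do here.
    cfg, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
    if err != nil {
        panic(err)
    }
    mc, err := monitoringclient.NewForConfig(cfg)
    if err != nil {
        panic(err)
    }

    // "default" / "example" are placeholder namespace and PodMonitor names.
    pm, err := mc.MonitoringV1().PodMonitors("default").Get(context.Background(), "example", metav1.GetOptions{})
    if err != nil {
        panic(err)
    }

    // After the selecting Prometheus is deleted (or stops selecting this
    // PodMonitor), the operator is expected to remove its entry here.
    fmt.Printf("remaining bindings: %d\n", len(pm.Status.Bindings))
}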
@@ -1055,7 +1055,10 @@ func (c *Operator) updateConfigResourcesStatus(ctx context.Context, p *monitorin
        return nil
    }

    configResourceSyncer := prompkg.NewConfigResourceSyncer(p, c.dclient)
    var (
        configResourceSyncer = prompkg.NewConfigResourceSyncer(p, c.dclient)
        getErr error
    )

    // Update the status of selected serviceMonitors.
    for key, configResource := range resources.sMons {
@@ -1071,9 +1074,8 @@ func (c *Operator) updateConfigResourcesStatus(ctx context.Context, p *monitorin
        }
    }

    // Remove bindings from configuration resources which reference the
    // Remove bindings from serviceMonitors which reference the
    // workload but aren't selected anymore.
    var getErr error
    if err := c.smonInfs.ListAll(labels.Everything(), func(obj any) {
        if getErr != nil {
            // Skip all subsequent updates after the first error.
@@ -1105,6 +1107,43 @@ func (c *Operator) updateConfigResourcesStatus(ctx context.Context, p *monitorin
    }); err != nil {
        return fmt.Errorf("listing all ServiceMonitors from cache failed: %w", err)
    }
    if getErr != nil {
        return getErr
    }

    // Remove bindings from podMonitors which reference the
    // workload but aren't selected anymore.
    if err := c.pmonInfs.ListAll(labels.Everything(), func(obj any) {
        if getErr != nil {
            // Skip all subsequent updates after the first error.
            return
        }

        k, ok := c.accessor.MetaNamespaceKey(obj)
        if !ok {
            return
        }

        if _, ok = resources.pMons[k]; ok {
            return
        }

        pm, ok := obj.(*monitoringv1.PodMonitor)
        if !ok {
            return
        }

        if err := k8sutil.AddTypeInformationToObject(pm); err != nil {
            getErr = fmt.Errorf("failed to add type information to PodMonitor %s: %w", k, err)
            return
        }

        if err := configResourceSyncer.RemoveBinding(ctx, pm); err != nil {
            getErr = fmt.Errorf("failed to remove Prometheus binding from PodMonitor %s status: %w", k, err)
        }
    }); err != nil {
        return fmt.Errorf("listing all PodMonitors from cache failed: %w", err)
    }
    return getErr
}
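Both loops above follow the same convention: the informer's ListAll callback cannot return an error, so the first failure is recorded in getErr and every later invocation returns early; the listing error and the first per-object error are then reported separately. A stripped-down, standalone sketch of that pattern follows; visitAll is a stand-in for the informer lister, not the operator's actual API.

package main

import (
    "errors"
    "fmt"
)

// visitAll plays the role of an informer's ListAll: it invokes fn for every
// cached object and only returns an error if the listing itself fails.
func visitAll(objs []string, fn func(obj any)) error {
    for _, o := range objs {
        fn(o)
    }
    return nil
}

func removeBindings(objs []string) error {
    var getErr error
    if err := visitAll(objs, func(obj any) {
        if getErr != nil {
            // Skip all subsequent updates after the first error.
            return
        }
        if obj == "broken" {
            getErr = errors.New("failed to remove binding")
        }
    }); err != nil {
        return fmt.Errorf("listing failed: %w", err)
    }
    // Surface the first per-object failure, if any.
    return getErr
}

func main() {
    fmt.Println(removeBindings([]string{"pm-1", "broken", "pm-2"}))
    // Prints: failed to remove binding
}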
@@ -1118,6 +1157,8 @@ func (c *Operator) configResStatusCleanup(ctx context.Context, p *monitoringv1.P
        configResourceSyncer = prompkg.NewConfigResourceSyncer(p, c.dclient)
        getErr error
    )

    // Remove bindings from all serviceMonitors which reference the workload.
    if err := c.smonInfs.ListAll(labels.Everything(), func(obj any) {
        if getErr != nil {
            // Skip all subsequent updates after the first error.
@@ -1138,7 +1179,31 @@ func (c *Operator) configResStatusCleanup(ctx context.Context, p *monitoringv1.P
    }); err != nil {
        return fmt.Errorf("listing all ServiceMonitors from cache failed: %w", err)
    }
    if getErr != nil {
        return getErr
    }

    // Remove bindings from all podMonitors which reference the workload.
    if err := c.pmonInfs.ListAll(labels.Everything(), func(obj any) {
        if getErr != nil {
            // Skip all subsequent updates after the first error.
            return
        }

        pm, ok := obj.(*monitoringv1.PodMonitor)
        if !ok {
            return
        }

        if err := k8sutil.AddTypeInformationToObject(pm); err != nil {
            getErr = fmt.Errorf("failed to add type information to PodMonitor: %w", err)
            return
        }

        getErr = configResourceSyncer.RemoveBinding(ctx, pm)
    }); err != nil {
        return fmt.Errorf("listing all PodMonitors from cache failed: %w", err)
    }
    return getErr
}
@@ -439,6 +439,8 @@ func TestGatedFeatures(t *testing.T) {
        "GarbageCollectionOfServiceMonitorBinding": testGarbageCollectionOfServiceMonitorBinding,
        "RmServiceMonitorBindingDuringWorkloadDelete": testRmServiceMonitorBindingDuringWorkloadDelete,
        "PodMonitorStatusSubresource": testPodMonitorStatusSubresource,
        "GarbageCollectionOfPodMonitorBinding": testGarbageCollectionOfPodMonitorBinding,
        "RmPodMonitorBindingDuringWorkloadDelete": testRmPodMonitorBindingDuringWorkloadDelete,
        "FinalizerForPromAgentWhenStatusForConfigResEnabled": testFinalizerForPromAgentWhenStatusForConfigResEnabled,
    }
@@ -343,6 +343,84 @@ func testPodMonitorStatusSubresource(t *testing.T) {
    require.Equal(t, ts, cond.LastTransitionTime.String())
}

// testGarbageCollectionOfPodMonitorBinding validates that the operator removes the reference to the Prometheus resource when the PodMonitor isn't selected anymore by the workload.
func testGarbageCollectionOfPodMonitorBinding(t *testing.T) {
    t.Parallel()
    ctx := context.Background()
    testCtx := framework.NewTestCtx(t)
    defer testCtx.Cleanup(t)

    ns := framework.CreateNamespace(ctx, t, testCtx)
    framework.SetupPrometheusRBAC(ctx, t, testCtx, ns)
    _, err := framework.CreateOrUpdatePrometheusOperatorWithOpts(
        ctx, testFramework.PrometheusOperatorOpts{
            Namespace: ns,
            AllowedNamespaces: []string{ns},
            EnabledFeatureGates: []operator.FeatureGateName{operator.StatusForConfigurationResourcesFeature},
        },
    )
    require.NoError(t, err)

    name := "pmon-status-binding-cleanup-test"
    p := framework.MakeBasicPrometheus(ns, name, name, 1)

    _, err = framework.CreatePrometheusAndWaitUntilReady(ctx, ns, p)
    require.NoError(t, err)

    pm := framework.MakeBasicPodMonitor(name)
    pm, err = framework.MonClientV1.PodMonitors(ns).Create(ctx, pm, v1.CreateOptions{})
    require.NoError(t, err)

    pm, err = framework.WaitForPodMonitorCondition(ctx, pm, p, monitoringv1.PrometheusName, monitoringv1.Accepted, monitoringv1.ConditionTrue, 1*time.Minute)
    require.NoError(t, err)

    // Update the PodMonitor's labels, Prometheus doesn't select the resource anymore.
    pm.Labels = map[string]string{}
    pm, err = framework.MonClientV1.PodMonitors(ns).Update(ctx, pm, v1.UpdateOptions{})
    require.NoError(t, err)

    _, err = framework.WaitForPodMonitorWorkloadBindingCleanup(ctx, pm, p, monitoringv1.PrometheusName, 1*time.Minute)
    require.NoError(t, err)
}

// testRmPodMonitorBindingDuringWorkloadDelete validates that the operator removes the reference to the Prometheus resource from PodMonitor's status when workload is deleted.
func testRmPodMonitorBindingDuringWorkloadDelete(t *testing.T) {
    t.Parallel()
    ctx := context.Background()
    testCtx := framework.NewTestCtx(t)
    defer testCtx.Cleanup(t)

    ns := framework.CreateNamespace(ctx, t, testCtx)
    framework.SetupPrometheusRBAC(ctx, t, testCtx, ns)
    _, err := framework.CreateOrUpdatePrometheusOperatorWithOpts(
        ctx, testFramework.PrometheusOperatorOpts{
            Namespace: ns,
            AllowedNamespaces: []string{ns},
            EnabledFeatureGates: []operator.FeatureGateName{operator.StatusForConfigurationResourcesFeature},
        },
    )
    require.NoError(t, err)

    name := "workload-del-pmon-test"
    p := framework.MakeBasicPrometheus(ns, name, name, 1)

    _, err = framework.CreatePrometheusAndWaitUntilReady(ctx, ns, p)
    require.NoError(t, err, "failed to create Prometheus")
    pmon := framework.MakeBasicPodMonitor(name)

    pm, err := framework.MonClientV1.PodMonitors(ns).Create(ctx, pmon, v1.CreateOptions{})
    require.NoError(t, err)

    pm, err = framework.WaitForPodMonitorCondition(ctx, pm, p, monitoringv1.PrometheusName, monitoringv1.Accepted, monitoringv1.ConditionTrue, 1*time.Minute)
    require.NoError(t, err)

    err = framework.DeletePrometheusAndWaitUntilGone(ctx, ns, name)
    require.NoError(t, err)

    _, err = framework.WaitForPodMonitorWorkloadBindingCleanup(ctx, pm, p, monitoringv1.PrometheusName, 1*time.Minute)
    require.NoError(t, err)
}

// testFinalizerForPromAgentWhenStatusForConfigResEnabled tests the adding/removing of status-cleanup finalizer for PrometheusAgent when StatusForConfigurationResourcesFeature is enabled.
func testFinalizerForPromAgentWhenStatusForConfigResEnabled(t *testing.T) {
    t.Parallel()
@@ -47,3 +47,25 @@ func (f *Framework) WaitForPodMonitorCondition(ctx context.Context, pm *monitori
    }
    return current, nil
}

func (f *Framework) WaitForPodMonitorWorkloadBindingCleanup(ctx context.Context, pm *monitoringv1.PodMonitor, workload metav1.Object, resource string, timeout time.Duration) (*monitoringv1.PodMonitor, error) {
    var current *monitoringv1.PodMonitor

    if err := f.WaitForConfigResWorkloadBindingCleanup(
        ctx,
        func(ctx context.Context) ([]monitoringv1.WorkloadBinding, error) {
            var err error
            current, err = f.MonClientV1.PodMonitors(pm.Namespace).Get(ctx, pm.Name, metav1.GetOptions{})
            if err != nil {
                return nil, err
            }
            return current.Status.Bindings, nil
        },
        workload,
        resource,
        timeout,
    ); err != nil {
        return nil, fmt.Errorf("podMonitor status %v/%v failed to reach expected condition: %w", pm.Namespace, pm.Name, err)
    }
    return current, nil
}
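WaitForConfigResWorkloadBindingCleanup itself is not shown in this diff. For orientation only, a helper of that shape could be built on wait.PollUntilContextTimeout from k8s.io/apimachinery, polling the bindings getter until no entry still matches the removed workload. The sketch below is an assumption about its behavior, not the framework's implementation; the matches predicate stands in for whatever field comparison the framework actually performs.

package sketch

import (
    "context"
    "time"

    monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
    "k8s.io/apimachinery/pkg/util/wait"
)

// waitForBindingCleanup is illustrative only: it polls getBindings until no
// WorkloadBinding satisfies matches (e.g. a predicate comparing a binding
// against the deleted Prometheus workload), or fails once the timeout expires.
func waitForBindingCleanup(
    ctx context.Context,
    getBindings func(context.Context) ([]monitoringv1.WorkloadBinding, error),
    matches func(monitoringv1.WorkloadBinding) bool,
    timeout time.Duration,
) error {
    return wait.PollUntilContextTimeout(ctx, 5*time.Second, timeout, true, func(ctx context.Context) (bool, error) {
        bindings, err := getBindings(ctx)
        if err != nil {
            // Treat transient errors as "not done yet" and keep polling.
            return false, nil
        }
        for _, b := range bindings {
            if matches(b) {
                return false, nil
            }
        }
        return true, nil
    })
}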