From 5ccca6a9c3328bda5ff170f32720ce8bc8d416d6 Mon Sep 17 00:00:00 2001 From: Sanchit2662 Date: Sat, 17 Jan 2026 21:58:10 +0530 Subject: [PATCH] fix: use update-in-place for PrometheusRule ConfigMaps Signed-off-by: Sanchit2662 --- pkg/k8sutil/k8sutil.go | 24 ++++++++++++++++++++++++ pkg/operator/rules.go | 40 +++++++++++++++++++++++----------------- 2 files changed, 47 insertions(+), 17 deletions(-) diff --git a/pkg/k8sutil/k8sutil.go b/pkg/k8sutil/k8sutil.go index b03f02ab5..9cf4f729e 100644 --- a/pkg/k8sutil/k8sutil.go +++ b/pkg/k8sutil/k8sutil.go @@ -409,6 +409,30 @@ func CreateOrUpdateSecret(ctx context.Context, secretClient clientv1.SecretInter }) } +// CreateOrUpdateConfigMap merges metadata of existing ConfigMap with new one and updates it. +func CreateOrUpdateConfigMap(ctx context.Context, cmClient clientv1.ConfigMapInterface, desired *v1.ConfigMap) error { + // As stated in the RetryOnConflict's documentation, the returned error shouldn't be wrapped. + return retry.RetryOnConflict(retry.DefaultRetry, func() error { + existingCM, err := cmClient.Get(ctx, desired.Name, metav1.GetOptions{}) + if err != nil { + if !apierrors.IsNotFound(err) { + return err + } + + _, err = cmClient.Create(ctx, desired, metav1.CreateOptions{}) + return err + } + + mutated := existingCM.DeepCopyObject().(*v1.ConfigMap) + mergeMetadata(&desired.ObjectMeta, mutated.ObjectMeta) + if apiequality.Semantic.DeepEqual(existingCM, desired) { + return nil + } + _, err = cmClient.Update(ctx, desired, metav1.UpdateOptions{}) + return err + }) +} + // IsAPIGroupVersionResourceSupported checks if given groupVersion and resource is supported by the cluster. func IsAPIGroupVersionResourceSupported(discoveryCli discovery.DiscoveryInterface, groupVersion schema.GroupVersion, resource string) (bool, error) { apiResourceList, err := discoveryCli.ServerResourcesForGroupVersion(groupVersion.String()) diff --git a/pkg/operator/rules.go b/pkg/operator/rules.go index c8dec5550..11ab2989d 100644 --- a/pkg/operator/rules.go +++ b/pkg/operator/rules.go @@ -388,26 +388,32 @@ func (prs *PrometheusRuleSyncer) Sync(ctx context.Context, rules map[string]stri return nil, fmt.Errorf("failed to generate ConfigMaps for PrometheusRule: %w", err) } - // Delete and recreate the configmap(s). It's not very efficient but proved - // to be robust enough so far. - if len(current) > 0 { - prs.logger.Debug("deleting ConfigMaps for PrometheusRule") - // In theory we could use DeleteCollection but the method isn't - // supported by the fake client. - // See https://github.com/kubernetes/kubernetes/issues/105357 - for _, cm := range current { - err := prs.cmClient.Delete(ctx, cm.Name, metav1.DeleteOptions{}) - if err != nil { - return nil, fmt.Errorf("failed to delete ConfigMap %q: %w", cm.Name, err) - } + // Build a set of desired ConfigMap names for quick lookup. + desiredNames := make(map[string]struct{}, len(configMaps)) + for _, cm := range configMaps { + desiredNames[cm.Name] = struct{}{} + } + + // Create or update the desired ConfigMaps first to ensure rules are never + // missing. This avoids the race condition where deleting ConfigMaps before + // creating new ones could cause the config-reloader to reload Prometheus + // with missing rules. + prs.logger.Debug("creating/updating ConfigMaps for PrometheusRule") + for i := range configMaps { + if err := k8sutil.CreateOrUpdateConfigMap(ctx, prs.cmClient, &configMaps[i]); err != nil { + return nil, fmt.Errorf("failed to create or update ConfigMap %q: %w", configMaps[i].Name, err) } } - prs.logger.Debug("creating ConfigMaps for PrometheusRule") - for _, cm := range configMaps { - _, err = prs.cmClient.Create(ctx, &cm, metav1.CreateOptions{}) - if err != nil { - return nil, fmt.Errorf("failed to create ConfigMap %q: %w", cm.Name, err) + // Delete ConfigMaps that are no longer needed (excess shards from previous + // reconciliations). This happens after creates/updates to ensure rules + // remain available throughout the sync process. + for _, cm := range current { + if _, exists := desiredNames[cm.Name]; !exists { + prs.logger.Debug("deleting excess ConfigMap for PrometheusRule", "configmap", cm.Name) + if err := prs.cmClient.Delete(ctx, cm.Name, metav1.DeleteOptions{}); err != nil { + return nil, fmt.Errorf("failed to delete excess ConfigMap %q: %w", cm.Name, err) + } } }