diff --git a/Documentation/operator.md b/Documentation/operator.md index bb2271d19..cb767980c 100644 --- a/Documentation/operator.md +++ b/Documentation/operator.md @@ -21,8 +21,8 @@ Usage of ./operator: Alertmanager default base image (path without tag/version) (default "quay.io/prometheus/alertmanager") -alertmanager-instance-namespaces value Namespaces where Alertmanager custom resources and corresponding StatefulSets are watched/created. If set this takes precedence over --namespaces or --deny-namespaces for Alertmanager custom resources. - -alertmanager-instance-selector string - Label selector to filter AlertManager Custom Resources to watch. + -alertmanager-instance-selector value + Label selector to filter Alertmanager Custom Resources to watch. -annotations value Annotations to be add to all resources created by the operator -apiserver string @@ -49,7 +49,7 @@ Usage of ./operator: Enable liveness and readiness for the config-reloader container. Default: false -key-file string - NOT RECOMMENDED FOR PRODUCTION - Path to private TLS certificate file. - -kubelet-selector string + -kubelet-selector value Label selector to filter nodes. -kubelet-service string Service/Endpoints object to write kubelets into in format "namespace/name" @@ -69,9 +69,9 @@ Usage of ./operator: Prometheus default base image (path without tag/version) (default "quay.io/prometheus/prometheus") -prometheus-instance-namespaces value Namespaces where Prometheus and PrometheusAgent custom resources and corresponding Secrets, Configmaps and StatefulSets are watched/created. If set this takes precedence over --namespaces or --deny-namespaces for Prometheus custom resources. - -prometheus-instance-selector string + -prometheus-instance-selector value Label selector to filter Prometheus and PrometheusAgent Custom Resources to watch. - -secret-field-selector string + -secret-field-selector value Field selector to filter Secrets to watch -short-version Print just the version number. @@ -79,26 +79,26 @@ Usage of ./operator: Thanos default base image (path without tag/version) (default "quay.io/thanos/thanos") -thanos-ruler-instance-namespaces value Namespaces where ThanosRuler custom resources and corresponding StatefulSets are watched/created. If set this takes precedence over --namespaces or --deny-namespaces for ThanosRuler custom resources. - -thanos-ruler-instance-selector string + -thanos-ruler-instance-selector value Label selector to filter ThanosRuler Custom Resources to watch. -tls-insecure - NOT RECOMMENDED FOR PRODUCTION - Don't verify API server's CA certificate. -version Prints current version. -web.cert-file string - Cert file to be used for operator web server endpoints. (default "/etc/tls/private/tls.crt") + Certficate file to be used for the web server. (default "/etc/tls/private/tls.crt") -web.client-ca-file string - Client CA certificate file to be used for operator web server endpoints. (default "/etc/tls/private/tls-ca.crt") + Client CA certificate file to be used for the web server. (default "/etc/tls/private/tls-ca.crt") -web.enable-http2 Enable HTTP2 connections. -web.enable-tls - Activate prometheus operator web server TLS. This is useful for example when using the rule validation webhook. + Enable TLS for the web server. -web.key-file string - Private key matching the cert file to be used for operator web server endpoints. (default "/etc/tls/private/tls.key") + Private key matching the cert file to be used for the web server. (default "/etc/tls/private/tls.key") -web.listen-address string Address on which to expose metrics and web interface. (default ":8080") - -web.tls-cipher-suites string - Comma-separated list of cipher suites for the server. Values are from tls package constants (https://golang.org/pkg/crypto/tls/#pkg-constants).If omitted, the default Go cipher suites will be used.Note that TLS 1.3 ciphersuites are not configurable. + -web.tls-cipher-suites value + Comma-separated list of cipher suites for the server. Values are from tls package constants (https://golang.org/pkg/crypto/tls/#pkg-constants).If omitted, the default Go cipher suites will be used. Note that TLS 1.3 ciphersuites are not configurable. -web.tls-min-version string Minimum TLS version supported. Value must match version names from https://golang.org/pkg/crypto/tls/#pkg-constants. (default "VersionTLS13") -web.tls-reload-interval duration diff --git a/cmd/admission-webhook/main.go b/cmd/admission-webhook/main.go index d8e222105..63ef69d1b 100644 --- a/cmd/admission-webhook/main.go +++ b/cmd/admission-webhook/main.go @@ -16,19 +16,13 @@ package main import ( "context" - "crypto/tls" "flag" - "fmt" stdlog "log" - "net" "net/http" "os" "os/signal" - "strings" "syscall" - "time" - rbacproxytls "github.com/brancz/kube-rbac-proxy/pkg/tls" "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" @@ -40,54 +34,28 @@ import ( logging "github.com/prometheus-operator/prometheus-operator/internal/log" "github.com/prometheus-operator/prometheus-operator/pkg/admission" "github.com/prometheus-operator/prometheus-operator/pkg/server" -) - -const ( - defaultTLSDir = "/etc/tls/private" - defaultCrtFile = defaultTLSDir + "/tls.crt" - defaultKeyFile = defaultTLSDir + "/tls.key" - defaultCaCrt = defaultTLSDir + "/tls-ca.crt" -) - -var ( - cfg = config{} - flagset = flag.CommandLine - - enableHTTP2 bool - serverTLS bool - rawTLSCipherSuites string + "github.com/prometheus-operator/prometheus-operator/pkg/versionutil" ) func main() { - flagset.StringVar(&cfg.ListenAddress, "web.listen-address", ":8443", "Address on which the admission webhook service listens") - // Mitigate CVE-2023-44487 by disabling HTTP2 by default until the Go - // standard library and golang.org/x/net are fully fixed. - // Right now, it is possible for authenticated and unauthenticated users to - // hold open HTTP2 connections and consume huge amounts of memory. - // See: - // * https://github.com/kubernetes/kubernetes/pull/121120 - // * https://github.com/kubernetes/kubernetes/issues/121197 - // * https://github.com/golang/go/issues/63417#issuecomment-1758858612 - flagset.BoolVar(&enableHTTP2, "web.enable-http2", false, "Enable HTTP2 connections.") - flagset.BoolVar(&serverTLS, "web.enable-tls", true, "Enable TLS web server") + var ( + serverConfig server.Config = server.DefaultConfig(":8443", true) + flagset = flag.CommandLine + logConfig logging.Config + ) - flagset.StringVar(&cfg.ServerTLSConfig.CertFile, "web.cert-file", defaultCrtFile, "Certificate file to be used for the web server.") - flagset.StringVar(&cfg.ServerTLSConfig.KeyFile, "web.key-file", defaultKeyFile, "Private key matching the certificate file to be used for the web server") - flagset.StringVar(&cfg.ServerTLSConfig.ClientCAFile, "web.client-ca-file", defaultCaCrt, "Client CA certificate file to be used for web server.") - - flagset.DurationVar(&cfg.ServerTLSConfig.ReloadInterval, "web.tls-reload-interval", time.Minute, "The interval at which to watch for TLS certificate changes (default 1m).") - flagset.StringVar(&cfg.ServerTLSConfig.MinVersion, "web.tls-min-version", "VersionTLS13", fmt.Sprintf("Minimum TLS version supported. One of %s", validTLSVersions())) - flagset.StringVar(&rawTLSCipherSuites, "web.tls-cipher-suites", "", "Comma-separated list of cipher suites for the server."+ - " Values are from tls package constants (https://golang.org/pkg/crypto/tls/#pkg-constants)."+ - "If omitted, the default Go cipher suites will be used."+ - "Note that TLS 1.3 ciphersuites are not configurable.") - - flagset.StringVar(&cfg.LogLevel, "log-level", logging.LevelInfo, fmt.Sprintf("Log level to use. Possible values: %s", strings.Join(logging.AvailableLogLevels, ", "))) - flagset.StringVar(&cfg.LogFormat, "log-format", logging.FormatLogFmt, fmt.Sprintf("Log format to use. Possible values: %s", strings.Join(logging.AvailableLogFormats, ", "))) + server.RegisterFlags(flagset, &serverConfig) + versionutil.RegisterFlags(flagset) + logging.RegisterFlags(flagset, &logConfig) _ = flagset.Parse(os.Args[1:]) - logger, err := logging.NewLogger(cfg.LogLevel, cfg.LogFormat) + if versionutil.ShouldPrintVersion() { + versionutil.Print(os.Stdout, "admission-webhook") + return + } + + logger, err := logging.NewLogger(logConfig) if err != nil { stdlog.Fatal(err) } @@ -96,66 +64,6 @@ func main() { defer cancel() wg, ctx := errgroup.WithContext(ctx) - listener, err := net.Listen("tcp", cfg.ListenAddress) - if err != nil { - level.Error(logger).Log("msg", "failed to start required HTTP listener", "err", err) - os.Exit(1) - } - - tlsConf, err := loadTLSConfigFromFlags(ctx, logger, wg) - if err != nil { - level.Error(logger).Log("msg", "failed to build TLS config", "err", err) - os.Exit(1) - } - - server := newSrv(logger, tlsConf) - wg.Go(func() error { - return server.run(listener) - }) - - term := make(chan os.Signal, 1) - signal.Notify(term, os.Interrupt, syscall.SIGTERM) - - select { - case sig := <-term: - level.Info(logger).Log("msg", "Received signal, exiting gracefully...", "signal", sig.String()) - case <-ctx.Done(): - } - - if err := server.shutdown(ctx); err != nil { - level.Warn(logger).Log("msg", "Server shutdown error", "err", err) - } - - cancel() - if err := wg.Wait(); err != nil { - level.Warn(logger).Log("msg", "Unhandled error received. Exiting...", "err", err) - os.Exit(1) - } -} - -func (s *srv) run(listener net.Listener) error { - log := log.With(s.logger, "address", listener.Addr().String()) - - if s.s.TLSConfig != nil { - level.Info(log).Log("msg", "Starting TLS enabled server", "http2", enableHTTP2) - if err := s.s.ServeTLS(listener, "", ""); err != http.ErrServerClosed { - return err - } - return nil - } - - level.Info(log).Log("msg", "Starting insecure server") - if err := s.s.Serve(listener); err != http.ErrServerClosed { - return err - } - return nil -} - -func (s *srv) shutdown(ctx context.Context) error { - return s.s.Shutdown(ctx) -} - -func newSrv(logger log.Logger, tlsConf *tls.Config) *srv { mux := http.NewServeMux() admit := admission.New(log.With(logger, "component", "admissionwebhook")) admit.Register(mux) @@ -173,100 +81,32 @@ func newSrv(logger log.Logger, tlsConf *tls.Config) *srv { w.Write([]byte(`{"status":"up"}`)) }) - httpServer := http.Server{ - Handler: mux, - TLSConfig: tlsConf, - ReadHeaderTimeout: 30 * time.Second, - ReadTimeout: 30 * time.Second, - // use flags on standard logger to align with base logger and get consistent parsed fields form adapter: - // use shortfile flag to get proper 'caller' field (avoid being wrongly parsed/extracted from message) - // and no datetime related flag to keep 'ts' field from base logger (with controlled format) - ErrorLog: stdlog.New(log.NewStdlibAdapter(logger), "", stdlog.Lshortfile), - } - if !enableHTTP2 { - httpServer.TLSNextProto = make(map[string]func(*http.Server, *tls.Conn, http.Handler)) + srv, err := server.NewServer(logger, &serverConfig, mux) + if err != nil { + level.Error(logger).Log("msg", "failed to create web server", "err", err) + os.Exit(1) } - return &srv{ - logger: logger, - s: &httpServer, + wg.Go(func() error { + return srv.Serve(ctx) + }) + + term := make(chan os.Signal, 1) + signal.Notify(term, os.Interrupt, syscall.SIGTERM) + + select { + case sig := <-term: + level.Info(logger).Log("msg", "Received signal, exiting gracefully...", "signal", sig.String()) + case <-ctx.Done(): + } + + if err := srv.Shutdown(ctx); err != nil { + level.Warn(logger).Log("msg", "Server shutdown error", "err", err) + } + + cancel() + if err := wg.Wait(); err != nil { + level.Warn(logger).Log("msg", "Unhandled error received. Exiting...", "err", err) + os.Exit(1) } } - -// loadTLSConfigFromFlags creates a tls.Config if configured and starts a watch on the dir to reload certs -func loadTLSConfigFromFlags(ctx context.Context, logger log.Logger, wg *errgroup.Group) (*tls.Config, error) { - var ( - tlsConfig *tls.Config - err error - ) - if serverTLS { - if _, ok := allowedTLSVersions[cfg.ServerTLSConfig.MinVersion]; !ok { - return nil, fmt.Errorf("unsupported TLS version %s provided", cfg.ServerTLSConfig.MinVersion) - } - - if rawTLSCipherSuites != "" { - cfg.ServerTLSConfig.CipherSuites = strings.Split(rawTLSCipherSuites, ",") - } - tlsConfig, err = server.NewTLSConfig( - logger, - cfg.ServerTLSConfig.CertFile, - cfg.ServerTLSConfig.KeyFile, - cfg.ServerTLSConfig.ClientCAFile, - cfg.ServerTLSConfig.MinVersion, - cfg.ServerTLSConfig.CipherSuites, - ) - if tlsConfig == nil || err != nil { - return nil, fmt.Errorf("invalid TLS config: %w", err) - } - } - - if tlsConfig != nil { - r, err := rbacproxytls.NewCertReloader( - cfg.ServerTLSConfig.CertFile, - cfg.ServerTLSConfig.KeyFile, - cfg.ServerTLSConfig.ReloadInterval, - ) - if err != nil { - return nil, fmt.Errorf("failed to initialize certificate reloader: %w", err) - } - - tlsConfig.GetCertificate = r.GetCertificate - wg.Go(func() error { - for { - // r.Watch will wait ReloadInterval, so this is not - // a hot loop - if err := r.Watch(ctx); err != nil { - level.Warn(logger).Log("msg", "error watching certificate reloader", "err", err) - } else { - return nil - } - } - }) - } - return tlsConfig, nil -} - -type config struct { - ListenAddress string - TLSInsecure bool - ServerTLSConfig server.TLSServerConfig - LocalHost string - LogLevel string - LogFormat string -} - -type srv struct { - logger log.Logger - s *http.Server -} - -// any older versions won't allow a secure conn -var allowedTLSVersions = map[string]bool{"VersionTLS13": true, "VersionTLS12": true} - -func validTLSVersions() string { - var out string - for validVersion := range allowedTLSVersions { - out += validVersion + "," - } - return strings.TrimRight(out, ",") -} diff --git a/cmd/operator/main.go b/cmd/operator/main.go index b23e065f9..a115dfd42 100644 --- a/cmd/operator/main.go +++ b/cmd/operator/main.go @@ -16,21 +16,15 @@ package main import ( "context" - "crypto/tls" - "errors" "flag" "fmt" stdlog "log" - "net" "net/http" "net/http/pprof" "os" "os/signal" - "strings" "syscall" - "time" - rbacproxytls "github.com/brancz/kube-rbac-proxy/pkg/tls" "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" @@ -38,10 +32,10 @@ import ( "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/prometheus/common/version" "golang.org/x/sync/errgroup" - v1 "k8s.io/api/core/v1" storagev1 "k8s.io/api/storage/v1" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" logging "github.com/prometheus-operator/prometheus-operator/internal/log" "github.com/prometheus-operator/prometheus-operator/pkg/admission" @@ -57,45 +51,6 @@ import ( "github.com/prometheus-operator/prometheus-operator/pkg/versionutil" ) -const ( - defaultOperatorTLSDir = "/etc/tls/private" -) - -var ( - ns = namespaces{} - deniedNs = namespaces{} - prometheusNs = namespaces{} - alertmanagerNs = namespaces{} - alertmanagerConfigNs = namespaces{} - thanosRulerNs = namespaces{} -) - -type namespaces map[string]struct{} - -// Set implements the flag.Value interface. -func (n namespaces) Set(value string) error { - if n == nil { - return errors.New("expected n of type namespaces to be initialized") - } - for _, ns := range strings.Split(value, ",") { - n[ns] = struct{}{} - } - return nil -} - -// String implements the flag.Value interface. -func (n namespaces) String() string { - return strings.Join(n.asSlice(), ",") -} - -func (n namespaces) asSlice() []string { - var ns = make([]string, 0, len(n)) - for k := range n { - ns = append(ns, k) - } - return ns -} - // checkPrerequisites verifies that the CRD is installed in the cluster and // that the operator has enough permissions to manage the resource. func checkPrerequisites( @@ -132,26 +87,6 @@ func checkPrerequisites( return true, nil } -func serve(srv *http.Server, listener net.Listener, logger log.Logger) func() error { - return func() error { - level.Info(logger).Log("msg", "Starting insecure server on "+listener.Addr().String()) - if err := srv.Serve(listener); err != http.ErrServerClosed { - return err - } - return nil - } -} - -func serveTLS(srv *http.Server, listener net.Listener, logger log.Logger) func() error { - return func() error { - level.Info(logger).Log("msg", "Starting secure server on "+listener.Addr().String(), "http2", enableHTTP2) - if err := srv.ServeTLS(listener, "", ""); err != http.ErrServerClosed { - return err - } - return nil - } -} - const ( defaultReloaderCPU = "10m" defaultReloaderMemory = "50Mi" @@ -160,95 +95,79 @@ const ( var ( cfg = operator.DefaultConfig(defaultReloaderCPU, defaultReloaderMemory) - rawTLSCipherSuites string - enableHTTP2 bool - serverTLS bool + logConfig logging.Config - flagset = flag.CommandLine + impersonateUser string + apiServer string + tlsClientConfig rest.TLSClientConfig + + serverConfig = server.DefaultConfig(":8080", false) ) -func init() { - flagset.StringVar(&cfg.ListenAddress, "web.listen-address", ":8080", "Address on which to expose metrics and web interface.") - // Mitigate CVE-2023-44487 by disabling HTTP2 by default until the Go - // standard library and golang.org/x/net are fully fixed. - // Right now, it is possible for authenticated and unauthenticated users to - // hold open HTTP2 connections and consume huge amounts of memory. - // See: - // * https://github.com/kubernetes/kubernetes/pull/121120 - // * https://github.com/kubernetes/kubernetes/issues/121197 - // * https://github.com/golang/go/issues/63417#issuecomment-1758858612 - flagset.BoolVar(&enableHTTP2, "web.enable-http2", false, "Enable HTTP2 connections.") - flagset.BoolVar(&serverTLS, "web.enable-tls", false, "Activate prometheus operator web server TLS. "+ - " This is useful for example when using the rule validation webhook.") - flagset.StringVar(&cfg.ServerTLSConfig.CertFile, "web.cert-file", defaultOperatorTLSDir+"/tls.crt", "Cert file to be used for operator web server endpoints.") - flagset.StringVar(&cfg.ServerTLSConfig.KeyFile, "web.key-file", defaultOperatorTLSDir+"/tls.key", "Private key matching the cert file to be used for operator web server endpoints.") - flagset.StringVar(&cfg.ServerTLSConfig.ClientCAFile, "web.client-ca-file", defaultOperatorTLSDir+"/tls-ca.crt", "Client CA certificate file to be used for operator web server endpoints.") - flagset.DurationVar(&cfg.ServerTLSConfig.ReloadInterval, "web.tls-reload-interval", time.Minute, "The interval at which to watch for TLS certificate changes, by default set to 1 minute. (default 1m0s).") - flagset.StringVar(&cfg.ServerTLSConfig.MinVersion, "web.tls-min-version", "VersionTLS13", - "Minimum TLS version supported. Value must match version names from https://golang.org/pkg/crypto/tls/#pkg-constants.") - flagset.StringVar(&rawTLSCipherSuites, "web.tls-cipher-suites", "", "Comma-separated list of cipher suites for the server."+ - " Values are from tls package constants (https://golang.org/pkg/crypto/tls/#pkg-constants)."+ - "If omitted, the default Go cipher suites will be used."+ - "Note that TLS 1.3 ciphersuites are not configurable.") +func parseFlags(fs *flag.FlagSet) { + // Web server settings. + server.RegisterFlags(fs, &serverConfig) - flagset.StringVar(&cfg.ImpersonateUser, "as", "", "Username to impersonate. User could be a regular user or a service account in a namespace.") - flagset.StringVar(&cfg.Host, "apiserver", "", "API Server addr, e.g. ' - NOT RECOMMENDED FOR PRODUCTION - http://127.0.0.1:8080'. Omit parameter to run in on-cluster mode and utilize the service account token.") - flagset.StringVar(&cfg.TLSConfig.CertFile, "cert-file", "", " - NOT RECOMMENDED FOR PRODUCTION - Path to public TLS certificate file.") - flagset.StringVar(&cfg.TLSConfig.KeyFile, "key-file", "", "- NOT RECOMMENDED FOR PRODUCTION - Path to private TLS certificate file.") - flagset.StringVar(&cfg.TLSConfig.CAFile, "ca-file", "", "- NOT RECOMMENDED FOR PRODUCTION - Path to TLS CA file.") + // Kubernetes client-go settings. + fs.StringVar(&impersonateUser, "as", "", "Username to impersonate. User could be a regular user or a service account in a namespace.") + fs.StringVar(&apiServer, "apiserver", "", "API Server addr, e.g. ' - NOT RECOMMENDED FOR PRODUCTION - http://127.0.0.1:8080'. Omit parameter to run in on-cluster mode and utilize the service account token.") + fs.StringVar(&tlsClientConfig.CertFile, "cert-file", "", " - NOT RECOMMENDED FOR PRODUCTION - Path to public TLS certificate file.") + fs.StringVar(&tlsClientConfig.KeyFile, "key-file", "", "- NOT RECOMMENDED FOR PRODUCTION - Path to private TLS certificate file.") + fs.StringVar(&tlsClientConfig.CAFile, "ca-file", "", "- NOT RECOMMENDED FOR PRODUCTION - Path to TLS CA file.") + fs.BoolVar(&tlsClientConfig.Insecure, "tls-insecure", false, "- NOT RECOMMENDED FOR PRODUCTION - Don't verify API server's CA certificate.") - flagset.StringVar(&cfg.KubeletObject, "kubelet-service", "", "Service/Endpoints object to write kubelets into in format \"namespace/name\"") - flagset.StringVar(&cfg.KubeletSelector, "kubelet-selector", "", "Label selector to filter nodes.") - flagset.BoolVar(&cfg.TLSInsecure, "tls-insecure", false, "- NOT RECOMMENDED FOR PRODUCTION - Don't verify API server's CA certificate.") + fs.StringVar(&cfg.KubeletObject, "kubelet-service", "", "Service/Endpoints object to write kubelets into in format \"namespace/name\"") + fs.Var(&cfg.KubeletSelector, "kubelet-selector", "Label selector to filter nodes.") // The Prometheus config reloader image is released along with the // Prometheus Operator image, tagged with the same semver version. Default to // the Prometheus Operator version if no Prometheus config reloader image is // specified. - flagset.StringVar(&cfg.ReloaderConfig.Image, "prometheus-config-reloader", operator.DefaultPrometheusConfigReloaderImage, "Prometheus config reloader image") - flagset.Var(&cfg.ReloaderConfig.CPURequests, "config-reloader-cpu-request", "Config Reloader CPU requests. Value \"0\" disables it and causes no request to be configured.") - flagset.Var(&cfg.ReloaderConfig.CPULimits, "config-reloader-cpu-limit", "Config Reloader CPU limits. Value \"0\" disables it and causes no limit to be configured.") - flagset.Var(&cfg.ReloaderConfig.MemoryRequests, "config-reloader-memory-request", "Config Reloader memory requests. Value \"0\" disables it and causes no request to be configured.") - flagset.Var(&cfg.ReloaderConfig.MemoryLimits, "config-reloader-memory-limit", "Config Reloader memory limits. Value \"0\" disables it and causes no limit to be configured.") - flagset.BoolVar(&cfg.ReloaderConfig.EnableProbes, "enable-config-reloader-probes", false, "Enable liveness and readiness for the config-reloader container. Default: false") + fs.StringVar(&cfg.ReloaderConfig.Image, "prometheus-config-reloader", operator.DefaultPrometheusConfigReloaderImage, "Prometheus config reloader image") + fs.Var(&cfg.ReloaderConfig.CPURequests, "config-reloader-cpu-request", "Config Reloader CPU requests. Value \"0\" disables it and causes no request to be configured.") + fs.Var(&cfg.ReloaderConfig.CPULimits, "config-reloader-cpu-limit", "Config Reloader CPU limits. Value \"0\" disables it and causes no limit to be configured.") + fs.Var(&cfg.ReloaderConfig.MemoryRequests, "config-reloader-memory-request", "Config Reloader memory requests. Value \"0\" disables it and causes no request to be configured.") + fs.Var(&cfg.ReloaderConfig.MemoryLimits, "config-reloader-memory-limit", "Config Reloader memory limits. Value \"0\" disables it and causes no limit to be configured.") + fs.BoolVar(&cfg.ReloaderConfig.EnableProbes, "enable-config-reloader-probes", false, "Enable liveness and readiness for the config-reloader container. Default: false") - flagset.StringVar(&cfg.AlertmanagerDefaultBaseImage, "alertmanager-default-base-image", operator.DefaultAlertmanagerBaseImage, "Alertmanager default base image (path without tag/version)") - flagset.StringVar(&cfg.PrometheusDefaultBaseImage, "prometheus-default-base-image", operator.DefaultPrometheusBaseImage, "Prometheus default base image (path without tag/version)") - flagset.StringVar(&cfg.ThanosDefaultBaseImage, "thanos-default-base-image", operator.DefaultThanosBaseImage, "Thanos default base image (path without tag/version)") + fs.StringVar(&cfg.AlertmanagerDefaultBaseImage, "alertmanager-default-base-image", operator.DefaultAlertmanagerBaseImage, "Alertmanager default base image (path without tag/version)") + fs.StringVar(&cfg.PrometheusDefaultBaseImage, "prometheus-default-base-image", operator.DefaultPrometheusBaseImage, "Prometheus default base image (path without tag/version)") + fs.StringVar(&cfg.ThanosDefaultBaseImage, "thanos-default-base-image", operator.DefaultThanosBaseImage, "Thanos default base image (path without tag/version)") - flagset.Var(ns, "namespaces", "Namespaces to scope the interaction of the Prometheus Operator and the apiserver (allow list). This is mutually exclusive with --deny-namespaces.") - flagset.Var(deniedNs, "deny-namespaces", "Namespaces not to scope the interaction of the Prometheus Operator (deny list). This is mutually exclusive with --namespaces.") - flagset.Var(prometheusNs, "prometheus-instance-namespaces", "Namespaces where Prometheus and PrometheusAgent custom resources and corresponding Secrets, Configmaps and StatefulSets are watched/created. If set this takes precedence over --namespaces or --deny-namespaces for Prometheus custom resources.") - flagset.Var(alertmanagerNs, "alertmanager-instance-namespaces", "Namespaces where Alertmanager custom resources and corresponding StatefulSets are watched/created. If set this takes precedence over --namespaces or --deny-namespaces for Alertmanager custom resources.") - flagset.Var(alertmanagerConfigNs, "alertmanager-config-namespaces", "Namespaces where AlertmanagerConfig custom resources and corresponding Secrets are watched/created. If set this takes precedence over --namespaces or --deny-namespaces for AlertmanagerConfig custom resources.") - flagset.Var(thanosRulerNs, "thanos-ruler-instance-namespaces", "Namespaces where ThanosRuler custom resources and corresponding StatefulSets are watched/created. If set this takes precedence over --namespaces or --deny-namespaces for ThanosRuler custom resources.") + fs.Var(cfg.Namespaces.AllowList, "namespaces", "Namespaces to scope the interaction of the Prometheus Operator and the apiserver (allow list). This is mutually exclusive with --deny-namespaces.") + fs.Var(cfg.Namespaces.DenyList, "deny-namespaces", "Namespaces not to scope the interaction of the Prometheus Operator (deny list). This is mutually exclusive with --namespaces.") + fs.Var(cfg.Namespaces.PrometheusAllowList, "prometheus-instance-namespaces", "Namespaces where Prometheus and PrometheusAgent custom resources and corresponding Secrets, Configmaps and StatefulSets are watched/created. If set this takes precedence over --namespaces or --deny-namespaces for Prometheus custom resources.") + fs.Var(cfg.Namespaces.AlertmanagerAllowList, "alertmanager-instance-namespaces", "Namespaces where Alertmanager custom resources and corresponding StatefulSets are watched/created. If set this takes precedence over --namespaces or --deny-namespaces for Alertmanager custom resources.") + fs.Var(cfg.Namespaces.AlertmanagerConfigAllowList, "alertmanager-config-namespaces", "Namespaces where AlertmanagerConfig custom resources and corresponding Secrets are watched/created. If set this takes precedence over --namespaces or --deny-namespaces for AlertmanagerConfig custom resources.") + fs.Var(cfg.Namespaces.ThanosRulerAllowList, "thanos-ruler-instance-namespaces", "Namespaces where ThanosRuler custom resources and corresponding StatefulSets are watched/created. If set this takes precedence over --namespaces or --deny-namespaces for ThanosRuler custom resources.") - flagset.Var(&cfg.Annotations, "annotations", "Annotations to be add to all resources created by the operator") - flagset.Var(&cfg.Labels, "labels", "Labels to be add to all resources created by the operator") + fs.Var(&cfg.Annotations, "annotations", "Annotations to be add to all resources created by the operator") + fs.Var(&cfg.Labels, "labels", "Labels to be add to all resources created by the operator") - flagset.StringVar(&cfg.LocalHost, "localhost", "localhost", "EXPERIMENTAL (could be removed in future releases) - Host used to communicate between local services on a pod. Fixes issues where localhost resolves incorrectly.") - flagset.StringVar(&cfg.ClusterDomain, "cluster-domain", "", "The domain of the cluster. This is used to generate service FQDNs. If this is not specified, DNS search domain expansion is used instead.") + fs.StringVar(&cfg.LocalHost, "localhost", "localhost", "EXPERIMENTAL (could be removed in future releases) - Host used to communicate between local services on a pod. Fixes issues where localhost resolves incorrectly.") + fs.StringVar(&cfg.ClusterDomain, "cluster-domain", "", "The domain of the cluster. This is used to generate service FQDNs. If this is not specified, DNS search domain expansion is used instead.") - flagset.StringVar(&cfg.LogLevel, "log-level", "info", fmt.Sprintf("Log level to use. Possible values: %s", strings.Join(logging.AvailableLogLevels, ", "))) - flagset.StringVar(&cfg.LogFormat, "log-format", "logfmt", fmt.Sprintf("Log format to use. Possible values: %s", strings.Join(logging.AvailableLogFormats, ", "))) + fs.Var(&cfg.PromSelector, "prometheus-instance-selector", "Label selector to filter Prometheus and PrometheusAgent Custom Resources to watch.") + fs.Var(&cfg.AlertmanagerSelector, "alertmanager-instance-selector", "Label selector to filter Alertmanager Custom Resources to watch.") + fs.Var(&cfg.ThanosRulerSelector, "thanos-ruler-instance-selector", "Label selector to filter ThanosRuler Custom Resources to watch.") + fs.Var(&cfg.SecretListWatchSelector, "secret-field-selector", "Field selector to filter Secrets to watch") - flagset.StringVar(&cfg.PromSelector, "prometheus-instance-selector", "", "Label selector to filter Prometheus and PrometheusAgent Custom Resources to watch.") - flagset.StringVar(&cfg.AlertManagerSelector, "alertmanager-instance-selector", "", "Label selector to filter AlertManager Custom Resources to watch.") - flagset.StringVar(&cfg.ThanosRulerSelector, "thanos-ruler-instance-selector", "", "Label selector to filter ThanosRuler Custom Resources to watch.") - flagset.StringVar(&cfg.SecretListWatchSelector, "secret-field-selector", "", "Field selector to filter Secrets to watch") + logging.RegisterFlags(fs, &logConfig) + versionutil.RegisterFlags(fs) + + // No need to check for errors because Parse would exit on error. + _ = fs.Parse(os.Args[1:]) } -func run() int { - versionutil.RegisterFlags() - // No need to check for errors because Parse would exit on error. - _ = flagset.Parse(os.Args[1:]) +func run(fs *flag.FlagSet) int { + parseFlags(fs) if versionutil.ShouldPrintVersion() { versionutil.Print(os.Stdout, "prometheus-operator") return 0 } - logger, err := logging.NewLogger(cfg.LogLevel, cfg.LogFormat) + logger, err := logging.NewLogger(logConfig) if err != nil { stdlog.Fatal(err) } @@ -256,41 +175,16 @@ func run() int { level.Info(logger).Log("msg", "Starting Prometheus Operator", "version", version.Info()) level.Info(logger).Log("build_context", version.BuildContext()) - if len(ns) > 0 && len(deniedNs) > 0 { + if len(cfg.Namespaces.AllowList) > 0 && len(cfg.Namespaces.DenyList) > 0 { level.Error(logger).Log( "msg", "--namespaces and --deny-namespaces are mutually exclusive, only one should be provided", - "namespaces", ns, - "deny_namespaces", deniedNs, + "namespaces", cfg.Namespaces.AllowList, + "deny_namespaces", cfg.Namespaces.DenyList, ) return 1 } - - cfg.Namespaces.AllowList = ns - if len(cfg.Namespaces.AllowList) == 0 { - cfg.Namespaces.AllowList[v1.NamespaceAll] = struct{}{} - } - - cfg.Namespaces.DenyList = deniedNs - cfg.Namespaces.PrometheusAllowList = prometheusNs - cfg.Namespaces.AlertmanagerAllowList = alertmanagerNs - cfg.Namespaces.AlertmanagerConfigAllowList = alertmanagerConfigNs - cfg.Namespaces.ThanosRulerAllowList = thanosRulerNs - - if len(cfg.Namespaces.PrometheusAllowList) == 0 { - cfg.Namespaces.PrometheusAllowList = cfg.Namespaces.AllowList - } - - if len(cfg.Namespaces.AlertmanagerAllowList) == 0 { - cfg.Namespaces.AlertmanagerAllowList = cfg.Namespaces.AllowList - } - - if len(cfg.Namespaces.AlertmanagerConfigAllowList) == 0 { - cfg.Namespaces.AlertmanagerConfigAllowList = cfg.Namespaces.AllowList - } - - if len(cfg.Namespaces.ThanosRulerAllowList) == 0 { - cfg.Namespaces.ThanosRulerAllowList = cfg.Namespaces.AllowList - } + cfg.Namespaces.Finalize() + level.Info(logger).Log("msg", "namespaces filtering configuration ", "config", cfg.Namespaces.String()) ctx, cancel := context.WithCancel(context.Background()) wg, ctx := errgroup.WithContext(ctx) @@ -298,7 +192,7 @@ func run() int { k8sutil.MustRegisterClientGoMetrics(r) - restConfig, err := k8sutil.NewClusterConfig(cfg.Host, cfg.TLSInsecure, &cfg.TLSConfig, cfg.ImpersonateUser) + restConfig, err := k8sutil.NewClusterConfig(apiServer, tlsClientConfig, impersonateUser) if err != nil { level.Error(logger).Log("msg", "failed to create Kubernetes client configuration", "err", err) cancel() @@ -346,7 +240,7 @@ func run() int { ctx, logger, kclient, - namespaces(cfg.Namespaces.AllowList).asSlice(), + cfg.Namespaces.AllowList.Slice(), monitoringv1alpha1.SchemeGroupVersion, monitoringv1alpha1.ScrapeConfigName, k8sutil.ResourceAttribute{ @@ -373,7 +267,7 @@ func run() int { ctx, logger, kclient, - namespaces(cfg.Namespaces.PrometheusAllowList).asSlice(), + cfg.Namespaces.PrometheusAllowList.Slice(), monitoringv1alpha1.SchemeGroupVersion, monitoringv1alpha1.PrometheusAgentName, k8sutil.ResourceAttribute{ @@ -419,30 +313,11 @@ func run() int { return 1 } + // Setup the web server. mux := http.NewServeMux() admit := admission.New(log.With(logger, "component", "admissionwebhook")) admit.Register(mux) - l, err := net.Listen("tcp", cfg.ListenAddress) - if err != nil { - level.Error(logger).Log("msg", "listening failed", "address", cfg.ListenAddress, "err", err) - cancel() - return 1 - } - - var tlsConfig *tls.Config - if serverTLS { - if rawTLSCipherSuites != "" { - cfg.ServerTLSConfig.CipherSuites = strings.Split(rawTLSCipherSuites, ",") - } - tlsConfig, err = server.NewTLSConfig(logger, cfg.ServerTLSConfig.CertFile, cfg.ServerTLSConfig.KeyFile, - cfg.ServerTLSConfig.ClientCAFile, cfg.ServerTLSConfig.MinVersion, cfg.ServerTLSConfig.CipherSuites) - if tlsConfig == nil || err != nil { - level.Error(logger).Log("msg", "invalid TLS config", "err", err) - cancel() - return 1 - } - } r.MustRegister( collectors.NewGoCollector(), @@ -460,6 +335,17 @@ func run() int { w.WriteHeader(http.StatusOK) })) + srv, err := server.NewServer(logger, &serverConfig, mux) + if err != nil { + level.Error(logger).Log("msg", "failed to create web server", "err", err) + cancel() + return 1 + } + + // Start the web server. + wg.Go(func() error { return srv.Serve(ctx) }) + + // Start the controllers. wg.Go(func() error { return po.Run(ctx) }) if pao != nil { wg.Go(func() error { return pao.Run(ctx) }) @@ -467,68 +353,22 @@ func run() int { wg.Go(func() error { return ao.Run(ctx) }) wg.Go(func() error { return to.Run(ctx) }) - if tlsConfig != nil { - r, err := rbacproxytls.NewCertReloader( - cfg.ServerTLSConfig.CertFile, - cfg.ServerTLSConfig.KeyFile, - cfg.ServerTLSConfig.ReloadInterval, - ) - if err != nil { - level.Error(logger).Log("msg", "failed to initialize certificate reloader", "err", err) - cancel() - return 1 - } - - tlsConfig.GetCertificate = r.GetCertificate - - wg.Go(func() error { - for { - // r.Watch will wait ReloadInterval, so this is not - // a hot loop - if err := r.Watch(ctx); err != nil { - level.Warn(logger).Log("msg", "error watching certificate reloader", - "err", err) - } else { - return nil - } - } - }) - } - srv := &http.Server{ - Handler: mux, - TLSConfig: tlsConfig, - ReadHeaderTimeout: 30 * time.Second, - ReadTimeout: 30 * time.Second, - // use flags on standard logger to align with base logger and get consistent parsed fields form adapter: - // use shortfile flag to get proper 'caller' field (avoid being wrongly parsed/extracted from message) - // and no datetime related flag to keep 'ts' field from base logger (with controlled format) - ErrorLog: stdlog.New(log.NewStdlibAdapter(logger), "", stdlog.Lshortfile), - } - if !enableHTTP2 { - srv.TLSNextProto = make(map[string]func(*http.Server, *tls.Conn, http.Handler)) - } - if srv.TLSConfig == nil { - wg.Go(serve(srv, l, logger)) - } else { - wg.Go(serveTLS(srv, l, logger)) - } - term := make(chan os.Signal, 1) signal.Notify(term, os.Interrupt, syscall.SIGTERM) select { case <-term: - level.Info(logger).Log("msg", "Received SIGTERM, exiting gracefully...") + level.Info(logger).Log("msg", "received SIGTERM, exiting gracefully...") case <-ctx.Done(): } if err := srv.Shutdown(ctx); err != nil { - level.Warn(logger).Log("msg", "Server shutdown error", "err", err) + level.Warn(logger).Log("msg", "server shutdown error", "err", err) } cancel() if err := wg.Wait(); err != nil { - level.Warn(logger).Log("msg", "Unhandled error received. Exiting...", "err", err) + level.Warn(logger).Log("msg", "unhandled error received. Exiting...", "err", err) return 1 } @@ -536,5 +376,5 @@ func run() int { } func main() { - os.Exit(run()) + os.Exit(run(flag.CommandLine)) } diff --git a/cmd/operator/main_test.go b/cmd/operator/main_test.go deleted file mode 100644 index 5ec946eff..000000000 --- a/cmd/operator/main_test.go +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2020 The prometheus-operator Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import ( - "strings" - "testing" -) - -func TestNamespacesType(t *testing.T) { - var ns namespaces - if ns.String() != "" { - t.Errorf("incorrect string value for nil namespaces, want: empty string, got %v", ns.String()) - } - - val := "a,b,c" - err := ns.Set(val) - if err == nil { - t.Error("expected error for nil namespaces") - } - - ns = namespaces{} - ns.Set(val) - if len(ns) != 3 { - t.Errorf("incorrect length of namespaces, want: %v, got: %v", 3, len(ns)) - } - - for _, next := range strings.Split(val, ",") { - if _, ok := ns[next]; !ok { - t.Errorf("namespace not in map, want: %v, not in map: %v", next, map[string]struct{}(ns)) - } - } - -} diff --git a/cmd/po-rule-migration/main.go b/cmd/po-rule-migration/main.go index fe61f75f3..22a6aa830 100644 --- a/cmd/po-rule-migration/main.go +++ b/cmd/po-rule-migration/main.go @@ -34,11 +34,14 @@ import ( ) func main() { - versionutil.RegisterFlags() + fs := flag.CommandLine + versionutil.RegisterFlags(fs) var ruleConfigMapName = flag.String("rule-config-map", "", "path to rule ConfigMap") var ruleCRDSDestination = flag.String("rule-crds-destination", "", "destination new crds should be created in") - flag.Parse() + + // No need to check for errors because Parse would exit on error. + _ = fs.Parse(os.Args[1:]) if versionutil.ShouldPrintVersion() { versionutil.Print(os.Stdout, "po-rule-migration") diff --git a/cmd/prometheus-config-reloader/main.go b/cmd/prometheus-config-reloader/main.go index 9a7c5f01b..322e36b48 100644 --- a/cmd/prometheus-config-reloader/main.go +++ b/cmd/prometheus-config-reloader/main.go @@ -82,15 +82,16 @@ func main() { "[EXPERIMENTAL] Path to configuration file that can enable TLS or authentication. See: https://prometheus.io/docs/prometheus/latest/configuration/https/", ).Default("").String() - logFormat := app.Flag( + var logConfig logging.Config + app.Flag( "log-format", fmt.Sprintf("log format to use. Possible values: %s", strings.Join(logging.AvailableLogFormats, ", "))). - Default(logging.FormatLogFmt).String() + Default(logging.FormatLogFmt).StringVar(&logConfig.Format) - logLevel := app.Flag( + app.Flag( "log-level", fmt.Sprintf("log level to use. Possible values: %s", strings.Join(logging.AvailableLogLevels, ", "))). - Default(logging.LevelInfo).String() + Default(logging.LevelInfo).StringVar(&logConfig.Level) reloadURL := app.Flag("reload-url", "reload URL to trigger Prometheus reload on"). Default("http://127.0.0.1:9090/-/reload").URL() @@ -107,7 +108,7 @@ func main() { os.Exit(0) } - logger, err := logging.NewLogger(*logLevel, *logFormat) + logger, err := logging.NewLogger(logConfig) if err != nil { stdlog.Fatal(err) } diff --git a/internal/log/log.go b/internal/log/log.go index 75595e4f7..af3490615 100644 --- a/internal/log/log.go +++ b/internal/log/log.go @@ -16,6 +16,7 @@ package log import ( + "flag" "fmt" "os" "strings" @@ -39,9 +40,19 @@ const ( FormatJSON = "json" ) +type Config struct { + Level string + Format string +} + +func RegisterFlags(fs *flag.FlagSet, c *Config) { + fs.StringVar(&c.Level, "log-level", "info", fmt.Sprintf("Log level to use. Possible values: %s", strings.Join(AvailableLogLevels, ", "))) + fs.StringVar(&c.Format, "log-format", "logfmt", fmt.Sprintf("Log format to use. Possible values: %s", strings.Join(AvailableLogFormats, ", "))) +} + // NewLogger returns a log.Logger that prints in the provided format at the // provided level with a UTC timestamp and the caller of the log entry. -func NewLogger(level string, format string) (log.Logger, error) { +func NewLogger(c Config) (log.Logger, error) { var ( logger log.Logger lvlOption loglevel.Option @@ -49,7 +60,7 @@ func NewLogger(level string, format string) (log.Logger, error) { // For log levels other than debug, the klog verbosity level is 0. klogv2.ClampLevel(0) - switch strings.ToLower(level) { + switch strings.ToLower(c.Level) { case LevelAll: lvlOption = loglevel.AllowAll() case LevelDebug: @@ -66,16 +77,16 @@ func NewLogger(level string, format string) (log.Logger, error) { case LevelNone: lvlOption = loglevel.AllowNone() default: - return nil, fmt.Errorf("log log_level %s unknown, %v are possible values", level, AvailableLogLevels) + return nil, fmt.Errorf("log log_level %s unknown, %v are possible values", c.Level, AvailableLogLevels) } - switch format { + switch c.Format { case FormatLogFmt: logger = log.NewLogfmtLogger(log.NewSyncWriter(os.Stdout)) case FormatJSON: logger = log.NewJSONLogger(log.NewSyncWriter(os.Stdout)) default: - return nil, fmt.Errorf("log format %s unknown, %v are possible values", format, AvailableLogFormats) + return nil, fmt.Errorf("log format %s unknown, %v are possible values", c.Format, AvailableLogFormats) } logger = loglevel.NewFilter(logger, lvlOption) diff --git a/pkg/alertmanager/operator.go b/pkg/alertmanager/operator.go index bf4004c22..72936ff49 100644 --- a/pkg/alertmanager/operator.go +++ b/pkg/alertmanager/operator.go @@ -35,7 +35,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/version" "k8s.io/client-go/kubernetes" authv1 "k8s.io/client-go/kubernetes/typed/authorization/v1" "k8s.io/client-go/metadata" @@ -68,8 +67,20 @@ var ( } ) -// Operator manages life cycle of Alertmanager deployments and -// monitoring configurations. +// Config defines the operator's parameters for the Alertmanager controller. +// Whenever the value of one of these parameters is changed, it triggers an +// update of the managed statefulsets. +type Config struct { + LocalHost string + ClusterDomain string + ReloaderConfig operator.ContainerConfig + AlertmanagerDefaultBaseImage string + Annotations operator.Map + Labels operator.Map +} + +// Operator manages the lifecycle of the Alertmanager statefulsets and their +// configurations. type Operator struct { kclient kubernetes.Interface mdClient metadata.Interface @@ -97,19 +108,6 @@ type Operator struct { config Config } -type Config struct { - KubernetesVersion version.Info - LocalHost string - ClusterDomain string - ReloaderConfig operator.ContainerConfig - AlertmanagerDefaultBaseImage string - Namespaces operator.Namespaces - Annotations operator.Map - Labels operator.Map - AlertManagerSelector string - SecretListWatchSelector string -} - // New creates a new controller. func New(ctx context.Context, restConfig *rest.Config, c operator.Config, logger log.Logger, r prometheus.Registerer, canReadStorageClass bool) (*Operator, error) { client, err := kubernetes.NewForConfig(restConfig) @@ -142,17 +140,14 @@ func New(ctx context.Context, restConfig *rest.Config, c operator.Config, logger metrics: operator.NewMetrics(r), reconciliations: &operator.ReconciliationTracker{}, canReadStorageClass: canReadStorageClass, + config: Config{ - KubernetesVersion: c.KubernetesVersion, LocalHost: c.LocalHost, ClusterDomain: c.ClusterDomain, ReloaderConfig: c.ReloaderConfig, AlertmanagerDefaultBaseImage: c.AlertmanagerDefaultBaseImage, - Namespaces: c.Namespaces, Annotations: c.Annotations, Labels: c.Labels, - AlertManagerSelector: c.AlertManagerSelector, - SecretListWatchSelector: c.SecretListWatchSelector, }, } @@ -164,30 +159,25 @@ func New(ctx context.Context, restConfig *rest.Config, c operator.Config, logger r, ) - if err := o.bootstrap(ctx); err != nil { + if err := o.bootstrap(ctx, c); err != nil { return nil, err } return o, nil } -func (c *Operator) bootstrap(ctx context.Context) error { - var err error - - if _, err := labels.Parse(c.config.AlertManagerSelector); err != nil { - return fmt.Errorf("can not parse alertmanager selector value: %w", err) - } - +func (c *Operator) bootstrap(ctx context.Context, config operator.Config) error { c.metrics.MustRegister(c.reconciliations) + var err error c.alrtInfs, err = informers.NewInformersForResource( informers.NewMonitoringInformerFactories( - c.config.Namespaces.AlertmanagerAllowList, - c.config.Namespaces.DenyList, + config.Namespaces.AlertmanagerAllowList, + config.Namespaces.DenyList, c.mclient, resyncPeriod, func(options *metav1.ListOptions) { - options.LabelSelector = c.config.AlertManagerSelector + options.LabelSelector = config.AlertmanagerSelector.String() }, ), monitoringv1.SchemeGroupVersion.WithResource(monitoringv1.AlertmanagerName), @@ -204,8 +194,8 @@ func (c *Operator) bootstrap(ctx context.Context) error { c.alrtCfgInfs, err = informers.NewInformersForResource( informers.NewMonitoringInformerFactories( - c.config.Namespaces.AlertmanagerConfigAllowList, - c.config.Namespaces.DenyList, + config.Namespaces.AlertmanagerConfigAllowList, + config.Namespaces.DenyList, c.mclient, resyncPeriod, nil, @@ -216,19 +206,14 @@ func (c *Operator) bootstrap(ctx context.Context) error { return fmt.Errorf("error creating alertmanagerconfig informers: %w", err) } - secretListWatchSelector, err := fields.ParseSelector(c.config.SecretListWatchSelector) - if err != nil { - return fmt.Errorf("can not parse secrets selector value: %w", err) - } - c.secrInfs, err = informers.NewInformersForResourceWithTransform( informers.NewMetadataInformerFactory( - c.config.Namespaces.AlertmanagerConfigAllowList, - c.config.Namespaces.DenyList, + config.Namespaces.AlertmanagerConfigAllowList, + config.Namespaces.DenyList, c.mdClient, resyncPeriod, func(options *metav1.ListOptions) { - options.FieldSelector = secretListWatchSelector.String() + options.FieldSelector = config.SecretListWatchSelector.String() }, ), v1.SchemeGroupVersion.WithResource("secrets"), @@ -240,8 +225,8 @@ func (c *Operator) bootstrap(ctx context.Context) error { c.ssetInfs, err = informers.NewInformersForResource( informers.NewKubeInformerFactories( - c.config.Namespaces.AlertmanagerAllowList, - c.config.Namespaces.DenyList, + config.Namespaces.AlertmanagerAllowList, + config.Namespaces.DenyList, c.kclient, resyncPeriod, nil, @@ -256,11 +241,11 @@ func (c *Operator) bootstrap(ctx context.Context) error { lw, privileged, err := listwatch.NewNamespaceListWatchFromClient( ctx, o.logger, - o.config.KubernetesVersion, + config.KubernetesVersion, o.kclient.CoreV1(), o.ssarClient, allowList, - o.config.Namespaces.DenyList, + config.Namespaces.DenyList, ) if err != nil { return nil, fmt.Errorf("failed to create namespace lister/watcher: %w", err) @@ -274,15 +259,15 @@ func (c *Operator) bootstrap(ctx context.Context) error { cache.Indexers{}, ), nil } - c.nsAlrtCfgInf, err = newNamespaceInformer(c, c.config.Namespaces.AlertmanagerConfigAllowList) + c.nsAlrtCfgInf, err = newNamespaceInformer(c, config.Namespaces.AlertmanagerConfigAllowList) if err != nil { return err } - if listwatch.IdenticalNamespaces(c.config.Namespaces.AlertmanagerConfigAllowList, c.config.Namespaces.AlertmanagerAllowList) { + if listwatch.IdenticalNamespaces(config.Namespaces.AlertmanagerConfigAllowList, config.Namespaces.AlertmanagerAllowList) { c.nsAlrtInf = c.nsAlrtCfgInf } else { - c.nsAlrtInf, err = newNamespaceInformer(c, c.config.Namespaces.AlertmanagerAllowList) + c.nsAlrtInf, err = newNamespaceInformer(c, config.Namespaces.AlertmanagerAllowList) if err != nil { return err } @@ -733,7 +718,7 @@ func (c *Operator) sync(ctx context.Context, key string) error { failMsg[i] = cause.Message } - level.Info(logger).Log("msg", "recreating AlertManager StatefulSet because the update operation wasn't possible", "reason", strings.Join(failMsg, ", ")) + level.Info(logger).Log("msg", "recreating Alertmanager StatefulSet because the update operation wasn't possible", "reason", strings.Join(failMsg, ", ")) propagationPolicy := metav1.DeletePropagationForeground if err := ssetClient.Delete(ctx, sset.GetName(), metav1.DeleteOptions{PropagationPolicy: &propagationPolicy}); err != nil { return fmt.Errorf("failed to delete StatefulSet to avoid forbidden action: %w", err) @@ -1171,14 +1156,14 @@ func checkHTTPConfig(hc *monitoringv1alpha1.HTTPConfig, amVersion semver.Version if hc.Authorization != nil && !amVersion.GTE(semver.MustParse("0.22.0")) { return fmt.Errorf( - "'authorization' config set in 'httpConfig' but supported in AlertManager >= 0.22.0 only - current %s", + "'authorization' config set in 'httpConfig' but supported in Alertmanager >= 0.22.0 only - current %s", amVersion.String(), ) } if hc.OAuth2 != nil && !amVersion.GTE(semver.MustParse("0.22.0")) { return fmt.Errorf( - "'oauth2' config set in 'httpConfig' but supported in AlertManager >= 0.22.0 only - current %s", + "'oauth2' config set in 'httpConfig' but supported in Alertmanager >= 0.22.0 only - current %s", amVersion.String(), ) } @@ -1329,7 +1314,7 @@ func checkOpsGenieResponder(opsgenieResponder []monitoringv1alpha1.OpsGenieConfi lessThanV0_24 := amVersion.LT(semver.MustParse("0.24.0")) for _, resp := range opsgenieResponder { if resp.Type == "teams" && lessThanV0_24 { - return fmt.Errorf("'teams' set in 'opsgenieResponder' but supported in AlertManager >= 0.24.0 only") + return fmt.Errorf("'teams' set in 'opsgenieResponder' but supported in Alertmanager >= 0.24.0 only") } } return nil diff --git a/pkg/alertmanager/operator_test.go b/pkg/alertmanager/operator_test.go index c74aad221..4b26ce7fb 100644 --- a/pkg/alertmanager/operator_test.go +++ b/pkg/alertmanager/operator_test.go @@ -1239,7 +1239,11 @@ func TestProvisionAlertmanagerConfiguration(t *testing.T) { ssarClient: &alwaysAllowed{}, logger: level.NewFilter(log.NewLogfmtLogger(os.Stdout), level.AllowInfo()), metrics: operator.NewMetrics(prometheus.NewRegistry()), - config: Config{ + } + + err := o.bootstrap( + context.Background(), + operator.Config{ Namespaces: operator.Namespaces{ AlertmanagerConfigAllowList: map[string]struct{}{ v1.NamespaceAll: {}, @@ -1249,9 +1253,7 @@ func TestProvisionAlertmanagerConfiguration(t *testing.T) { }, }, }, - } - - err := o.bootstrap(context.Background()) + ) if err != nil { t.Fatalf("unexpected error: %v", err) } diff --git a/pkg/k8sutil/k8sutil.go b/pkg/k8sutil/k8sutil.go index 14b903900..149eaccc0 100644 --- a/pkg/k8sutil/k8sutil.go +++ b/pkg/k8sutil/k8sutil.go @@ -79,7 +79,7 @@ func PodRunningAndReady(pod v1.Pod) (bool, error) { return false, nil } -func NewClusterConfig(host string, tlsInsecure bool, tlsConfig *rest.TLSClientConfig, asUser string) (*rest.Config, error) { +func NewClusterConfig(host string, tlsConfig rest.TLSClientConfig, asUser string) (*rest.Config, error) { var cfg *rest.Config var err error @@ -104,8 +104,7 @@ func NewClusterConfig(host string, tlsInsecure bool, tlsConfig *rest.TLSClientCo return nil, fmt.Errorf("error parsing host url %s: %w", host, err) } if hostURL.Scheme == "https" { - cfg.TLSClientConfig = *tlsConfig - cfg.Insecure = tlsInsecure + cfg.TLSClientConfig = tlsConfig } } } diff --git a/pkg/operator/config.go b/pkg/operator/config.go index 5113de164..1371a62da 100644 --- a/pkg/operator/config.go +++ b/pkg/operator/config.go @@ -16,48 +16,56 @@ package operator import ( "fmt" + "slices" "sort" "strings" "golang.org/x/exp/maps" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/fields" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/version" - "k8s.io/client-go/rest" - - "github.com/prometheus-operator/prometheus-operator/pkg/server" ) // Config defines configuration parameters for the Operator. type Config struct { - // Kubernetes client configuration. - Host string - TLSInsecure bool - TLSConfig rest.TLSClientConfig - ImpersonateUser string + // Version reported by the Kubernetes API. KubernetesVersion version.Info - ClusterDomain string - KubeletObject string - KubeletSelector string - ListenAddress string - ServerTLSConfig server.TLSServerConfig - ReloaderConfig ContainerConfig + // Parameters for the kubelet endpoint controller. + KubeletObject string + KubeletSelector LabelSelector + + // Cluster domain for Kubernetes services managed by the operator. + ClusterDomain string + + // Global configuration for the reloader config sidecar. + ReloaderConfig ContainerConfig + + // Base container images for operands. AlertmanagerDefaultBaseImage string PrometheusDefaultBaseImage string ThanosDefaultBaseImage string - Namespaces Namespaces - Annotations Map - Labels Map - LocalHost string - LogLevel string - LogFormat string - PromSelector string - AlertManagerSelector string - ThanosRulerSelector string - SecretListWatchSelector string + + // Allow and deny lists for namespace watchers. + Namespaces Namespaces + + // Metadata applied to all resources managed by the operator. + Annotations Map + Labels Map + + // Custom name to use for "localhost". + LocalHost string + + // Label and field selectors for resource watchers. + PromSelector LabelSelector + AlertmanagerSelector LabelSelector + ThanosRulerSelector LabelSelector + SecretListWatchSelector FieldSelector } +// DefaultConfig returns a default operator configuration. func DefaultConfig(cpu, memory string) Config { return Config{ ReloaderConfig: ContainerConfig{ @@ -66,6 +74,14 @@ func DefaultConfig(cpu, memory string) Config { MemoryRequests: Quantity{q: resource.MustParse(memory)}, MemoryLimits: Quantity{q: resource.MustParse(memory)}, }, + Namespaces: Namespaces{ + AllowList: StringSet{}, + DenyList: StringSet{}, + PrometheusAllowList: StringSet{}, + AlertmanagerAllowList: StringSet{}, + AlertmanagerConfigAllowList: StringSet{}, + ThanosRulerAllowList: StringSet{}, + }, } } @@ -196,8 +212,126 @@ func (m *Map) SortedKeys() []string { } type Namespaces struct { - // Allow list/deny list for common custom resources. - AllowList, DenyList map[string]struct{} - // Allow list for prometheus/alertmanager custom resources. - PrometheusAllowList, AlertmanagerAllowList, AlertmanagerConfigAllowList, ThanosRulerAllowList map[string]struct{} + // Allow list for common custom resources. + AllowList StringSet + // Deny list for common custom resources. + DenyList StringSet + // Allow list for Prometheus custom resources. + PrometheusAllowList StringSet + // Allow list for Alertmanager custom resources. + AlertmanagerAllowList StringSet + // Allow list for AlertmanagerConfig custom resources. + AlertmanagerConfigAllowList StringSet + // Allow list for ThanosRuler custom resources. + ThanosRulerAllowList StringSet +} + +func (n *Namespaces) String() string { + return fmt.Sprintf("{allow_list=%q,deny_list=%q,prometheus_allow_list=%q,alertmanager_allow_list=%q,alertmanagerconfig_allow_list=%q,thanosruler_allow_list=%q}", + n.AllowList, + n.DenyList, + n.PrometheusAllowList, + n.AlertmanagerAllowList, + n.AlertmanagerConfigAllowList, + n.ThanosRulerAllowList, + ) +} + +func (n *Namespaces) Finalize() { + if len(n.AllowList) == 0 { + n.AllowList.Insert(v1.NamespaceAll) + } + + if len(n.PrometheusAllowList) == 0 { + n.PrometheusAllowList = n.AllowList + } + + if len(n.AlertmanagerAllowList) == 0 { + n.AlertmanagerAllowList = n.AllowList + } + + if len(n.AlertmanagerConfigAllowList) == 0 { + n.AlertmanagerConfigAllowList = n.AllowList + } + + if len(n.ThanosRulerAllowList) == 0 { + n.ThanosRulerAllowList = n.AllowList + } +} + +type LabelSelector string + +// String implements the flag.Value interface +func (ls *LabelSelector) String() string { + if ls == nil { + return "" + } + + return string(*ls) +} + +// Set implements the flag.Value interface. +func (ls *LabelSelector) Set(value string) error { + if _, err := labels.Parse(value); err != nil { + return err + } + + *ls = LabelSelector(value) + return nil +} + +type FieldSelector string + +// String implements the flag.Value interface +func (fs *FieldSelector) String() string { + if fs == nil { + return "" + } + + return string(*fs) +} + +// Set implements the flag.Value interface. +func (fs *FieldSelector) Set(value string) error { + if _, err := fields.ParseSelector(value); err != nil { + return err + } + + *fs = FieldSelector(value) + return nil +} + +// StringSet represents a list of comma-separated strings. +type StringSet map[string]struct{} + +// Set implements the flag.Value interface. +func (s StringSet) Set(value string) error { + if s == nil { + return fmt.Errorf("expected StringSet variable to be initialized") + } + + for _, v := range strings.Split(value, ",") { + s[v] = struct{}{} + } + + return nil +} + +// String implements the flag.Value interface. +func (s StringSet) String() string { + return strings.Join(s.Slice(), ",") +} + +func (s StringSet) Insert(value string) { + s[value] = struct{}{} +} + +func (s StringSet) Slice() []string { + ss := make([]string, 0, len(s)) + for k := range s { + ss = append(ss, k) + } + + slices.Sort(ss) + return ss } diff --git a/pkg/operator/config_test.go b/pkg/operator/config_test.go index 1e7f3ede4..d5d4a6070 100644 --- a/pkg/operator/config_test.go +++ b/pkg/operator/config_test.go @@ -34,3 +34,79 @@ func TestMap(t *testing.T) { require.Equal(t, map[string]string{"foo": "bar", "foo2": "bar2", "foo3": "bar3"}, m.Merge(map[string]string{"foo": "xxx", "foo3": "bar3"})) } + +func TestFieldSelector(t *testing.T) { + for _, tc := range []struct { + value string + fail bool + }{ + { + value: "", + }, + { + value: "foo = bar", + }, + { + value: "foo", + fail: true, + }, + } { + t.Run(tc.value, func(t *testing.T) { + fs := new(FieldSelector) + + err := fs.Set(tc.value) + if tc.fail { + require.Error(t, err) + return + } + + require.NoError(t, err) + }) + } +} + +func TestLabelSelector(t *testing.T) { + for _, tc := range []struct { + value string + fail bool + }{ + { + value: "", + }, + { + value: "foo in (bar)", + }, + { + value: "foo in", + fail: true, + }, + } { + t.Run(tc.value, func(t *testing.T) { + ls := new(LabelSelector) + + err := ls.Set(tc.value) + if tc.fail { + require.Error(t, err) + return + } + + require.NoError(t, err) + }) + } +} + +func TestStringSet(t *testing.T) { + var s StringSet + + require.Error(t, s.Set("a,b,c")) + + s = StringSet{} + + require.NoError(t, s.Set("a,b,c")) + require.Equal(t, len(s), 3) + require.Equal(t, s.String(), "a,b,c") + for _, k := range []string{"a", "b", "c"} { + _, found := s[k] + require.True(t, found) + } +} diff --git a/pkg/prometheus/agent/operator.go b/pkg/prometheus/agent/operator.go index 509bb5b7f..f3640d38f 100644 --- a/pkg/prometheus/agent/operator.go +++ b/pkg/prometheus/agent/operator.go @@ -79,7 +79,7 @@ type Operator struct { metrics *operator.Metrics reconciliations *operator.ReconciliationTracker - config operator.Config + config prompkg.Config endpointSliceSupported bool scrapeConfigSupported bool canReadStorageClass bool @@ -88,7 +88,7 @@ type Operator struct { } // New creates a new controller. -func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, logger log.Logger, r prometheus.Registerer, scrapeConfigSupported bool, canReadStorageClass bool) (*Operator, error) { +func New(ctx context.Context, restConfig *rest.Config, c operator.Config, logger log.Logger, r prometheus.Registerer, scrapeConfigSupported bool, canReadStorageClass bool) (*Operator, error) { client, err := kubernetes.NewForConfig(restConfig) if err != nil { return nil, fmt.Errorf("instantiating kubernetes client failed: %w", err) @@ -104,49 +104,47 @@ func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, log return nil, fmt.Errorf("instantiating monitoring client failed: %w", err) } - if _, err := labels.Parse(conf.PromSelector); err != nil { - return nil, fmt.Errorf("can not parse prometheus-agent selector value: %w", err) - } - - secretListWatchSelector, err := fields.ParseSelector(conf.SecretListWatchSelector) - if err != nil { - return nil, fmt.Errorf("can not parse secrets selector value: %w", err) - } - // All the metrics exposed by the controller get the controller="prometheus-agent" label. r = prometheus.WrapRegistererWith(prometheus.Labels{"controller": "prometheus-agent"}, r) - c := &Operator{ - kclient: client, - mdClient: mdClient, - mclient: mclient, - logger: logger, - config: conf, + o := &Operator{ + kclient: client, + mdClient: mdClient, + mclient: mclient, + logger: logger, + config: prompkg.Config{ + LocalHost: c.LocalHost, + ReloaderConfig: c.ReloaderConfig, + PrometheusDefaultBaseImage: c.PrometheusDefaultBaseImage, + ThanosDefaultBaseImage: c.ThanosDefaultBaseImage, + Annotations: c.Annotations, + Labels: c.Labels, + }, metrics: operator.NewMetrics(r), reconciliations: &operator.ReconciliationTracker{}, scrapeConfigSupported: scrapeConfigSupported, canReadStorageClass: canReadStorageClass, } - c.metrics.MustRegister( - c.reconciliations, + o.metrics.MustRegister( + o.reconciliations, ) - c.rr = operator.NewResourceReconciler( - c.logger, - c, - c.metrics, + o.rr = operator.NewResourceReconciler( + o.logger, + o, + o.metrics, monitoringv1alpha1.PrometheusAgentsKind, r, ) - c.promInfs, err = informers.NewInformersForResource( + o.promInfs, err = informers.NewInformersForResource( informers.NewMonitoringInformerFactories( - c.config.Namespaces.PrometheusAllowList, - c.config.Namespaces.DenyList, + c.Namespaces.PrometheusAllowList, + c.Namespaces.DenyList, mclient, resyncPeriod, func(options *metav1.ListOptions) { - options.LabelSelector = c.config.PromSelector + options.LabelSelector = c.PromSelector.String() }, ), monitoringv1alpha1.SchemeGroupVersion.WithResource(monitoringv1alpha1.PrometheusAgentName), @@ -156,16 +154,16 @@ func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, log } var promStores []cache.Store - for _, informer := range c.promInfs.GetInformers() { + for _, informer := range o.promInfs.GetInformers() { promStores = append(promStores, informer.Informer().GetStore()) } - c.metrics.MustRegister(prompkg.NewCollectorForStores(promStores...)) + o.metrics.MustRegister(prompkg.NewCollectorForStores(promStores...)) - c.smonInfs, err = informers.NewInformersForResource( + o.smonInfs, err = informers.NewInformersForResource( informers.NewMonitoringInformerFactories( - c.config.Namespaces.AllowList, - c.config.Namespaces.DenyList, + c.Namespaces.AllowList, + c.Namespaces.DenyList, mclient, resyncPeriod, nil, @@ -176,10 +174,10 @@ func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, log return nil, fmt.Errorf("error creating servicemonitor informers: %w", err) } - c.pmonInfs, err = informers.NewInformersForResource( + o.pmonInfs, err = informers.NewInformersForResource( informers.NewMonitoringInformerFactories( - c.config.Namespaces.AllowList, - c.config.Namespaces.DenyList, + c.Namespaces.AllowList, + c.Namespaces.DenyList, mclient, resyncPeriod, nil, @@ -190,10 +188,10 @@ func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, log return nil, fmt.Errorf("error creating podmonitor informers: %w", err) } - c.probeInfs, err = informers.NewInformersForResource( + o.probeInfs, err = informers.NewInformersForResource( informers.NewMonitoringInformerFactories( - c.config.Namespaces.AllowList, - c.config.Namespaces.DenyList, + c.Namespaces.AllowList, + c.Namespaces.DenyList, mclient, resyncPeriod, nil, @@ -204,11 +202,11 @@ func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, log return nil, fmt.Errorf("error creating probe informers: %w", err) } - if c.scrapeConfigSupported { - c.sconInfs, err = informers.NewInformersForResource( + if o.scrapeConfigSupported { + o.sconInfs, err = informers.NewInformersForResource( informers.NewMonitoringInformerFactories( - c.config.Namespaces.AllowList, - c.config.Namespaces.DenyList, + c.Namespaces.AllowList, + c.Namespaces.DenyList, mclient, resyncPeriod, nil, @@ -220,11 +218,11 @@ func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, log } } - c.cmapInfs, err = informers.NewInformersForResourceWithTransform( + o.cmapInfs, err = informers.NewInformersForResourceWithTransform( informers.NewMetadataInformerFactory( - c.config.Namespaces.PrometheusAllowList, - c.config.Namespaces.DenyList, - c.mdClient, + c.Namespaces.PrometheusAllowList, + c.Namespaces.DenyList, + o.mdClient, resyncPeriod, func(options *metav1.ListOptions) { options.LabelSelector = prompkg.LabelPrometheusName @@ -237,14 +235,14 @@ func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, log return nil, fmt.Errorf("error creating configmap informers: %w", err) } - c.secrInfs, err = informers.NewInformersForResourceWithTransform( + o.secrInfs, err = informers.NewInformersForResourceWithTransform( informers.NewMetadataInformerFactory( - c.config.Namespaces.PrometheusAllowList, - c.config.Namespaces.DenyList, - c.mdClient, + c.Namespaces.PrometheusAllowList, + c.Namespaces.DenyList, + o.mdClient, resyncPeriod, func(options *metav1.ListOptions) { - options.FieldSelector = secretListWatchSelector.String() + options.FieldSelector = c.SecretListWatchSelector.String() }, ), v1.SchemeGroupVersion.WithResource(string(v1.ResourceSecrets)), @@ -254,11 +252,11 @@ func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, log return nil, fmt.Errorf("error creating secrets informers: %w", err) } - c.ssetInfs, err = informers.NewInformersForResource( + o.ssetInfs, err = informers.NewInformersForResource( informers.NewKubeInformerFactories( - c.config.Namespaces.PrometheusAllowList, - c.config.Namespaces.DenyList, - c.kclient, + c.Namespaces.PrometheusAllowList, + c.Namespaces.DenyList, + o.kclient, resyncPeriod, nil, ), @@ -272,55 +270,55 @@ func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, log lw, privileged, err := listwatch.NewNamespaceListWatchFromClient( ctx, o.logger, - o.config.KubernetesVersion, + c.KubernetesVersion, o.kclient.CoreV1(), o.kclient.AuthorizationV1().SelfSubjectAccessReviews(), allowList, - o.config.Namespaces.DenyList, + c.Namespaces.DenyList, ) if err != nil { return nil, err } - level.Debug(c.logger).Log("msg", "creating namespace informer", "privileged", privileged) + level.Debug(o.logger).Log("msg", "creating namespace informer", "privileged", privileged) return cache.NewSharedIndexInformer( o.metrics.NewInstrumentedListerWatcher(lw), &v1.Namespace{}, resyncPeriod, cache.Indexers{}, ), nil } - c.nsMonInf, err = newNamespaceInformer(c, c.config.Namespaces.AllowList) + o.nsMonInf, err = newNamespaceInformer(o, c.Namespaces.AllowList) if err != nil { return nil, err } - if listwatch.IdenticalNamespaces(c.config.Namespaces.AllowList, c.config.Namespaces.PrometheusAllowList) { - c.nsPromInf = c.nsMonInf + if listwatch.IdenticalNamespaces(c.Namespaces.AllowList, c.Namespaces.PrometheusAllowList) { + o.nsPromInf = o.nsMonInf } else { - c.nsPromInf, err = newNamespaceInformer(c, c.config.Namespaces.PrometheusAllowList) + o.nsPromInf, err = newNamespaceInformer(o, c.Namespaces.PrometheusAllowList) if err != nil { return nil, err } } - endpointSliceSupported, err := k8sutil.IsAPIGroupVersionResourceSupported(c.kclient.Discovery(), schema.GroupVersion{Group: "discovery.k8s.io", Version: "v1"}, "endpointslices") + endpointSliceSupported, err := k8sutil.IsAPIGroupVersionResourceSupported(o.kclient.Discovery(), schema.GroupVersion{Group: "discovery.k8s.io", Version: "v1"}, "endpointslices") if err != nil { - level.Warn(c.logger).Log("msg", "failed to check if the API supports the endpointslice resources", "err ", err) + level.Warn(o.logger).Log("msg", "failed to check if the API supports the endpointslice resources", "err ", err) } - level.Info(c.logger).Log("msg", "Kubernetes API capabilities", "endpointslices", endpointSliceSupported) + level.Info(o.logger).Log("msg", "Kubernetes API capabilities", "endpointslices", endpointSliceSupported) // The operator doesn't yet support the endpointslices API. // See https://github.com/prometheus-operator/prometheus-operator/issues/3862 // for details. - c.endpointSliceSupported = false + o.endpointSliceSupported = false - c.statusReporter = prompkg.StatusReporter{ - Kclient: c.kclient, - Reconciliations: c.reconciliations, - SsetInfs: c.ssetInfs, - Rr: c.rr, + o.statusReporter = prompkg.StatusReporter{ + Kclient: o.kclient, + Reconciliations: o.reconciliations, + SsetInfs: o.ssetInfs, + Rr: o.rr, } - return c, nil + return o, nil } // Run the controller. @@ -752,7 +750,7 @@ func (c *Operator) createOrUpdateConfigurationSecret(ctx context.Context, p *mon return k8sutil.CreateOrUpdateSecret(ctx, sClient, s) } -func createSSetInputHash(p monitoringv1alpha1.PrometheusAgent, c operator.Config, tlsAssets *operator.ShardedSecret, ssSpec appsv1.StatefulSetSpec) (string, error) { +func createSSetInputHash(p monitoringv1alpha1.PrometheusAgent, c prompkg.Config, tlsAssets *operator.ShardedSecret, ssSpec appsv1.StatefulSetSpec) (string, error) { var http2 *bool if p.Spec.Web != nil && p.Spec.Web.WebConfigFileFields.HTTPConfig != nil { http2 = p.Spec.Web.WebConfigFileFields.HTTPConfig.HTTP2 @@ -768,7 +766,7 @@ func createSSetInputHash(p monitoringv1alpha1.PrometheusAgent, c operator.Config PrometheusAnnotations map[string]string PrometheusGeneration int64 PrometheusWebHTTP2 *bool - Config operator.Config + Config prompkg.Config StatefulSetSpec appsv1.StatefulSetSpec Assets []string `hash:"set"` }{ diff --git a/pkg/prometheus/agent/statefulset.go b/pkg/prometheus/agent/statefulset.go index 8ec641567..bdb876bd6 100644 --- a/pkg/prometheus/agent/statefulset.go +++ b/pkg/prometheus/agent/statefulset.go @@ -41,7 +41,7 @@ const ( func makeStatefulSet( name string, p monitoringv1.PrometheusInterface, - config *operator.Config, + config *prompkg.Config, cg *prompkg.ConfigGenerator, inputHash string, shard int32, @@ -166,7 +166,7 @@ func makeStatefulSet( func makeStatefulSetSpec( p monitoringv1.PrometheusInterface, - c *operator.Config, + c *prompkg.Config, cg *prompkg.ConfigGenerator, shard int32, tlsAssetSecrets []string, @@ -426,7 +426,7 @@ func makeStatefulSetSpec( }, nil } -func makeStatefulSetService(p *monitoringv1alpha1.PrometheusAgent, config operator.Config) *v1.Service { +func makeStatefulSetService(p *monitoringv1alpha1.PrometheusAgent, config prompkg.Config) *v1.Service { p = p.DeepCopy() if p.Spec.PortName == "" { diff --git a/pkg/prometheus/agent/statefulset_test.go b/pkg/prometheus/agent/statefulset_test.go index 67f7e1282..232f2e7e2 100644 --- a/pkg/prometheus/agent/statefulset_test.go +++ b/pkg/prometheus/agent/statefulset_test.go @@ -33,7 +33,7 @@ import ( ) var ( - defaultTestConfig = &operator.Config{ + defaultTestConfig = &prompkg.Config{ LocalHost: "localhost", ReloaderConfig: operator.DefaultReloaderTestConfig.ReloaderConfig, PrometheusDefaultBaseImage: operator.DefaultPrometheusBaseImage, diff --git a/pkg/prometheus/operator.go b/pkg/prometheus/operator.go index f37bcd34b..319e39e01 100644 --- a/pkg/prometheus/operator.go +++ b/pkg/prometheus/operator.go @@ -37,6 +37,18 @@ import ( var prometheusKeyInShardStatefulSet = regexp.MustCompile("^(.+)/prometheus-(.+)-shard-[1-9][0-9]*$") var prometheusKeyInStatefulSet = regexp.MustCompile("^(.+)/prometheus-(.+)$") +// Config defines the operator's parameters for the Prometheus controllers. +// Whenever the value of one of these parameters is changed, it triggers an +// update of the managed statefulsets. +type Config struct { + LocalHost string + ReloaderConfig operator.ContainerConfig + PrometheusDefaultBaseImage string + ThanosDefaultBaseImage string + Annotations operator.Map + Labels operator.Map +} + type StatusReporter struct { Kclient kubernetes.Interface Reconciliations *operator.ReconciliationTracker diff --git a/pkg/prometheus/server/operator.go b/pkg/prometheus/server/operator.go index 69995bf42..bf274cb61 100644 --- a/pkg/prometheus/server/operator.go +++ b/pkg/prometheus/server/operator.go @@ -61,6 +61,7 @@ type Operator struct { logger log.Logger accessor *operator.Accessor + config prompkg.Config nsPromInf cache.SharedIndexInformer nsMonInf cache.SharedIndexInformer @@ -79,6 +80,7 @@ type Operator struct { metrics *operator.Metrics reconciliations *operator.ReconciliationTracker + statusReporter prompkg.StatusReporter nodeAddressLookupErrors prometheus.Counter nodeEndpointSyncs prometheus.Counter @@ -86,17 +88,16 @@ type Operator struct { kubeletObjectName string kubeletObjectNamespace string + kubeletSelector string kubeletSyncEnabled bool - config operator.Config + endpointSliceSupported bool scrapeConfigSupported bool canReadStorageClass bool - - statusReporter prompkg.StatusReporter } // New creates a new controller. -func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, logger log.Logger, r prometheus.Registerer, scrapeConfigSupported bool, canReadStorageClass bool) (*Operator, error) { +func New(ctx context.Context, restConfig *rest.Config, c operator.Config, logger log.Logger, r prometheus.Registerer, scrapeConfigSupported bool, canReadStorageClass bool) (*Operator, error) { client, err := kubernetes.NewForConfig(restConfig) if err != nil { return nil, fmt.Errorf("instantiating kubernetes client failed: %w", err) @@ -112,21 +113,12 @@ func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, log return nil, fmt.Errorf("instantiating monitoring client failed: %w", err) } - if _, err := labels.Parse(conf.PromSelector); err != nil { - return nil, fmt.Errorf("can not parse prometheus selector value: %w", err) - } - - secretListWatchSelector, err := fields.ParseSelector(conf.SecretListWatchSelector) - if err != nil { - return nil, fmt.Errorf("can not parse secrets selector value: %w", err) - } - kubeletObjectName := "" kubeletObjectNamespace := "" kubeletSyncEnabled := false - if conf.KubeletObject != "" { - parts := strings.Split(conf.KubeletObject, "/") + if c.KubeletObject != "" { + parts := strings.Split(c.KubeletObject, "/") if len(parts) != 2 { return nil, fmt.Errorf("malformatted kubelet object string, must be in format \"namespace/name\"") } @@ -138,18 +130,28 @@ func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, log // All the metrics exposed by the controller get the controller="prometheus" label. r = prometheus.WrapRegistererWith(prometheus.Labels{"controller": "prometheus"}, r) - c := &Operator{ - kclient: client, - mdClient: mdClient, - mclient: mclient, - logger: logger, - accessor: operator.NewAccessor(logger), + o := &Operator{ + kclient: client, + mdClient: mdClient, + mclient: mclient, + logger: logger, + accessor: operator.NewAccessor(logger), + kubeletObjectName: kubeletObjectName, kubeletObjectNamespace: kubeletObjectNamespace, kubeletSyncEnabled: kubeletSyncEnabled, - config: conf, - metrics: operator.NewMetrics(r), - reconciliations: &operator.ReconciliationTracker{}, + kubeletSelector: c.KubeletSelector.String(), + + config: prompkg.Config{ + LocalHost: c.LocalHost, + ReloaderConfig: c.ReloaderConfig, + PrometheusDefaultBaseImage: c.PrometheusDefaultBaseImage, + ThanosDefaultBaseImage: c.ThanosDefaultBaseImage, + Annotations: c.Annotations, + Labels: c.Labels, + }, + metrics: operator.NewMetrics(r), + reconciliations: &operator.ReconciliationTracker{}, nodeAddressLookupErrors: prometheus.NewCounter(prometheus.CounterOpts{ Name: "prometheus_operator_node_address_lookup_errors_total", Help: "Number of times a node IP address could not be determined", @@ -165,29 +167,29 @@ func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, log scrapeConfigSupported: scrapeConfigSupported, canReadStorageClass: canReadStorageClass, } - c.metrics.MustRegister( - c.nodeAddressLookupErrors, - c.nodeEndpointSyncs, - c.nodeEndpointSyncErrors, - c.reconciliations, + o.metrics.MustRegister( + o.nodeAddressLookupErrors, + o.nodeEndpointSyncs, + o.nodeEndpointSyncErrors, + o.reconciliations, ) - c.rr = operator.NewResourceReconciler( - c.logger, - c, - c.metrics, + o.rr = operator.NewResourceReconciler( + o.logger, + o, + o.metrics, monitoringv1.PrometheusesKind, r, ) - c.promInfs, err = informers.NewInformersForResource( + o.promInfs, err = informers.NewInformersForResource( informers.NewMonitoringInformerFactories( - c.config.Namespaces.PrometheusAllowList, - c.config.Namespaces.DenyList, + c.Namespaces.PrometheusAllowList, + c.Namespaces.DenyList, mclient, resyncPeriod, func(options *metav1.ListOptions) { - options.LabelSelector = c.config.PromSelector + options.LabelSelector = c.PromSelector.String() }, ), monitoringv1.SchemeGroupVersion.WithResource(monitoringv1.PrometheusName), @@ -197,15 +199,15 @@ func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, log } var promStores []cache.Store - for _, informer := range c.promInfs.GetInformers() { + for _, informer := range o.promInfs.GetInformers() { promStores = append(promStores, informer.Informer().GetStore()) } - c.metrics.MustRegister(prompkg.NewCollectorForStores(promStores...)) + o.metrics.MustRegister(prompkg.NewCollectorForStores(promStores...)) - c.smonInfs, err = informers.NewInformersForResource( + o.smonInfs, err = informers.NewInformersForResource( informers.NewMonitoringInformerFactories( - c.config.Namespaces.AllowList, - c.config.Namespaces.DenyList, + c.Namespaces.AllowList, + c.Namespaces.DenyList, mclient, resyncPeriod, nil, @@ -216,10 +218,10 @@ func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, log return nil, fmt.Errorf("error creating servicemonitor informers: %w", err) } - c.pmonInfs, err = informers.NewInformersForResource( + o.pmonInfs, err = informers.NewInformersForResource( informers.NewMonitoringInformerFactories( - c.config.Namespaces.AllowList, - c.config.Namespaces.DenyList, + c.Namespaces.AllowList, + c.Namespaces.DenyList, mclient, resyncPeriod, nil, @@ -230,10 +232,10 @@ func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, log return nil, fmt.Errorf("error creating podmonitor informers: %w", err) } - c.probeInfs, err = informers.NewInformersForResource( + o.probeInfs, err = informers.NewInformersForResource( informers.NewMonitoringInformerFactories( - c.config.Namespaces.AllowList, - c.config.Namespaces.DenyList, + c.Namespaces.AllowList, + c.Namespaces.DenyList, mclient, resyncPeriod, nil, @@ -244,11 +246,11 @@ func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, log return nil, fmt.Errorf("error creating probe informers: %w", err) } - if c.scrapeConfigSupported { - c.sconInfs, err = informers.NewInformersForResource( + if o.scrapeConfigSupported { + o.sconInfs, err = informers.NewInformersForResource( informers.NewMonitoringInformerFactories( - c.config.Namespaces.AllowList, - c.config.Namespaces.DenyList, + c.Namespaces.AllowList, + c.Namespaces.DenyList, mclient, resyncPeriod, nil, @@ -259,10 +261,10 @@ func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, log return nil, fmt.Errorf("error creating scrapeconfigs informers: %w", err) } } - c.ruleInfs, err = informers.NewInformersForResource( + o.ruleInfs, err = informers.NewInformersForResource( informers.NewMonitoringInformerFactories( - c.config.Namespaces.AllowList, - c.config.Namespaces.DenyList, + c.Namespaces.AllowList, + c.Namespaces.DenyList, mclient, resyncPeriod, nil, @@ -273,11 +275,11 @@ func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, log return nil, fmt.Errorf("error creating prometheusrule informers: %w", err) } - c.cmapInfs, err = informers.NewInformersForResourceWithTransform( + o.cmapInfs, err = informers.NewInformersForResourceWithTransform( informers.NewMetadataInformerFactory( - c.config.Namespaces.PrometheusAllowList, - c.config.Namespaces.DenyList, - c.mdClient, + c.Namespaces.PrometheusAllowList, + c.Namespaces.DenyList, + o.mdClient, resyncPeriod, func(options *metav1.ListOptions) { options.LabelSelector = prompkg.LabelPrometheusName @@ -290,14 +292,14 @@ func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, log return nil, fmt.Errorf("error creating configmap informers: %w", err) } - c.secrInfs, err = informers.NewInformersForResourceWithTransform( + o.secrInfs, err = informers.NewInformersForResourceWithTransform( informers.NewMetadataInformerFactory( - c.config.Namespaces.PrometheusAllowList, - c.config.Namespaces.DenyList, - c.mdClient, + c.Namespaces.PrometheusAllowList, + c.Namespaces.DenyList, + o.mdClient, resyncPeriod, func(options *metav1.ListOptions) { - options.FieldSelector = secretListWatchSelector.String() + options.FieldSelector = c.SecretListWatchSelector.String() }, ), v1.SchemeGroupVersion.WithResource(string(v1.ResourceSecrets)), @@ -307,11 +309,11 @@ func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, log return nil, fmt.Errorf("error creating secrets informers: %w", err) } - c.ssetInfs, err = informers.NewInformersForResource( + o.ssetInfs, err = informers.NewInformersForResource( informers.NewKubeInformerFactories( - c.config.Namespaces.PrometheusAllowList, - c.config.Namespaces.DenyList, - c.kclient, + c.Namespaces.PrometheusAllowList, + c.Namespaces.DenyList, + o.kclient, resyncPeriod, nil, ), @@ -325,55 +327,55 @@ func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, log lw, privileged, err := listwatch.NewNamespaceListWatchFromClient( ctx, o.logger, - o.config.KubernetesVersion, + c.KubernetesVersion, o.kclient.CoreV1(), o.kclient.AuthorizationV1().SelfSubjectAccessReviews(), allowList, - o.config.Namespaces.DenyList, + c.Namespaces.DenyList, ) if err != nil { return nil, err } - level.Debug(c.logger).Log("msg", "creating namespace informer", "privileged", privileged) + level.Debug(o.logger).Log("msg", "creating namespace informer", "privileged", privileged) return cache.NewSharedIndexInformer( o.metrics.NewInstrumentedListerWatcher(lw), &v1.Namespace{}, resyncPeriod, cache.Indexers{}, ), nil } - c.nsMonInf, err = newNamespaceInformer(c, c.config.Namespaces.AllowList) + o.nsMonInf, err = newNamespaceInformer(o, c.Namespaces.AllowList) if err != nil { return nil, err } - if listwatch.IdenticalNamespaces(c.config.Namespaces.AllowList, c.config.Namespaces.PrometheusAllowList) { - c.nsPromInf = c.nsMonInf + if listwatch.IdenticalNamespaces(c.Namespaces.AllowList, c.Namespaces.PrometheusAllowList) { + o.nsPromInf = o.nsMonInf } else { - c.nsPromInf, err = newNamespaceInformer(c, c.config.Namespaces.PrometheusAllowList) + o.nsPromInf, err = newNamespaceInformer(o, c.Namespaces.PrometheusAllowList) if err != nil { return nil, err } } - endpointSliceSupported, err := k8sutil.IsAPIGroupVersionResourceSupported(c.kclient.Discovery(), schema.GroupVersion{Group: "discovery.k8s.io", Version: "v1"}, "endpointslices") + endpointSliceSupported, err := k8sutil.IsAPIGroupVersionResourceSupported(o.kclient.Discovery(), schema.GroupVersion{Group: "discovery.k8s.io", Version: "v1"}, "endpointslices") if err != nil { - level.Warn(c.logger).Log("msg", "failed to check if the API supports the endpointslice resources", "err ", err) + level.Warn(o.logger).Log("msg", "failed to check if the API supports the endpointslice resources", "err ", err) } - level.Info(c.logger).Log("msg", "Kubernetes API capabilities", "endpointslices", endpointSliceSupported) + level.Info(o.logger).Log("msg", "Kubernetes API capabilities", "endpointslices", endpointSliceSupported) // The operator doesn't yet support the endpointslices API. // See https://github.com/prometheus-operator/prometheus-operator/issues/3862 // for details. - c.endpointSliceSupported = false + o.endpointSliceSupported = false - c.statusReporter = prompkg.StatusReporter{ - Kclient: c.kclient, - Reconciliations: c.reconciliations, - SsetInfs: c.ssetInfs, - Rr: c.rr, + o.statusReporter = prompkg.StatusReporter{ + Kclient: o.kclient, + Reconciliations: o.reconciliations, + SsetInfs: o.ssetInfs, + Rr: o.rr, } - return c, nil + return o, nil } // waitForCacheSync waits for the informers' caches to be synced. @@ -635,7 +637,7 @@ func (c *Operator) syncNodeEndpoints(ctx context.Context) error { }, } - nodes, err := c.kclient.CoreV1().Nodes().List(ctx, metav1.ListOptions{LabelSelector: c.config.KubeletSelector}) + nodes, err := c.kclient.CoreV1().Nodes().List(ctx, metav1.ListOptions{LabelSelector: c.kubeletSelector}) if err != nil { return fmt.Errorf("listing nodes failed: %w", err) } @@ -1408,7 +1410,7 @@ func logDeprecatedFields(logger log.Logger, p *monitoringv1.Prometheus) { } } -func createSSetInputHash(p monitoringv1.Prometheus, c operator.Config, ruleConfigMapNames []string, tlsAssets *operator.ShardedSecret, ssSpec appsv1.StatefulSetSpec) (string, error) { +func createSSetInputHash(p monitoringv1.Prometheus, c prompkg.Config, ruleConfigMapNames []string, tlsAssets *operator.ShardedSecret, ssSpec appsv1.StatefulSetSpec) (string, error) { var http2 *bool if p.Spec.Web != nil && p.Spec.Web.WebConfigFileFields.HTTPConfig != nil { http2 = p.Spec.Web.WebConfigFileFields.HTTPConfig.HTTP2 @@ -1424,7 +1426,7 @@ func createSSetInputHash(p monitoringv1.Prometheus, c operator.Config, ruleConfi PrometheusAnnotations map[string]string PrometheusGeneration int64 PrometheusWebHTTP2 *bool - Config operator.Config + Config prompkg.Config StatefulSetSpec appsv1.StatefulSetSpec RuleConfigMaps []string `hash:"set"` Assets []string `hash:"set"` diff --git a/pkg/prometheus/server/operator_test.go b/pkg/prometheus/server/operator_test.go index 25cc3222f..1fc0f9205 100644 --- a/pkg/prometheus/server/operator_test.go +++ b/pkg/prometheus/server/operator_test.go @@ -26,7 +26,7 @@ import ( "k8s.io/utils/ptr" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" - "github.com/prometheus-operator/prometheus-operator/pkg/operator" + prompkg "github.com/prometheus-operator/prometheus-operator/pkg/prometheus" ) func TestListOptions(t *testing.T) { @@ -207,7 +207,7 @@ func TestCreateStatefulSetInputHash(t *testing.T) { }, } { t.Run(tc.name, func(t *testing.T) { - c := operator.Config{} + c := prompkg.Config{} p1Hash, err := createSSetInputHash(tc.a, c, []string{}, nil, appsv1.StatefulSetSpec{}) if err != nil { diff --git a/pkg/prometheus/server/statefulset.go b/pkg/prometheus/server/statefulset.go index adeb56b2e..38d1f53a9 100644 --- a/pkg/prometheus/server/statefulset.go +++ b/pkg/prometheus/server/statefulset.go @@ -41,7 +41,7 @@ const ( ) // TODO(ArthurSens): generalize it enough to be used by both server and agent. -func makeStatefulSetService(p *monitoringv1.Prometheus, config operator.Config) *v1.Service { +func makeStatefulSetService(p *monitoringv1.Prometheus, config prompkg.Config) *v1.Service { p = p.DeepCopy() if p.Spec.PortName == "" { @@ -103,7 +103,7 @@ func makeStatefulSet( queryLogFile string, thanos *monitoringv1.ThanosSpec, disableCompaction bool, - config *operator.Config, + config *prompkg.Config, cg *prompkg.ConfigGenerator, ruleConfigMapNames []string, inputHash string, @@ -239,7 +239,7 @@ func makeStatefulSetSpec( thanos *monitoringv1.ThanosSpec, disableCompaction bool, p monitoringv1.PrometheusInterface, - c *operator.Config, + c *prompkg.Config, cg *prompkg.ConfigGenerator, shard int32, ruleConfigMapNames []string, @@ -647,7 +647,7 @@ func createThanosContainer( disableCompaction *bool, p monitoringv1.PrometheusInterface, thanos *monitoringv1.ThanosSpec, - c *operator.Config, + c *prompkg.Config, prometheusURIScheme, webRoutePrefix string) (*v1.Container, error) { var container *v1.Container diff --git a/pkg/prometheus/server/statefulset_test.go b/pkg/prometheus/server/statefulset_test.go index afcdd3220..15841926b 100644 --- a/pkg/prometheus/server/statefulset_test.go +++ b/pkg/prometheus/server/statefulset_test.go @@ -39,7 +39,7 @@ import ( ) var ( - defaultTestConfig = &operator.Config{ + defaultTestConfig = &prompkg.Config{ LocalHost: "localhost", ReloaderConfig: operator.DefaultReloaderTestConfig.ReloaderConfig, PrometheusDefaultBaseImage: operator.DefaultPrometheusBaseImage, @@ -874,7 +874,7 @@ func TestTagAndShaAndVersion(t *testing.T) { } func TestPrometheusDefaultBaseImageFlag(t *testing.T) { - operatorConfig := &operator.Config{ + operatorConfig := &prompkg.Config{ ReloaderConfig: defaultTestConfig.ReloaderConfig, PrometheusDefaultBaseImage: "nondefaultuseflag/quay.io/prometheus/prometheus", ThanosDefaultBaseImage: "nondefaultuseflag/quay.io/thanos/thanos", @@ -926,7 +926,7 @@ func TestPrometheusDefaultBaseImageFlag(t *testing.T) { } func TestThanosDefaultBaseImageFlag(t *testing.T) { - thanosBaseImageConfig := &operator.Config{ + thanosBaseImageConfig := &prompkg.Config{ ReloaderConfig: defaultTestConfig.ReloaderConfig, PrometheusDefaultBaseImage: "nondefaultuseflag/quay.io/prometheus/prometheus", ThanosDefaultBaseImage: "nondefaultuseflag/quay.io/thanos/thanos", @@ -1534,7 +1534,7 @@ func TestRetentionAndRetentionSize(t *testing.T) { } func TestReplicasConfigurationWithSharding(t *testing.T) { - testConfig := &operator.Config{ + testConfig := &prompkg.Config{ ReloaderConfig: defaultTestConfig.ReloaderConfig, PrometheusDefaultBaseImage: "quay.io/prometheus/prometheus", ThanosDefaultBaseImage: "quay.io/thanos/thanos:v0.7.0", @@ -1596,7 +1596,7 @@ func TestReplicasConfigurationWithSharding(t *testing.T) { func TestSidecarResources(t *testing.T) { operator.TestSidecarsResources(t, func(reloaderConfig operator.ContainerConfig) *appsv1.StatefulSet { - testConfig := &operator.Config{ + testConfig := &prompkg.Config{ ReloaderConfig: reloaderConfig, PrometheusDefaultBaseImage: defaultTestConfig.PrometheusDefaultBaseImage, ThanosDefaultBaseImage: defaultTestConfig.ThanosDefaultBaseImage, diff --git a/pkg/prometheus/statefulset.go b/pkg/prometheus/statefulset.go index 163437ad2..9f31cdab2 100644 --- a/pkg/prometheus/statefulset.go +++ b/pkg/prometheus/statefulset.go @@ -119,7 +119,7 @@ func compress(data []byte) ([]byte, error) { return buf.Bytes(), nil } -func MakeConfigurationSecret(p monitoringv1.PrometheusInterface, config operator.Config, data []byte) (*v1.Secret, error) { +func MakeConfigurationSecret(p monitoringv1.PrometheusInterface, config Config, data []byte) (*v1.Secret, error) { promConfig, err := compress(data) if err != nil { return nil, err diff --git a/pkg/server/server.go b/pkg/server/server.go index 548e4b048..21fd4bcad 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -15,49 +15,132 @@ package server import ( + "context" "crypto/tls" "crypto/x509" + "flag" "fmt" + stdlog "log" + "net" + "net/http" "os" + "path/filepath" "time" + rbacproxytls "github.com/brancz/kube-rbac-proxy/pkg/tls" "github.com/go-kit/log" "github.com/go-kit/log/level" - "k8s.io/component-base/cli/flag" + kflag "k8s.io/component-base/cli/flag" + + "github.com/prometheus-operator/prometheus-operator/pkg/operator" ) -// TLSServerConfig contains the necessary fields to configure -// web server TLS -type TLSServerConfig struct { +const ( + defaultTLSDir = "/etc/tls/private" + defaultTLSVersion = "VersionTLS13" +) + +func DefaultConfig(listenAddress string, enableTLS bool) Config { + return Config{ + ListenAddress: listenAddress, + // Mitigate CVE-2023-44487 by disabling HTTP2 by default until the Go + // standard library and golang.org/x/net are fully fixed. + // Right now, it is possible for authenticated and unauthenticated users to + // hold open HTTP2 connections and consume huge amounts of memory. + // See: + // * https://github.com/kubernetes/kubernetes/pull/121120 + // * https://github.com/kubernetes/kubernetes/issues/121197 + // * https://github.com/golang/go/issues/63417#issuecomment-1758858612 + EnableHTTP2: false, + TLSConfig: TLSConfig{ + Enabled: enableTLS, + CertFile: filepath.Join(defaultTLSDir, "tls.crt"), + KeyFile: filepath.Join(defaultTLSDir, "tls.key"), + ClientCAFile: filepath.Join(defaultTLSDir, "tls-ca.crt"), + MinVersion: defaultTLSVersion, + CipherSuites: operator.StringSet{}, + ReloadInterval: time.Minute, + }, + } +} + +func RegisterFlags(fs *flag.FlagSet, c *Config) { + fs.StringVar(&c.ListenAddress, "web.listen-address", c.ListenAddress, "Address on which to expose metrics and web interface.") + + fs.BoolVar(&c.EnableHTTP2, "web.enable-http2", c.EnableHTTP2, "Enable HTTP2 connections.") + + fs.BoolVar(&c.TLSConfig.Enabled, "web.enable-tls", c.TLSConfig.Enabled, "Enable TLS for the web server.") + fs.StringVar(&c.TLSConfig.CertFile, "web.cert-file", c.TLSConfig.CertFile, "Certficate file to be used for the web server.") + fs.StringVar(&c.TLSConfig.KeyFile, "web.key-file", c.TLSConfig.KeyFile, "Private key matching the cert file to be used for the web server.") + fs.StringVar(&c.TLSConfig.ClientCAFile, "web.client-ca-file", c.TLSConfig.ClientCAFile, "Client CA certificate file to be used for the web server.") + + fs.DurationVar(&c.TLSConfig.ReloadInterval, "web.tls-reload-interval", c.TLSConfig.ReloadInterval, "The interval at which to watch for TLS certificate changes, by default set to 1 minute. (default 1m0s).") + + fs.StringVar(&c.TLSConfig.MinVersion, "web.tls-min-version", c.TLSConfig.MinVersion, + "Minimum TLS version supported. Value must match version names from https://golang.org/pkg/crypto/tls/#pkg-constants.") + fs.Var(&c.TLSConfig.CipherSuites, "web.tls-cipher-suites", "Comma-separated list of cipher suites for the server."+ + " Values are from tls package constants (https://golang.org/pkg/crypto/tls/#pkg-constants)."+ + "If omitted, the default Go cipher suites will be used. "+ + "Note that TLS 1.3 ciphersuites are not configurable.") +} + +// Config defines the web server configuration. +type Config struct { + ListenAddress string + EnableHTTP2 bool + TLSConfig TLSConfig +} + +// TLSConfig defines the TLS settings of the web server. +type TLSConfig struct { + Enabled bool CertFile string KeyFile string ClientCAFile string MinVersion string - CipherSuites []string + CipherSuites operator.StringSet ReloadInterval time.Duration } -// NewTLSConfig provides new server TLS configuration. -func NewTLSConfig(logger log.Logger, certFile, keyFile, clientCAFile, minVersion string, cipherSuites []string) (*tls.Config, error) { - if certFile == "" && keyFile == "" { - if clientCAFile != "" { - return nil, fmt.Errorf("when a client CA is used a server key and certificate must also be provided") +// Convert returns a *tls.Config from the given TLSConfig. +func (tc *TLSConfig) Convert(logger log.Logger) (*tls.Config, error) { + if logger == nil { + logger = log.NewNopLogger() + } + + if !tc.Enabled { + return nil, nil + } + + if tc.CertFile == "" && tc.KeyFile == "" { + if tc.ClientCAFile != "" { + return nil, fmt.Errorf("server key and certificate must be provided when a client CA is configured") } - return nil, fmt.Errorf("TLS disabled. key and cert must be set to enable") + + // Disable TLS. + level.Warn(logger).Log("msg", "server key and certificate not provided, TLS disabled") + return nil, nil } tlsCfg := &tls.Config{} - - version, err := flag.TLSVersion(minVersion) + version, err := kflag.TLSVersion(tc.MinVersion) if err != nil { - return nil, fmt.Errorf("TLS version invalid: %w", err) + return nil, fmt.Errorf("invalid TLS version: %w", err) + } + + // Any older versions won't allow a secure connection. + switch version { + case tls.VersionTLS12: + case tls.VersionTLS13: + default: + return nil, fmt.Errorf("TLS version %q isn't supported", tls.VersionName(version)) } tlsCfg.MinVersion = version - cipherSuiteIDs, err := flag.TLSCipherSuites(cipherSuites) + cipherSuiteIDs, err := kflag.TLSCipherSuites(tc.CipherSuites.Slice()) if err != nil { - return nil, fmt.Errorf("TLS cipher suite name to ID conversion: %w", err) + return nil, fmt.Errorf("failed to convert TLS cipher suite name to ID: %w", err) } // A list of supported cipher suites for TLS versions up to TLS 1.2. @@ -65,24 +148,124 @@ func NewTLSConfig(logger log.Logger, certFile, keyFile, clientCAFile, minVersion // Note that TLS 1.3 ciphersuites are not configurable. tlsCfg.CipherSuites = cipherSuiteIDs - if clientCAFile != "" { - if info, err := os.Stat(clientCAFile); err == nil && info.Mode().IsRegular() { - caPEM, err := os.ReadFile(clientCAFile) - if err != nil { - return nil, fmt.Errorf("reading client CA: %w", err) - } + if tc.ClientCAFile == "" { + return tlsCfg, nil + } - certPool := x509.NewCertPool() - if !certPool.AppendCertsFromPEM(caPEM) { - return nil, fmt.Errorf("building client CA: %w", err) - } + info, err := os.Stat(tc.ClientCAFile) + switch { + case err != nil: + level.Warn(logger).Log("msg", "server TLS client verification disabled", "err", err, "client_ca_file", tc.ClientCAFile) - tlsCfg.ClientCAs = certPool - tlsCfg.ClientAuth = tls.RequireAndVerifyClientCert + case !info.Mode().IsRegular(): + level.Warn(logger).Log("msg", "server TLS client verification disabled", "client_ca_file", tc.ClientCAFile, "file_mode", info.Mode().String()) - level.Info(logger).Log("msg", "server TLS client verification enabled") + default: + caPEM, err := os.ReadFile(tc.ClientCAFile) + if err != nil { + return nil, fmt.Errorf("reading client CA %q: %w", tc.ClientCAFile, err) } + + certPool := x509.NewCertPool() + if !certPool.AppendCertsFromPEM(caPEM) { + return nil, fmt.Errorf("client CA %q: failed to parse certificate", tc.ClientCAFile) + } + + tlsCfg.ClientCAs = certPool + tlsCfg.ClientAuth = tls.RequireAndVerifyClientCert + level.Info(logger).Log("msg", "server TLS client verification enabled", "client_ca_file", tc.ClientCAFile) } return tlsCfg, nil } + +// Server is a web server. +type Server struct { + logger log.Logger + srv *http.Server + listener net.Listener + cfg *Config +} + +// NewServer initializes a web server with the given handler (typically an http.MuxServe). +func NewServer(logger log.Logger, c *Config, handler http.Handler) (*Server, error) { + tlsConfig, err := c.TLSConfig.Convert(logger) + if err != nil { + return nil, fmt.Errorf("failed to create TLS configuration: %w", err) + } + + listener, err := net.Listen("tcp", c.ListenAddress) + if err != nil { + return nil, err + } + + srv := &http.Server{ + Handler: handler, + TLSConfig: tlsConfig, + ReadHeaderTimeout: 30 * time.Second, + ReadTimeout: 30 * time.Second, + // use flags on standard logger to align with base logger and get consistent parsed fields form adapter: + // use shortfile flag to get proper 'caller' field (avoid being wrongly parsed/extracted from message) + // and no datetime related flag to keep 'ts' field from base logger (with controlled format) + ErrorLog: stdlog.New(log.NewStdlibAdapter(logger), "", stdlog.Lshortfile), + } + + if !c.EnableHTTP2 { + srv.TLSNextProto = make(map[string]func(*http.Server, *tls.Conn, http.Handler)) + } + + return &Server{ + logger: logger, + srv: srv, + listener: listener, + cfg: c, + }, nil +} + +// Serve starts the web server. It will block until the server is shutted down +// or an error occurs. +func (s *Server) Serve(ctx context.Context) error { + if s.srv.TLSConfig == nil { + level.Info(s.logger).Log("msg", "starting insecure server", "address", s.listener.Addr().String()) + if err := s.srv.Serve(s.listener); err != http.ErrServerClosed { + return err + } + + return nil + } + + r, err := rbacproxytls.NewCertReloader( + s.cfg.TLSConfig.CertFile, + s.cfg.TLSConfig.KeyFile, + s.cfg.TLSConfig.ReloadInterval, + ) + if err != nil { + return fmt.Errorf("failed to initialize certificate reloader: %w", err) + } + + s.srv.TLSConfig.GetCertificate = r.GetCertificate + go func() { + for { + // r.Watch will wait ReloadInterval, so this is not + // a hot loop + if err := r.Watch(ctx); err != nil { + level.Warn(s.logger).Log( + "msg", "error watching certificate reloader", + "err", err) + } + } + }() + + level.Info(s.logger).Log("msg", "starting secure server", "address", s.listener.Addr().String(), "http2", s.cfg.EnableHTTP2) + if err := s.srv.ServeTLS(s.listener, "", ""); err != http.ErrServerClosed { + return err + } + + return nil +} + +// Shutdown closes gracefully all active connections. +func (s *Server) Shutdown(ctx context.Context) error { + level.Info(s.logger).Log("msg", "shutting down web server") + return s.srv.Shutdown(ctx) +} diff --git a/pkg/server/server_test.go b/pkg/server/server_test.go index 778d325ab..f7b73d904 100644 --- a/pkg/server/server_test.go +++ b/pkg/server/server_test.go @@ -15,12 +15,101 @@ package server import ( + "crypto/tls" "testing" + + "github.com/stretchr/testify/require" + + "github.com/prometheus-operator/prometheus-operator/pkg/operator" ) -func TestNewTLSConfig(t *testing.T) { - _, err := NewTLSConfig(nil, "", "", "foo.txt", "", nil) - if err == nil { - t.Errorf("expected tls err when client CA set without key and cert files") +func TestConvertTLSConfig(t *testing.T) { + for _, tc := range []struct { + c TLSConfig + + err bool + assert func(*testing.T, *tls.Config) + }{ + { + c: TLSConfig{}, + + assert: func(t *testing.T, c *tls.Config) { + require.Nil(t, c) + }, + }, + { + c: TLSConfig{ + Enabled: true, + ClientCAFile: "ca.crt", + }, + err: true, + }, + { + c: TLSConfig{ + Enabled: true, + }, + + assert: func(t *testing.T, c *tls.Config) { + require.Nil(t, c) + }, + }, + { + c: TLSConfig{ + Enabled: true, + CertFile: "server.crt", + KeyFile: "server.key", + MinVersion: "VersionTLSXX", + }, + + err: true, + }, + { + c: TLSConfig{ + Enabled: true, + CertFile: "server.crt", + KeyFile: "server.key", + }, + + assert: func(t *testing.T, c *tls.Config) { + require.NotNil(t, c) + require.Equal(t, tls.VersionTLS12, int(c.MinVersion)) + }, + }, + { + c: TLSConfig{ + Enabled: true, + CertFile: "server.crt", + KeyFile: "server.key", + CipherSuites: operator.StringSet(map[string]struct{}{"foo": {}}), + }, + + err: true, + }, + { + c: TLSConfig{ + Enabled: true, + CertFile: "server.crt", + KeyFile: "server.key", + CipherSuites: operator.StringSet(map[string]struct{}{"TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA": {}}), + }, + + assert: func(t *testing.T, c *tls.Config) { + require.NotNil(t, c) + require.Equal(t, []uint16{tls.TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA}, c.CipherSuites) + }, + }, + } { + t.Run("", func(t *testing.T) { + c, err := tc.c.Convert(nil) + if tc.err { + require.Error(t, err) + return + } + + require.NoError(t, err) + if tc.assert != nil { + tc.assert(t, c) + } + }) } } diff --git a/pkg/thanos/operator.go b/pkg/thanos/operator.go index 53570cbb6..7c1bcf836 100644 --- a/pkg/thanos/operator.go +++ b/pkg/thanos/operator.go @@ -30,7 +30,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/version" "k8s.io/client-go/kubernetes" "k8s.io/client-go/metadata" "k8s.io/client-go/rest" @@ -76,22 +75,19 @@ type Operator struct { config Config } -// Config defines configuration parameters for the Operator. +// Config defines the operator's parameters for the Thanos controller. +// Whenever the value of one of these parameters is changed, it triggers an +// update of the managed statefulsets. type Config struct { - KubernetesVersion version.Info + LocalHost string ReloaderConfig operator.ContainerConfig ThanosDefaultBaseImage string - Namespaces operator.Namespaces Annotations operator.Map Labels operator.Map - LocalHost string - LogLevel string - LogFormat string - ThanosRulerSelector string } // New creates a new controller. -func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, logger log.Logger, r prometheus.Registerer, canReadStorageClass bool) (*Operator, error) { +func New(ctx context.Context, restConfig *rest.Config, c operator.Config, logger log.Logger, r prometheus.Registerer, canReadStorageClass bool) (*Operator, error) { client, err := kubernetes.NewForConfig(restConfig) if err != nil { return nil, fmt.Errorf("instantiating kubernetes client failed: %w", err) @@ -107,10 +103,6 @@ func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, log return nil, fmt.Errorf("instantiating monitoring client failed: %w", err) } - if _, err := labels.Parse(conf.ThanosRulerSelector); err != nil { - return nil, fmt.Errorf("can not parse thanos ruler selector value: %w", err) - } - // All the metrics exposed by the controller get the controller="thanos" label. r = prometheus.WrapRegistererWith(prometheus.Labels{"controller": "thanos"}, r) @@ -124,16 +116,11 @@ func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, log reconciliations: &operator.ReconciliationTracker{}, canReadStorageClass: canReadStorageClass, config: Config{ - KubernetesVersion: conf.KubernetesVersion, - ReloaderConfig: conf.ReloaderConfig, - ThanosDefaultBaseImage: conf.ThanosDefaultBaseImage, - Namespaces: conf.Namespaces, - Annotations: conf.Annotations, - Labels: conf.Labels, - LocalHost: conf.LocalHost, - LogLevel: conf.LogLevel, - LogFormat: conf.LogFormat, - ThanosRulerSelector: conf.ThanosRulerSelector, + ReloaderConfig: c.ReloaderConfig, + ThanosDefaultBaseImage: c.ThanosDefaultBaseImage, + Annotations: c.Annotations, + Labels: c.Labels, + LocalHost: c.LocalHost, }, } @@ -147,8 +134,8 @@ func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, log o.cmapInfs, err = informers.NewInformersForResource( informers.NewMetadataInformerFactory( - o.config.Namespaces.ThanosRulerAllowList, - o.config.Namespaces.DenyList, + c.Namespaces.ThanosRulerAllowList, + c.Namespaces.DenyList, o.mdClient, resyncPeriod, func(options *metav1.ListOptions) { @@ -163,12 +150,12 @@ func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, log o.thanosRulerInfs, err = informers.NewInformersForResource( informers.NewMonitoringInformerFactories( - o.config.Namespaces.ThanosRulerAllowList, - o.config.Namespaces.DenyList, + c.Namespaces.ThanosRulerAllowList, + c.Namespaces.DenyList, mclient, resyncPeriod, func(options *metav1.ListOptions) { - options.LabelSelector = o.config.ThanosRulerSelector + options.LabelSelector = c.ThanosRulerSelector.String() }, ), monitoringv1.SchemeGroupVersion.WithResource(monitoringv1.ThanosRulerName), @@ -185,8 +172,8 @@ func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, log o.ruleInfs, err = informers.NewInformersForResource( informers.NewMonitoringInformerFactories( - o.config.Namespaces.AllowList, - o.config.Namespaces.DenyList, + c.Namespaces.AllowList, + c.Namespaces.DenyList, mclient, resyncPeriod, nil, @@ -199,8 +186,8 @@ func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, log o.ssetInfs, err = informers.NewInformersForResource( informers.NewKubeInformerFactories( - o.config.Namespaces.ThanosRulerAllowList, - o.config.Namespaces.DenyList, + c.Namespaces.ThanosRulerAllowList, + c.Namespaces.DenyList, o.kclient, resyncPeriod, nil, @@ -215,11 +202,11 @@ func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, log lw, privileged, err := listwatch.NewNamespaceListWatchFromClient( ctx, o.logger, - o.config.KubernetesVersion, + c.KubernetesVersion, o.kclient.CoreV1(), o.kclient.AuthorizationV1().SelfSubjectAccessReviews(), allowList, - o.config.Namespaces.DenyList) + c.Namespaces.DenyList) if err != nil { return nil, err } @@ -233,15 +220,15 @@ func New(ctx context.Context, restConfig *rest.Config, conf operator.Config, log ), nil } - o.nsRuleInf, err = newNamespaceInformer(o, o.config.Namespaces.AllowList) + o.nsRuleInf, err = newNamespaceInformer(o, c.Namespaces.AllowList) if err != nil { return nil, err } - if listwatch.IdenticalNamespaces(o.config.Namespaces.AllowList, o.config.Namespaces.ThanosRulerAllowList) { + if listwatch.IdenticalNamespaces(c.Namespaces.AllowList, c.Namespaces.ThanosRulerAllowList) { o.nsThanosRulerInf = o.nsRuleInf } else { - o.nsThanosRulerInf, err = newNamespaceInformer(o, o.config.Namespaces.ThanosRulerAllowList) + o.nsThanosRulerInf, err = newNamespaceInformer(o, c.Namespaces.ThanosRulerAllowList) if err != nil { return nil, err } diff --git a/pkg/versionutil/cli.go b/pkg/versionutil/cli.go index ac8a20ac7..fa37c7172 100644 --- a/pkg/versionutil/cli.go +++ b/pkg/versionutil/cli.go @@ -31,14 +31,14 @@ var ( // RegisterParseFlags registers and parses version related flags. func RegisterParseFlags() { - RegisterFlags() + RegisterFlags(flag.CommandLine) flag.Parse() } // RegisterFlags registers version related flags to core. -func RegisterFlags() { - flag.BoolVar(&printVer, "version", false, "Prints current version.") - flag.BoolVar(&printShort, "short-version", false, "Print just the version number.") +func RegisterFlags(fs *flag.FlagSet) { + fs.BoolVar(&printVer, "version", false, "Prints current version.") + fs.BoolVar(&printShort, "short-version", false, "Print just the version number.") } // RegisterIntoKingpinFlags registers version related flags in kingpin framework.