From 1f181edb0c425857eb068a9fe0f349be0dc37243 Mon Sep 17 00:00:00 2001 From: Eliska Romanova Date: Mon, 14 Oct 2024 14:22:58 +0200 Subject: [PATCH] Monitoring docs restructure integration --- _topic_maps/_topic_map.yml | 80 ++++++-- ...n-metrics-using-developer-perspective.adoc | 5 + .../troubleshooting.adoc | 4 +- ...ing-argo-cd-custom-resource-workloads.adoc | 4 +- ...ting-applications-with-cicd-pipelines.adoc | 2 +- .../observability/telco-observability.adoc | 13 +- getting_started/openshift-overview.adoc | 8 +- ...ing-vsphere-problem-detector-operator.adoc | 2 +- installing/overview/installing-preparing.adoc | 4 +- .../validating-an-installation.adoc | 4 +- .../machine-config-daemon-metrics.adoc | 2 +- .../creating-infrastructure-machinesets.adoc | 2 +- migrating_from_ocp_3_to_4/index.adoc | 2 +- .../planning-migration-3-4.adoc | 2 +- .../accessing-metrics-outside-cluster.adoc | 1 - ...accessing-monitoring-web-service-apis.adoc | 2 +- modules/monitoring-about-managing-alerts.adoc | 18 ++ ...onitoring-about-monitoring-dashboards.adoc | 28 +++ .../monitoring-about-querying-metrics.adoc | 19 -- ...nd-requests-for-monitoring-components.adoc | 18 +- .../monitoring-accessing-the-alerting-ui.adoc | 40 +++- ...ret-to-the-alertmanager-configuration.adoc | 124 +++++------- ...-tolerations-to-monitoring-components.adoc | 116 +++++------- ...labels-to-your-time-series-and-alerts.adoc | 122 +++++------- ...choosing-a-metrics-collection-profile.adoc | 3 + modules/monitoring-common-terms.adoc | 2 +- ...ng-configurable-monitoring-components.adoc | 99 ++++++---- ...configuring-a-persistent-volume-claim.adoc | 137 ++++++-------- ...ng-configuring-alert-routing-console.adoc} | 11 +- ...lert-routing-default-platform-alerts.adoc} | 8 +- ...rt-routing-for-user-defined-projects.adoc} | 13 +- ...t-routing-user-defined-alerts-secret.adoc} | 16 +- ...ng-configuring-external-alertmanagers.adoc | 161 +++++++--------- ...nfiguring-metrics-collection-profiles.adoc | 16 +- 
...uring-pod-topology-spread-constraints.adoc | 148 ++++++--------- ...ring-configuring-remote-write-storage.adoc | 177 +++++------------- ...-attributes-in-user-defined-projects.adoc} | 0 ...rting-rules-for-user-defined-projects.adoc | 2 +- ...reating-cluster-id-labels-for-metrics.adoc | 140 +++++--------- ...creating-cluster-monitoring-configmap.adoc | 2 +- modules/monitoring-editing-silences.adoc | 33 +++- ...ert-routing-for-user-defined-projects.adoc | 22 +++ ...-remote-write-authentication-settings.adoc | 38 ++-- ...mple-remote-write-queue-configuration.adoc | 28 +-- modules/monitoring-expiring-silences.adoc | 36 ++-- ...g-detailed-information-about-a-target.adoc | 33 ++-- ...ut-alerts-silences-and-alerting-rules.adoc | 38 ++-- ...ert-routing-for-user-defined-projects.adoc | 1 - ...sion-to-monitor-user-defined-projects.adoc | 2 +- ...-monitoring-for-user-defined-projects.adoc | 11 ++ ...les-for-all-projects-in-a-single-view.adoc | 2 +- .../monitoring-maintenance-and-support.adoc | 2 +- ...rting-rules-for-user-defined-projects.adoc | 2 - ...managing-core-platform-alerting-rules.adoc | 2 +- ...-and-size-for-prometheus-metrics-data.adoc | 162 +++++----------- ...ring-monitoring-stack-in-ha-clusters.adoc} | 4 +- ...itoring-components-to-different-nodes.adoc | 118 ++++++------ ...ng-alerting-for-user-defined-projects.adoc | 2 +- ...-for-all-projects-with-mon-dashboard.adoc} | 10 +- ...-defined-projects-with-mon-dashboard.adoc} | 12 +- ...rting-rules-for-user-defined-projects.adoc | 2 +- ...nitoring-resizing-a-persistent-volume.adoc | 142 +++++++------- ...e-for-the-cluster-monitoring-operator.adoc | 8 +- ...-and-size-for-prometheus-metrics-data.adoc | 33 ++++ ...reviewing-monitoring-dashboards-admin.adoc | 8 +- ...ewing-monitoring-dashboards-developer.adoc | 13 +- ...ng-alerts-silences-and-alerting-rules.adoc | 8 +- ...-log-levels-for-monitoring-components.adoc | 124 ++++++------ ...setting-query-log-file-for-prometheus.adoc | 148 +++++++-------- 
modules/monitoring-silencing-alerts.adoc | 70 ++++--- ...nd-requests-for-monitoring-components.adoc | 84 ++++++--- ...-remote-write-authentication-settings.adoc | 2 +- ...ert-routing-for-user-defined-projects.adoc | 4 +- ...ng-understanding-the-monitoring-stack.adoc | 6 +- ...lectors-to-move-monitoring-components.adoc | 7 +- ...ogy-spread-constraints-for-monitoring.adoc | 16 +- .../metallb/metallb-troubleshoot-support.adoc | 2 +- .../ingress-operator.adoc | 2 +- .../configuring-sriov-operator.adoc | 5 +- ...loud-events-consumer-dev-reference-v2.adoc | 2 +- ...p-cloud-events-consumer-dev-reference.adoc | 2 +- .../distr-tracing-tempo-configuring.adoc | 2 +- .../logging_alerts/custom-logging-alerts.adoc | 6 +- .../default-logging-alerts.adoc | 5 + .../troubleshooting-logging-alerts.adoc | 5 + .../about-ocp-monitoring/_attributes | 1 + .../about-ocp-monitoring.adoc | 26 +++ .../monitoring/about-ocp-monitoring/images | 1 + .../about-ocp-monitoring/key-concepts.adoc | 131 +++++++++++++ .../monitoring/about-ocp-monitoring/modules | 1 + .../monitoring-stack-architecture.adoc | 54 ++++++ .../monitoring/about-ocp-monitoring/snippets | 1 + .../monitoring/accessing-metrics/_attributes | 1 + .../accessing-metrics-as-a-developer.adoc | 37 ++++ ...accessing-metrics-as-an-administrator.adoc | 37 ++++ ...sing-monitoring-apis-by-using-the-cli.adoc | 52 +++++ .../monitoring/accessing-metrics/images | 1 + .../monitoring/accessing-metrics/modules | 1 + .../monitoring/accessing-metrics/snippets | 1 + ...accessing-third-party-monitoring-apis.adoc | 11 +- ...on-monitoring-configuration-scenarios.adoc | 14 +- ...e-for-the-cluster-monitoring-operator.adoc | 9 +- .../_attributes | 1 + .../configuring-alerts-and-notifications.adoc | 59 ++++++ .../configuring-metrics.adoc | 43 +++++ ...nfiguring-performance-and-scalability.adoc | 93 +++++++++ .../images | 1 + .../modules | 1 + ...ing-to-configure-the-monitoring-stack.adoc | 39 ++++ .../snippets | 1 + .../storing-and-recording-data.adoc | 60 
++++++ .../configuring-the-monitoring-stack.adoc | 100 ++++++---- .../_attributes | 1 + ...figuring-alerts-and-notifications-uwm.adoc | 60 ++++++ .../configuring-metrics-uwm.adoc | 60 ++++++ ...uring-performance-and-scalability-uwm.adoc | 98 ++++++++++ .../images | 1 + .../modules | 1 + ...to-configure-the-monitoring-stack-uwm.adoc | 76 ++++++++ .../snippets | 1 + .../storing-and-recording-data-uwm.adoc | 58 ++++++ ...ert-routing-for-user-defined-projects.adoc | 12 +- ...-monitoring-for-user-defined-projects.adoc | 16 +- .../monitoring/getting-started/_attributes | 1 + .../core-platform-monitoring-first-steps.adoc | 58 ++++++ ...developer-and-non-administrator-steps.adoc | 16 ++ .../monitoring/getting-started/images | 1 + ...aintenance-and-support-for-monitoring.adoc | 28 +++ .../monitoring/getting-started/modules | 1 + .../monitoring/getting-started/snippets | 1 + .../user-workload-monitoring-first-steps.adoc | 20 ++ observability/monitoring/managing-alerts.adoc | 32 ++-- .../monitoring/managing-alerts/_attributes | 1 + .../monitoring/managing-alerts/images | 1 + .../managing-alerts-as-a-developer.adoc | 79 ++++++++ .../managing-alerts-as-an-administrator.adoc | 113 +++++++++++ .../monitoring/managing-alerts/modules | 1 + .../monitoring/managing-alerts/snippets | 1 + .../monitoring/managing-metrics.adoc | 15 +- .../monitoring/monitoring-overview.adoc | 12 +- .../reviewing-monitoring-dashboards.adoc | 31 +-- .../troubleshooting-monitoring-issues.adoc | 16 +- .../metrics-alerts-dashboards.adoc | 2 +- ...ork-observability-operator-monitoring.adoc | 2 +- ...figuring-metrics-for-monitoring-stack.adoc | 12 +- .../otel-configuring-otelcol-metrics.adoc | 2 +- observability/overview/index.adoc | 6 + .../visualizing-power-monitoring-metrics.adoc | 2 +- .../cluster-tasks.adoc | 2 +- .../configuring-alert-notifications.adoc | 4 +- rosa_architecture/index.adoc | 6 +- .../learn_more_about_openshift.adoc | 4 +- .../telco-core-ref-design-components.adoc | 2 +- 
.../cert-manager-monitoring.adoc | 2 +- .../serverless-admin-metrics.adoc | 2 +- .../serverless-developer-metrics.adoc | 6 +- service_mesh/v2x/ossm-observability.adoc | 2 +- .../persistent-storage-local.adoc | 2 +- .../about-remote-health-monitoring.adoc | 12 +- .../investigating-monitoring-issues.adoc | 15 +- .../virt-exposing-custom-metrics-for-vms.adoc | 8 +- virt/monitoring/virt-monitoring-overview.adoc | 5 + virt/monitoring/virt-prometheus-queries.adoc | 12 +- virt/monitoring/virt-runbooks.adoc | 9 +- .../virt-4-18-release-notes.adoc | 2 +- virt/support/virt-collecting-virt-data.adoc | 17 +- welcome/learn_more_about_openshift.adoc | 7 +- 167 files changed, 2910 insertions(+), 1645 deletions(-) create mode 100644 modules/monitoring-about-managing-alerts.adoc create mode 100644 modules/monitoring-about-monitoring-dashboards.adoc delete mode 100644 modules/monitoring-about-querying-metrics.adoc rename modules/{monitoring-configuring-alert-receivers.adoc => monitoring-configuring-alert-routing-console.adoc} (83%) rename modules/{monitoring-configuring-notifications-for-default-platform-alerts.adoc => monitoring-configuring-alert-routing-default-platform-alerts.adoc} (91%) rename modules/{monitoring-creating-alert-routing-for-user-defined-projects.adoc => monitoring-configuring-alert-routing-for-user-defined-projects.adoc} (78%) rename modules/{monitoring-configuring-notifications-for-user-defined-alerts.adoc => monitoring-configuring-alert-routing-user-defined-alerts-secret.adoc} (74%) rename modules/{monitoring-limiting-scrape-samples-in-user-defined-projects.adoc => monitoring-controlling-the-impact-of-unbound-attributes-in-user-defined-projects.adoc} (100%) create mode 100644 modules/monitoring-enabling-alert-routing-for-user-defined-projects.adoc create mode 100644 modules/monitoring-intro-enabling-monitoring-for-user-defined-projects.adoc rename modules/{monitoring-understanding-monitoring-stack-in-ha-clusters.adoc => 
monitoring-monitoring-stack-in-ha-clusters.adoc} (90%) rename modules/{monitoring-querying-metrics-for-all-projects-as-an-administrator.adoc => monitoring-querying-metrics-for-all-projects-with-mon-dashboard.adoc} (86%) rename modules/{monitoring-querying-metrics-for-user-defined-projects-as-a-developer.adoc => monitoring-querying-metrics-for-user-defined-projects-with-mon-dashboard.adoc} (84%) create mode 100644 modules/monitoring-retention-time-and-size-for-prometheus-metrics-data.adoc create mode 120000 observability/monitoring/about-ocp-monitoring/_attributes create mode 100644 observability/monitoring/about-ocp-monitoring/about-ocp-monitoring.adoc create mode 120000 observability/monitoring/about-ocp-monitoring/images create mode 100644 observability/monitoring/about-ocp-monitoring/key-concepts.adoc create mode 120000 observability/monitoring/about-ocp-monitoring/modules create mode 100644 observability/monitoring/about-ocp-monitoring/monitoring-stack-architecture.adoc create mode 120000 observability/monitoring/about-ocp-monitoring/snippets create mode 120000 observability/monitoring/accessing-metrics/_attributes create mode 100644 observability/monitoring/accessing-metrics/accessing-metrics-as-a-developer.adoc create mode 100644 observability/monitoring/accessing-metrics/accessing-metrics-as-an-administrator.adoc create mode 100644 observability/monitoring/accessing-metrics/accessing-monitoring-apis-by-using-the-cli.adoc create mode 120000 observability/monitoring/accessing-metrics/images create mode 120000 observability/monitoring/accessing-metrics/modules create mode 120000 observability/monitoring/accessing-metrics/snippets create mode 120000 observability/monitoring/configuring-core-platform-monitoring/_attributes create mode 100644 observability/monitoring/configuring-core-platform-monitoring/configuring-alerts-and-notifications.adoc create mode 100644 observability/monitoring/configuring-core-platform-monitoring/configuring-metrics.adoc create mode 
100644 observability/monitoring/configuring-core-platform-monitoring/configuring-performance-and-scalability.adoc create mode 120000 observability/monitoring/configuring-core-platform-monitoring/images create mode 120000 observability/monitoring/configuring-core-platform-monitoring/modules create mode 100644 observability/monitoring/configuring-core-platform-monitoring/preparing-to-configure-the-monitoring-stack.adoc create mode 120000 observability/monitoring/configuring-core-platform-monitoring/snippets create mode 100644 observability/monitoring/configuring-core-platform-monitoring/storing-and-recording-data.adoc create mode 120000 observability/monitoring/configuring-user-workload-monitoring/_attributes create mode 100644 observability/monitoring/configuring-user-workload-monitoring/configuring-alerts-and-notifications-uwm.adoc create mode 100644 observability/monitoring/configuring-user-workload-monitoring/configuring-metrics-uwm.adoc create mode 100644 observability/monitoring/configuring-user-workload-monitoring/configuring-performance-and-scalability-uwm.adoc create mode 120000 observability/monitoring/configuring-user-workload-monitoring/images create mode 120000 observability/monitoring/configuring-user-workload-monitoring/modules create mode 100644 observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc create mode 120000 observability/monitoring/configuring-user-workload-monitoring/snippets create mode 100644 observability/monitoring/configuring-user-workload-monitoring/storing-and-recording-data-uwm.adoc create mode 120000 observability/monitoring/getting-started/_attributes create mode 100644 observability/monitoring/getting-started/core-platform-monitoring-first-steps.adoc create mode 100644 observability/monitoring/getting-started/developer-and-non-administrator-steps.adoc create mode 120000 observability/monitoring/getting-started/images create mode 100644 
observability/monitoring/getting-started/maintenance-and-support-for-monitoring.adoc create mode 120000 observability/monitoring/getting-started/modules create mode 120000 observability/monitoring/getting-started/snippets create mode 100644 observability/monitoring/getting-started/user-workload-monitoring-first-steps.adoc create mode 120000 observability/monitoring/managing-alerts/_attributes create mode 120000 observability/monitoring/managing-alerts/images create mode 100644 observability/monitoring/managing-alerts/managing-alerts-as-a-developer.adoc create mode 100644 observability/monitoring/managing-alerts/managing-alerts-as-an-administrator.adoc create mode 120000 observability/monitoring/managing-alerts/modules create mode 120000 observability/monitoring/managing-alerts/snippets diff --git a/_topic_maps/_topic_map.yml b/_topic_maps/_topic_map.yml index db7a7f7278..d85d9a060e 100644 --- a/_topic_maps/_topic_map.yml +++ b/_topic_maps/_topic_map.yml @@ -2904,26 +2904,68 @@ Topics: Dir: monitoring Distros: openshift-enterprise,openshift-origin Topics: - - Name: Monitoring overview - File: monitoring-overview - - Name: Common monitoring configuration scenarios - File: common-monitoring-configuration-scenarios - - Name: Configuring the monitoring stack - File: configuring-the-monitoring-stack - - Name: Enabling monitoring for user-defined projects - File: enabling-monitoring-for-user-defined-projects - - Name: Enabling alert routing for user-defined projects - File: enabling-alert-routing-for-user-defined-projects - - Name: Managing metrics - File: managing-metrics + - Name: About OpenShift Container Platform monitoring + Dir: about-ocp-monitoring + Topics: + - Name: About OpenShift Container Platform monitoring + File: about-ocp-monitoring + - Name: Monitoring stack architecture + File: monitoring-stack-architecture + - Name: Key concepts + File: key-concepts + - Name: Getting started + Dir: getting-started + Topics: + - Name: Maintenance and support for 
monitoring + File: maintenance-and-support-for-monitoring + - Name: Core platform monitoring first steps + File: core-platform-monitoring-first-steps + - Name: User workload monitoring first steps + File: user-workload-monitoring-first-steps + - Name: Developer and non-administrator steps + File: developer-and-non-administrator-steps + - Name: Configuring core platform monitoring + Dir: configuring-core-platform-monitoring + Topics: + - Name: Preparing to configure the monitoring stack + File: preparing-to-configure-the-monitoring-stack + - Name: Configuring performance and scalability + File: configuring-performance-and-scalability + - Name: Storing and recording data + File: storing-and-recording-data + - Name: Configuring metrics + File: configuring-metrics + - Name: Configuring alerts and notifications + File: configuring-alerts-and-notifications + - Name: Configuring user workload monitoring + Dir: configuring-user-workload-monitoring + Topics: + - Name: Preparing to configure the monitoring stack + File: preparing-to-configure-the-monitoring-stack-uwm + - Name: Configuring performance and scalability + File: configuring-performance-and-scalability-uwm + - Name: Storing and recording data + File: storing-and-recording-data-uwm + - Name: Configuring metrics + File: configuring-metrics-uwm + - Name: Configuring alerts and notifications + File: configuring-alerts-and-notifications-uwm + - Name: Accessing metrics + Dir: accessing-metrics + Topics: + - Name: Accessing metrics as an administrator + File: accessing-metrics-as-an-administrator + - Name: Accessing metrics as a developer + File: accessing-metrics-as-a-developer + - Name: Accessing monitoring APIs by using the CLI + File: accessing-monitoring-apis-by-using-the-cli - Name: Managing alerts - File: managing-alerts - - Name: Reviewing monitoring dashboards - File: reviewing-monitoring-dashboards - - Name: Monitoring clusters that run on RHOSO - File: shiftstack-prometheus-configuration - - Name: Accessing 
monitoring APIs by using the CLI - File: accessing-third-party-monitoring-apis + Dir: managing-alerts + Topics: + - Name: Managing alerts as an administrator + File: managing-alerts-as-an-administrator + - Name: Managing alerts as a developer + File: managing-alerts-as-a-developer - Name: Troubleshooting monitoring issues File: troubleshooting-monitoring-issues - Name: Config map reference for the Cluster Monitoring Operator diff --git a/applications/odc-monitoring-project-and-application-metrics-using-developer-perspective.adoc b/applications/odc-monitoring-project-and-application-metrics-using-developer-perspective.adoc index b6a72d2720..9c143803f6 100644 --- a/applications/odc-monitoring-project-and-application-metrics-using-developer-perspective.adoc +++ b/applications/odc-monitoring-project-and-application-metrics-using-developer-perspective.adoc @@ -31,4 +31,9 @@ include::modules/odc-monitoring-your-app-vulnerabilities.adoc[leveloffset=+1] [role="_additional-resources"] [id="additional-resources-odc-monitoring-project-and-application-metrics-using-developer-perspective"] == Additional resources +ifdef::openshift-rosa,openshift-dedicated[] * xref:../observability/monitoring/monitoring-overview.adoc#monitoring-overview[Monitoring overview] +endif::openshift-rosa,openshift-dedicated[] +ifndef::openshift-rosa,openshift-dedicated[] +* xref:../observability/monitoring/about-ocp-monitoring/about-ocp-monitoring.adoc#about-ocp-monitoring[About {product-title} monitoring] +endif::openshift-rosa,openshift-dedicated[] \ No newline at end of file diff --git a/backup_and_restore/application_backup_and_restore/troubleshooting.adoc b/backup_and_restore/application_backup_and_restore/troubleshooting.adoc index b940eed326..2c6742fe33 100644 --- a/backup_and_restore/application_backup_and_restore/troubleshooting.adoc +++ b/backup_and_restore/application_backup_and_restore/troubleshooting.adoc @@ -145,14 +145,14 @@ 
include::modules/migration-combining-must-gather.adoc[leveloffset=+2] include::modules/oadp-monitoring.adoc[leveloffset=+1] [role="_additional-resources"] .Additional resources -* xref:../../observability/monitoring/monitoring-overview.adoc#about-openshift-monitoring[Monitoring stack] +* xref:../../observability/monitoring/about-ocp-monitoring/about-ocp-monitoring.adoc#about-ocp-monitoring[About {product-title} monitoring] include::modules/oadp-monitoring-setup.adoc[leveloffset=+2] include::modules/oadp-creating-service-monitor.adoc[leveloffset=+2] include::modules/oadp-creating-alerting-rule.adoc[leveloffset=+2] [role="_additional-resources"] .Additional resources -* xref:../../observability/monitoring/managing-alerts.adoc#managing-alerts[Managing alerts] +* xref:../../observability/monitoring/managing-alerts/managing-alerts-as-an-administrator.adoc#managing-alerts-as-an-administrator[Managing alerts as an administrator] include::modules/oadp-list-of-metrics.adoc[leveloffset=+2] include::modules/oadp-viewing-metrics-ui.adoc[leveloffset=+2] diff --git a/cicd/gitops/monitoring-argo-cd-custom-resource-workloads.adoc b/cicd/gitops/monitoring-argo-cd-custom-resource-workloads.adoc index 0e2f7bf946..43941ea9b6 100644 --- a/cicd/gitops/monitoring-argo-cd-custom-resource-workloads.adoc +++ b/cicd/gitops/monitoring-argo-cd-custom-resource-workloads.adoc @@ -19,7 +19,7 @@ You can enable and disable the setting for monitoring Argo CD custom resource wo * {gitops-title} is installed in your cluster. * The monitoring stack is configured in your cluster in the `openshift-monitoring` project. In addition, the Argo CD instance is in a namespace that you can monitor through Prometheus. * The `kube-state-metrics` service is running in your cluster. 
-* Optional: If you are enabling monitoring for an Argo CD instance already present in a user-defined project, ensure that the monitoring is xref:../../observability/monitoring/enabling-monitoring-for-user-defined-projects.adoc#enabling-monitoring-for-user-defined-projects_enabling-monitoring-for-user-defined-projects[enabled for user-defined projects] in your cluster. +* Optional: If you are enabling monitoring for an Argo CD instance already present in a user-defined project, ensure that the monitoring is xref:../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#enabling-monitoring-for-user-defined-projects-uwm_preparing-to-configure-the-monitoring-stack-uwm[enabled for user-defined projects] in your cluster. + [NOTE] ==== @@ -35,4 +35,4 @@ include::modules/gitops-disabling-monitoring-for-argo-cd-custom-resource-workloa [role="_additional-resources"] [id="additional-resources_monitoring-argo-cd-custom-resource-workloads"] == Additional resources -* xref:../../observability/monitoring/enabling-monitoring-for-user-defined-projects.adoc#enabling-monitoring-for-user-defined-projects[Enabling monitoring for user-defined projects] +* xref:../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#enabling-monitoring-for-user-defined-projects-uwm_preparing-to-configure-the-monitoring-stack-uwm[Enabling monitoring for user-defined projects] diff --git a/cicd/pipelines/creating-applications-with-cicd-pipelines.adoc b/cicd/pipelines/creating-applications-with-cicd-pipelines.adoc index 28e698bd31..817f094f93 100644 --- a/cicd/pipelines/creating-applications-with-cicd-pipelines.adoc +++ b/cicd/pipelines/creating-applications-with-cicd-pipelines.adoc @@ -68,7 +68,7 @@ include::modules/op-enabling-monitoring-of-event-listeners-for-triggers-for-user [role="_additional-resources"] .Additional resources -* 
xref:../../observability/monitoring/enabling-monitoring-for-user-defined-projects.adoc#enabling-monitoring-for-user-defined-projects[Enabling monitoring for user-defined projects] +* xref:../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#enabling-monitoring-for-user-defined-projects-uwm_preparing-to-configure-the-monitoring-stack-uwm[Enabling monitoring for user-defined projects] include::modules/op-configuring-pull-request-capabilities-in-GitHub-interceptor.adoc[leveloffset=+1] diff --git a/edge_computing/day_2_core_cnf_clusters/observability/telco-observability.adoc b/edge_computing/day_2_core_cnf_clusters/observability/telco-observability.adoc index 30b84c6480..b9a55c7769 100644 --- a/edge_computing/day_2_core_cnf_clusters/observability/telco-observability.adoc +++ b/edge_computing/day_2_core_cnf_clusters/observability/telco-observability.adoc @@ -18,16 +18,16 @@ include::modules/telco-observability-monitoring-stack.adoc[leveloffset=+1] [role="_additional-resources"] .Additional resources -* xref:../../../observability/monitoring/monitoring-overview.adoc#understanding-the-monitoring-stack_monitoring-overview[Understanding the monitoring stack] +* xref:../../../observability/monitoring/about-ocp-monitoring/about-ocp-monitoring.adoc#about-ocp-monitoring[About {product-title} monitoring] -* xref:../../../observability/monitoring/configuring-the-monitoring-stack.adoc#configuring-the-monitoring-stack[Configuring the monitoring stack] +* xref:../../../observability/monitoring/getting-started/core-platform-monitoring-first-steps.adoc#core-platform-monitoring-first-steps[Core platform monitoring first steps] include::modules/telco-observability-key-performance-metrics.adoc[leveloffset=+1] [role="_additional-resources"] .Additional resources -* xref:../../../observability/monitoring/managing-metrics.adoc#managing-metrics[Managing metrics] +* 
xref:../../../observability/monitoring/accessing-metrics/accessing-metrics-as-an-administrator.adoc#accessing-metrics-as-an-administrator[Accessing metrics as an administrator] * xref:../../../storage/persistent_storage/persistent_storage_local/persistent-storage-local.adoc#local-storage-install_persistent-storage-local[Persistent storage using local volumes] * xref:../../../scalability_and_performance/telco_ref_design_specs/ran/telco-ran-ref-du-crs.adoc#cluster-tuning-crs_ran-ref-design-crs[Cluster tuning reference CRs] @@ -38,7 +38,7 @@ include::modules/telco-observability-alerting.adoc[leveloffset=+1] [role="_additional-resources"] .Additional resources -* xref:../../../observability/monitoring/managing-alerts.adoc#managing-alerts[Managing alerts] +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#about-managing-alerts_key-concepts[Managing alerts] include::modules/telco-observability-workload-monitoring.adoc[leveloffset=+1] @@ -47,6 +47,7 @@ include::modules/telco-observability-workload-monitoring.adoc[leveloffset=+1] * xref:../../../rest_api/monitoring_apis/servicemonitor-monitoring-coreos-com-v1.adoc#servicemonitor-monitoring-coreos-com-v1[ServiceMonitor[monitoring.coreos.com/v1]] -* xref:../../../observability/monitoring/enabling-monitoring-for-user-defined-projects.adoc#enabling-monitoring-for-user-defined-projects[Enabling monitoring for user-defined projects] +* xref:../../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#enabling-monitoring-for-user-defined-projects-uwm_preparing-to-configure-the-monitoring-stack-uwm[Enabling monitoring for user-defined projects] + +* xref:../../../observability/monitoring/managing-alerts/managing-alerts-as-an-administrator.adoc#managing-alerting-rules-for-user-defined-projects_managing-alerts-as-an-administrator[Managing alerting rules for user-defined projects] -* 
xref:../../../observability/monitoring/managing-alerts.adoc#managing-alerting-rules-for-user-defined-projects_managing-alerts[Managing alerting rules for user-defined projects] diff --git a/getting_started/openshift-overview.adoc b/getting_started/openshift-overview.adoc index 8072f829fa..7646eb7a72 100644 --- a/getting_started/openshift-overview.adoc +++ b/getting_started/openshift-overview.adoc @@ -106,10 +106,10 @@ be reviewed by cluster administrators and xref:../operators/admin/olm-adding-ope * **xref:../scalability_and_performance/recommended-performance-scale-practices/recommended-infrastructure-practices.adoc#scaling-cluster-monitoring-operator[Scale] and xref:../scalability_and_performance/using-node-tuning-operator.adoc#using-node-tuning-operator[tune] clusters**: Set cluster limits, tune nodes, scale cluster monitoring, and optimize networking, storage, and routes for your environment. -* **xref:../disconnected/updating/disconnected-update-osus.adoc#update-service-overview_updating-disconnected-cluster-osus[Using the OpenShift Update Service in a disconnected environement]**: Learn about installing and managing a local OpenShift Update Service for recommending {product-title} updates in disconnected environments. +* **xref:../disconnected/updating/disconnected-update-osus.adoc#update-service-overview_updating-disconnected-cluster-osus[Using the OpenShift Update Service in a disconnected environment]**: Learn about installing and managing a local OpenShift Update Service for recommending {product-title} updates in disconnected environments. -* **xref:../observability/monitoring/monitoring-overview.adoc#monitoring-overview[Monitor clusters]**: -Learn to xref:../observability/monitoring/configuring-the-monitoring-stack.adoc#configuring-the-monitoring-stack[configure the monitoring stack]. 
-After configuring monitoring, use the web console to access xref:../observability/monitoring/reviewing-monitoring-dashboards.adoc#reviewing-monitoring-dashboards[monitoring dashboards]. In addition to infrastructure metrics, you can also scrape and view metrics for your own services. +* **xref:../observability/monitoring/about-ocp-monitoring/about-ocp-monitoring.adoc#about-ocp-monitoring[Monitor clusters]**: +Learn to xref:../observability/monitoring/getting-started/core-platform-monitoring-first-steps.adoc#core-platform-monitoring-first-steps[configure the monitoring stack]. +After configuring monitoring, use the web console to access xref:../observability/monitoring/accessing-metrics/accessing-metrics-as-an-administrator.adoc#reviewing-monitoring-dashboards-admin_accessing-metrics-as-an-administrator[monitoring dashboards]. In addition to infrastructure metrics, you can also scrape and view metrics for your own services. * **xref:../support/remote_health_monitoring/about-remote-health-monitoring.adoc#about-remote-health-monitoring_about-remote-health-monitoring[Remote health monitoring]**: {product-title} collects anonymized aggregated information about your cluster. Using Telemetry and the Insights Operator, this data is received by Red Hat and used to improve {product-title}. You can view the xref:../support/remote_health_monitoring/showing-data-collected-by-remote-health-monitoring.adoc#showing-data-collected-by-remote-health-monitoring_showing-data-collected-by-remote-health-monitoring[data collected by remote health monitoring]. 
diff --git a/installing/installing_vsphere/using-vsphere-problem-detector-operator.adoc b/installing/installing_vsphere/using-vsphere-problem-detector-operator.adoc index 0fe7525dfc..5dd7ccc8fd 100644 --- a/installing/installing_vsphere/using-vsphere-problem-detector-operator.adoc +++ b/installing/installing_vsphere/using-vsphere-problem-detector-operator.adoc @@ -30,4 +30,4 @@ include::modules/vsphere-problem-detector-metrics.adoc[leveloffset=+1] [role="_additional-resources"] == Additional resources -* xref:../../observability/monitoring/monitoring-overview.adoc#monitoring-overview[Monitoring overview] +* xref:../../observability/monitoring/about-ocp-monitoring/about-ocp-monitoring.adoc#about-ocp-monitoring[About {product-title} monitoring] diff --git a/installing/overview/installing-preparing.adoc b/installing/overview/installing-preparing.adoc index 2a3919f6b0..e2f89ec81f 100644 --- a/installing/overview/installing-preparing.adoc +++ b/installing/overview/installing-preparing.adoc @@ -110,12 +110,12 @@ For a production cluster, you must configure the following integrations: * xref:../../storage/understanding-persistent-storage.adoc#understanding-persistent-storage[Persistent storage] * xref:../../authentication/understanding-identity-provider.adoc#understanding-identity-provider[An identity provider] -* xref:../../observability/monitoring/configuring-the-monitoring-stack.adoc#configuring-the-monitoring-stack[Monitoring core OpenShift Container Platform components] +* xref:../../observability/monitoring/getting-started/core-platform-monitoring-first-steps.adoc#core-platform-monitoring-first-steps[Monitoring core {product-title} components] [id="installing-preparing-cluster-for-workloads"] == Preparing your cluster for workloads -Depending on your workload needs, you might need to take extra steps before you begin deploying applications. 
For example, after you prepare infrastructure to support your application xref:../../cicd/builds/build-strategies.adoc#build-strategies[build strategy], you might need to make provisions for xref:../../scalability_and_performance/cnf-tuning-low-latency-nodes-with-perf-profile.adoc#cnf-low-latency-perf-profile[low-latency] workloads or to xref:../../nodes/pods/nodes-pods-secrets.adoc#nodes-pods-secrets[protect sensitive workloads]. You can also configure xref:../../observability/monitoring/enabling-monitoring-for-user-defined-projects.adoc#enabling-monitoring-for-user-defined-projects[monitoring] for application workloads. +Depending on your workload needs, you might need to take extra steps before you begin deploying applications. For example, after you prepare infrastructure to support your application xref:../../cicd/builds/build-strategies.adoc#build-strategies[build strategy], you might need to make provisions for xref:../../scalability_and_performance/cnf-tuning-low-latency-nodes-with-perf-profile.adoc#cnf-low-latency-perf-profile[low-latency] workloads or to xref:../../nodes/pods/nodes-pods-secrets.adoc#nodes-pods-secrets[protect sensitive workloads]. You can also configure xref:../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#enabling-monitoring-for-user-defined-projects-uwm_preparing-to-configure-the-monitoring-stack-uwm[monitoring] for application workloads. If you plan to run xref:../../windows_containers/enabling-windows-container-workloads.adoc#enabling-windows-container-workloads[Windows workloads], you must enable xref:../../networking/ovn_kubernetes_network_provider/configuring-hybrid-networking.adoc#configuring-hybrid-networking[hybrid networking with OVN-Kubernetes] during the installation process; hybrid networking cannot be enabled after your cluster is installed. 
[id="supported-installation-methods-for-different-platforms"] diff --git a/installing/validation_and_troubleshooting/validating-an-installation.adoc b/installing/validation_and_troubleshooting/validating-an-installation.adoc index 82b541feee..27f624f90f 100644 --- a/installing/validation_and_troubleshooting/validating-an-installation.adoc +++ b/installing/validation_and_troubleshooting/validating-an-installation.adoc @@ -56,7 +56,7 @@ include::modules/checking-cluster-resource-availability-and-utilization.adoc[lev [role="_additional-resources"] .Additional resources -* See xref:../../observability/monitoring/monitoring-overview.adoc#monitoring-overview[Monitoring overview] for more information about the {product-title} monitoring stack. +* See xref:../../observability/monitoring/about-ocp-monitoring/about-ocp-monitoring.adoc#about-ocp-monitoring[About {product-title} monitoring] for more information about the {product-title} monitoring stack. //Listing alerts that are firing include::modules/listing-alerts-that-are-firing.adoc[leveloffset=+1] @@ -64,7 +64,7 @@ include::modules/listing-alerts-that-are-firing.adoc[leveloffset=+1] [role="_additional-resources"] .Additional resources -* See xref:../../observability/monitoring/managing-alerts.adoc#managing-alerts[Managing alerts] for further details about alerting in {product-title}. +* See xref:../../observability/monitoring/managing-alerts/managing-alerts-as-an-administrator.adoc#managing-alerts-as-an-administrator[Managing alerts as an Administrator] for further details about alerting in {product-title}. 
[id="validating-an-installation-next-steps"] == Next steps diff --git a/machine_configuration/machine-config-daemon-metrics.adoc b/machine_configuration/machine-config-daemon-metrics.adoc index f2e2c9d423..adf0316e86 100644 --- a/machine_configuration/machine-config-daemon-metrics.adoc +++ b/machine_configuration/machine-config-daemon-metrics.adoc @@ -13,7 +13,7 @@ include::modules/machine-config-daemon-metrics-understanding.adoc[leveloffset=+1 [role="_additional-resources"] .Additional resources ifndef::openshift-rosa,openshift-dedicated[] -* xref:../observability/monitoring/monitoring-overview.adoc#monitoring-overview[Monitoring overview] +* xref:../observability/monitoring/about-ocp-monitoring/about-ocp-monitoring.adoc#about-ocp-monitoring[About {product-title} monitoring] * xref:../support/gathering-cluster-data.adoc#gathering-cluster-data[Gathering data about your cluster] endif::openshift-rosa,openshift-dedicated[] ifdef::openshift-rosa,openshift-dedicated[] diff --git a/machine_management/creating-infrastructure-machinesets.adoc b/machine_management/creating-infrastructure-machinesets.adoc index 15360cdeff..33e2299eaa 100644 --- a/machine_management/creating-infrastructure-machinesets.adoc +++ b/machine_management/creating-infrastructure-machinesets.adoc @@ -129,6 +129,6 @@ include::modules/nodes-cluster-resource-override-move-infra.adoc[leveloffset=+2] [role="_additional-resources"] .Additional resources -* xref:../observability/monitoring/configuring-the-monitoring-stack.adoc#moving-monitoring-components-to-different-nodes_configuring-the-monitoring-stack[Moving monitoring components to different nodes] +* xref:../observability/monitoring/configuring-core-platform-monitoring/configuring-performance-and-scalability.adoc#moving-monitoring-components-to-different-nodes-cpm_configuring-performance-and-scalability[Moving monitoring components to different nodes] * 
xref:../observability/logging/scheduling_resources/logging-node-selectors.adoc#logging-node-selectors[Using node selectors to move logging resources] * xref:../observability/logging/scheduling_resources/logging-taints-tolerations.adoc#cluster-logging-logstore-tolerations_logging-taints-tolerations[Using taints and tolerations to control logging pod placement] diff --git a/migrating_from_ocp_3_to_4/index.adoc b/migrating_from_ocp_3_to_4/index.adoc index cbf073deaa..bdf42af961 100644 --- a/migrating_from_ocp_3_to_4/index.adoc +++ b/migrating_from_ocp_3_to_4/index.adoc @@ -14,7 +14,7 @@ Before migrating from {product-title} 3 to 4, you can check xref:../migrating_fr * xref:../architecture/architecture.adoc#architecture[Architecture] * xref:../architecture/architecture-installation.adoc#architecture-installation[Installation and update] -* xref:../storage/index.adoc#index[Storage], xref:../networking/understanding-networking.adoc#understanding-networking[network], xref:../observability/logging/cluster-logging.adoc#cluster-logging[logging], xref:../security/index.adoc#index[security], and xref:../observability/monitoring/monitoring-overview.adoc#monitoring-overview[monitoring considerations] +* xref:../storage/index.adoc#index[Storage], xref:../networking/understanding-networking.adoc#understanding-networking[network], xref:../observability/logging/cluster-logging.adoc#cluster-logging[logging], xref:../security/index.adoc#index[security], and xref:../observability/monitoring/about-ocp-monitoring/about-ocp-monitoring.adoc#about-ocp-monitoring[monitoring considerations] [id="mtc-3-to-4-overview-planning-network-considerations-mtc"] == Planning network considerations diff --git a/migrating_from_ocp_3_to_4/planning-migration-3-4.adoc b/migrating_from_ocp_3_to_4/planning-migration-3-4.adoc index 4e8a20e175..83522b1462 100644 --- a/migrating_from_ocp_3_to_4/planning-migration-3-4.adoc +++ b/migrating_from_ocp_3_to_4/planning-migration-3-4.adoc @@ -253,4 +253,4 @@ Review the 
following monitoring changes when transitioning from {product-title} The default alert that triggers to ensure the availability of the monitoring structure was called `DeadMansSwitch` in {product-title} 3.11. This was renamed to `Watchdog` in {product-title} 4. If you had PagerDuty integration set up with this alert in {product-title} 3.11, you must set up the PagerDuty integration for the `Watchdog` alert in {product-title} 4. -For more information, see xref:../observability/monitoring/managing-alerts.adoc#applying-custom-alertmanager-configuration_managing-alerts[Applying custom Alertmanager configuration]. +For more information, see xref:../observability/monitoring/configuring-core-platform-monitoring/configuring-alerts-and-notifications.adoc#configuring-alert-routing-default-platform-alerts_configuring-alerts-and-notifications[Configuring alert routing for default platform alerts]. diff --git a/modules/accessing-metrics-outside-cluster.adoc b/modules/accessing-metrics-outside-cluster.adoc index d8788c0a52..cce1c57752 100644 --- a/modules/accessing-metrics-outside-cluster.adoc +++ b/modules/accessing-metrics-outside-cluster.adoc @@ -1,6 +1,5 @@ // Module included in the following assemblies: // -// * observability/monitoring/enabling-monitoring-for-user-defined-projects.adoc // * observability/monitoring/accessing-third-party-monitoring-apis.adoc :_mod-docs-content-type: PROCEDURE diff --git a/modules/monitoring-about-accessing-monitoring-web-service-apis.adoc b/modules/monitoring-about-accessing-monitoring-web-service-apis.adoc index 224a4601aa..16d6aaaba0 100644 --- a/modules/monitoring-about-accessing-monitoring-web-service-apis.adoc +++ b/modules/monitoring-about-accessing-monitoring-web-service-apis.adoc @@ -13,7 +13,7 @@ You can directly access web service API endpoints from the command line for the * Thanos Ruler * Thanos Querier -[NOTE] +[IMPORTANT] ==== To access Thanos Ruler and Thanos Querier service APIs, the requesting account must have `get` 
permission on the namespaces resource, which can be granted by binding the `cluster-monitoring-view` cluster role to the account. ==== diff --git a/modules/monitoring-about-managing-alerts.adoc b/modules/monitoring-about-managing-alerts.adoc new file mode 100644 index 0000000000..9ba92e54ed --- /dev/null +++ b/modules/monitoring-about-managing-alerts.adoc @@ -0,0 +1,18 @@ +// Module included in the following assemblies: +// +// * observability/monitoring/managing-alerts.adoc + +:_mod-docs-content-type: CONCEPT +[id="about-managing-alerts_{context}"] += Managing alerts + +In {product-title}, the Alerting UI enables you to manage alerts, silences, and alerting rules. + +* *Alerting rules*. Alerting rules contain a set of conditions that outline a particular state within a cluster. Alerts are triggered when those conditions are true. An alerting rule can be assigned a severity that defines how the alerts are routed. +* *Alerts*. An alert is fired when the conditions defined in an alerting rule are true. Alerts provide a notification that a set of circumstances is apparent within an {product-title} cluster. +* *Silences*. A silence can be applied to an alert to prevent notifications from being sent when the conditions for an alert are true. You can mute an alert after the initial notification, while you work on resolving the issue. + +[NOTE] +==== +The alerts, silences, and alerting rules that are available in the Alerting UI relate to the projects that you have access to. For example, if you are logged in as a user with the `cluster-admin` role, you can access all alerts, silences, and alerting rules.
+==== diff --git a/modules/monitoring-about-monitoring-dashboards.adoc b/modules/monitoring-about-monitoring-dashboards.adoc new file mode 100644 index 0000000000..83359b1694 --- /dev/null +++ b/modules/monitoring-about-monitoring-dashboards.adoc @@ -0,0 +1,28 @@ +// Module included in the following assemblies: +// +// * observability/monitoring/reviewing-monitoring-dashboards.adoc + +:_mod-docs-content-type: CONCEPT +[id="mon-dashboards-adm-perspective_{context}"] += Monitoring dashboards in the Administrator perspective + +Use the *Administrator* perspective to access dashboards for the core {product-title} components, including the following items: + +* API performance +* etcd +* Kubernetes compute resources +* Kubernetes network resources +* Prometheus +* USE method dashboards relating to cluster and node performance +* Node performance metrics + +.Example dashboard in the Administrator perspective +image::monitoring-dashboard-administrator.png[] + +[id="mon-dashboards-dev-perspective_{context}"] += Monitoring dashboards in the Developer perspective + +In the *Developer* perspective, you can access only the Kubernetes compute resources dashboards: + +.Example dashboard in the Developer perspective +image::observe-dashboard-developer.png[] \ No newline at end of file diff --git a/modules/monitoring-about-querying-metrics.adoc b/modules/monitoring-about-querying-metrics.adoc deleted file mode 100644 index a8e4651001..0000000000 --- a/modules/monitoring-about-querying-metrics.adoc +++ /dev/null @@ -1,19 +0,0 @@ -// Module included in the following assemblies: -// -// * observability/monitoring/managing-metrics.adoc -// * virt/support/virt-prometheus-queries.adoc - -:_mod-docs-content-type: CONCEPT -[id="about-querying-metrics_{context}"] -= Querying metrics - -The {product-title} monitoring dashboard enables you to run Prometheus Query Language (PromQL) queries to examine metrics visualized on a plot. 
This functionality provides information about the state of a cluster and any user-defined workloads that you are monitoring. - -ifndef::openshift-dedicated,openshift-rosa[] -As a cluster administrator, you can query metrics for all core {product-title} and user-defined projects. -endif::openshift-dedicated,openshift-rosa[] -ifdef::openshift-dedicated,openshift-rosa[] -As a `dedicated-admin`, you can query one or more namespaces at a time for metrics about user-defined projects. -endif::openshift-dedicated,openshift-rosa[] - -As a developer, you must specify a project name when querying metrics. You must have the required privileges to view metrics for the selected project. diff --git a/modules/monitoring-about-specifying-limits-and-requests-for-monitoring-components.adoc b/modules/monitoring-about-specifying-limits-and-requests-for-monitoring-components.adoc index 1131a8e800..cb1d3ff54b 100644 --- a/modules/monitoring-about-specifying-limits-and-requests-for-monitoring-components.adoc +++ b/modules/monitoring-about-specifying-limits-and-requests-for-monitoring-components.adoc @@ -3,25 +3,29 @@ // * observability/monitoring/configuring-the-monitoring-stack.adoc :_mod-docs-content-type: CONCEPT + [id="about-specifying-limits-and-requests-for-monitoring-components_{context}"] = About specifying limits and requests for monitoring components -You can configure resource limits and request settings for core platform monitoring components and for the components that monitor user-defined projects, including the following components: +You can configure resource limits and requests for the following core platform monitoring components: -* Alertmanager (for core platform monitoring and for user-defined projects) +* Alertmanager * kube-state-metrics * monitoring-plugin * node-exporter * openshift-state-metrics -* Prometheus (for core platform monitoring and for user-defined projects) +* Prometheus * Metrics Server * Prometheus Operator and its admission webhook service * 
Telemeter Client * Thanos Querier + +You can configure resource limits and requests for the following components that monitor user-defined projects: + +* Alertmanager +* Prometheus * Thanos Ruler -By defining resource limits, you limit a container's resource usage, which prevents the container from exceeding the specified maximum values for CPU and memory resources. - -By defining resource requests, you specify that a container can be scheduled only on a node that has enough CPU and memory resources available to match the requested resources. - +By defining the resource limits, you limit a container's resource usage, which prevents the container from exceeding the specified maximum values for CPU and memory resources. +By defining the resource requests, you specify that a container can be scheduled only on a node that has enough CPU and memory resources available to match the requested resources. \ No newline at end of file diff --git a/modules/monitoring-accessing-the-alerting-ui.adoc b/modules/monitoring-accessing-the-alerting-ui.adoc index 8ef40b41fd..8bf74e776a 100644 --- a/modules/monitoring-accessing-the-alerting-ui.adoc +++ b/modules/monitoring-accessing-the-alerting-ui.adoc @@ -4,18 +4,46 @@ // * logging/logging_alerts/log-storage-alerts.adoc :_mod-docs-content-type: PROCEDURE -[id="monitoring-accessing-the-alerting-ui_{context}"] -= Accessing the Alerting UI in the Administrator and Developer perspectives -The Alerting UI is accessible through the *Administrator* perspective and the *Developer* perspective of the {product-title} web console. +// The ultimate solution DOES NOT NEED separate IDs and titles, it is just needed for now so that the tests will not break -* In the *Administrator* perspective, go to *Observe* -> *Alerting*. The three main pages in the Alerting UI in this perspective are the *Alerts*, *Silences*, and *Alerting rules* pages. 
+// tag::ADM[] +[id="monitoring-accessing-the-alerting-ui-adm_{context}"] += Accessing the Alerting UI from the Administrator perspective +// end::ADM[] -//Next to the title of each of these pages is a link to the Alertmanager interface. +// tag::DEV[] +[id="monitoring-accessing-the-alerting-ui-dev_{context}"] += Accessing the Alerting UI from the Developer perspective +// end::DEV[] -* In the *Developer* perspective, go to *Observe* -> ** -> *Alerts*. In this perspective, alerts, silences, and alerting rules are all managed from the *Alerts* page. The results shown in the *Alerts* page are specific to the selected project. +// Set attributes to distinguish between the Administrator perspective (ADM) and Developer perspective (DEV) examples + +// tag::ADM[] +:perspective: Administrator +// end::ADM[] + +// tag::DEV[] +:perspective: Developer +// end::DEV[] + +The Alerting UI is accessible through the *{perspective}* perspective of the {product-title} web console. + +// tag::ADM[] +* From the *Administrator* perspective, go to *Observe* -> *Alerting*. The three main pages in the Alerting UI in this perspective are the *Alerts*, *Silences*, and *Alerting rules* pages. +// end::ADM[] + +// tag::DEV[] +* From the *Developer* perspective, go to *Observe* and go to the *Alerts* tab. +* Select the project that you want to manage alerts for from the *Project:* list. + +In this perspective, alerts, silences, and alerting rules are all managed from the *Alerts* tab. The results shown in the *Alerts* tab are specific to the selected project. [NOTE] ==== In the *Developer* perspective, you can select from core {product-title} and user-defined projects that you have access to in the *Project: * list. However, alerts, silences, and alerting rules relating to core {product-title} projects are not displayed if you are not logged in as a cluster administrator. ==== +// end::DEV[] + +// Unset the source code block attributes just to be safe.
+:!perspective: diff --git a/modules/monitoring-adding-a-secret-to-the-alertmanager-configuration.adoc b/modules/monitoring-adding-a-secret-to-the-alertmanager-configuration.adoc index 17b599cd2c..383a6c45d3 100644 --- a/modules/monitoring-adding-a-secret-to-the-alertmanager-configuration.adoc +++ b/modules/monitoring-adding-a-secret-to-the-alertmanager-configuration.adoc @@ -2,64 +2,68 @@ // // * observability/monitoring/configuring-the-monitoring-stack.adoc -:_mod-docs-content-type: PROCEDURE [id="monitoring-adding-a-secret-to-the-alertmanager-configuration_{context}"] -= Adding a secret to the Alertmanager configuration += Adding a secret to the Alertmanager configuration -ifndef::openshift-dedicated,openshift-rosa[] -You can add secrets to the Alertmanager configuration for core platform monitoring components by editing the `cluster-monitoring-config` config map in the `openshift-monitoring` project. -endif::openshift-dedicated,openshift-rosa[] -ifdef::openshift-dedicated,openshift-rosa[] -You can add secrets to the Alertmanager configuration for user-defined projects by editing the `user-workload-monitoring-config` config map in the `openshift-user-workload-monitoring` project. -endif::openshift-dedicated,openshift-rosa[] +// Set attributes to distinguish between cluster monitoring example (core platform monitoring - CPM) and user workload monitoring (UWM) examples + +// tag::CPM[] +:configmap-name: cluster-monitoring-config +:namespace-name: openshift-monitoring +:component: alertmanagerMain +// end::CPM[] +// tag::UWM[] +:configmap-name: user-workload-monitoring-config +:namespace-name: openshift-user-workload-monitoring +:component: alertmanager +// end::UWM[] + +You can add secrets to the Alertmanager configuration by editing the `{configmap-name}` config map in the `{namespace-name}` project. 
After you add a secret to the config map, the secret is mounted as a volume at `/etc/alertmanager/secrets/` within the `alertmanager` container for the Alertmanager pods. .Prerequisites +// tag::CPM[] +* You have access to the cluster as a user with the `cluster-admin` cluster role. +* You have created the `cluster-monitoring-config` config map. +// end::CPM[] +// tag::UWM[] ifndef::openshift-dedicated,openshift-rosa[] -* *If you are configuring core {product-title} monitoring components in the `openshift-monitoring` project*: -** You have access to the cluster as a user with the `cluster-admin` cluster role. -** You have created the `cluster-monitoring-config` config map. -** You have created the secret to be configured in Alertmanager in the `openshift-monitoring` project. -* *If you are configuring components that monitor user-defined projects*: -** You have access to the cluster as a user with the `cluster-admin` cluster role, or as a user with the `user-workload-monitoring-config-edit` role in the `openshift-user-workload-monitoring` project. -** You have created the secret to be configured in Alertmanager in the `openshift-user-workload-monitoring` project. -** A cluster administrator has enabled monitoring for user-defined projects. +* You have access to the cluster as a user with the `cluster-admin` cluster role or as a user with the `user-workload-monitoring-config-edit` role in the `openshift-user-workload-monitoring` project. +* A cluster administrator has enabled monitoring for user-defined projects. endif::openshift-dedicated,openshift-rosa[] ifdef::openshift-dedicated,openshift-rosa[] * You have access to the cluster as a user with the `dedicated-admin` role. * The `user-workload-monitoring-config` `ConfigMap` object exists. This object is created by default when the cluster is created. -* You have created the secret to be configured in Alertmanager in the `openshift-user-workload-monitoring` project. 
endif::openshift-dedicated,openshift-rosa[] +// end::UWM[] +* You have created the secret to be configured in Alertmanager in the `{namespace-name}` project. * You have installed the OpenShift CLI (`oc`). .Procedure -. Edit the `ConfigMap` object. -ifndef::openshift-dedicated,openshift-rosa[] -** *To add a secret configuration to Alertmanager for core platform monitoring*: -.. Edit the `cluster-monitoring-config` config map in the `openshift-monitoring` project: +. Edit the `{configmap-name}` config map in the `{namespace-name}` project: + -[source,terminal] +[source,terminal,subs="attributes+"] ---- -$ oc -n openshift-monitoring edit configmap cluster-monitoring-config +$ oc -n {namespace-name} edit configmap {configmap-name} ---- -.. Add a `secrets:` section under `data/config.yaml/alertmanagerMain` with the following configuration: +. Add a `secrets:` section under `data/config.yaml/{component}` with the following configuration: + -[source,yaml] +[source,yaml,subs="attributes+"] ---- apiVersion: v1 kind: ConfigMap metadata: - name: cluster-monitoring-config - namespace: openshift-monitoring + name: {configmap-name} + namespace: {namespace-name} data: config.yaml: | - alertmanagerMain: - secrets: <1> - - <2> + {component}: + secrets: # <1> + - # <2> - ---- <1> This section contains the secrets to be mounted into Alertmanager. The secrets must be located within the same namespace as the Alertmanager object. 
@@ -67,67 +71,25 @@ data: + The following sample config map settings configure Alertmanager to use two `Secret` objects named `test-secret-basic-auth` and `test-secret-api-token`: + -[source,yaml] +[source,yaml,subs="attributes+"] ---- apiVersion: v1 kind: ConfigMap metadata: - name: cluster-monitoring-config - namespace: openshift-monitoring + name: {configmap-name} + namespace: {namespace-name} data: config.yaml: | - alertmanagerMain: + {component}: secrets: - test-secret-basic-auth - test-secret-api-token ---- -** *To add a secret configuration to Alertmanager for user-defined project monitoring*: -endif::openshift-dedicated,openshift-rosa[] - -.. Edit the `user-workload-monitoring-config` config map in the `openshift-user-workload-monitoring` project: -+ -[source,terminal] ----- -$ oc -n openshift-user-workload-monitoring edit configmap user-workload-monitoring-config ----- - -.. Add a `secrets:` section under `data/config.yaml/alertmanager/secrets` with the following configuration: -+ -[source,yaml] ----- -apiVersion: v1 -kind: ConfigMap -metadata: - name: user-workload-monitoring-config - namespace: openshift-user-workload-monitoring -data: - config.yaml: | - alertmanager: - secrets: <1> - - <2> - - ----- -<1> This section contains the secrets to be mounted into Alertmanager. The secrets must be located within the same namespace as the Alertmanager object. -<2> The name of the `Secret` object that contains authentication credentials for the receiver. If you add multiple secrets, place each one on a new line. -+ -The following sample config map settings configure Alertmanager to use two `Secret` objects named `test-secret` and `test-secret-api-token`: -+ -[source,yaml] ----- -apiVersion: v1 -kind: ConfigMap -metadata: - name: user-workload-monitoring-config - namespace: openshift-user-workload-monitoring -data: - config.yaml: | - alertmanager: - enabled: true - secrets: - - test-secret - - test-api-receiver-token ----- - . Save the file to apply the changes. 
The new configuration is applied automatically. +// Unset the source code block attributes just to be safe. +:!configmap-name: +:!namespace-name: +:!component: + diff --git a/modules/monitoring-assigning-tolerations-to-monitoring-components.adoc b/modules/monitoring-assigning-tolerations-to-monitoring-components.adoc index 947056f69d..f93507178b 100644 --- a/modules/monitoring-assigning-tolerations-to-monitoring-components.adoc +++ b/modules/monitoring-assigning-tolerations-to-monitoring-components.adoc @@ -3,54 +3,67 @@ // * observability/monitoring/configuring-the-monitoring-stack.adoc :_mod-docs-content-type: PROCEDURE + [id="assigning-tolerations-to-monitoring-components_{context}"] = Assigning tolerations to monitoring components -ifndef::openshift-dedicated,openshift-rosa[] -You can assign tolerations to any of the monitoring stack components to enable moving them to tainted nodes. -endif::openshift-dedicated,openshift-rosa[] +// Set attributes to distinguish between cluster monitoring example (core platform monitoring - CPM) and user workload monitoring (UWM) examples. +// tag::CPM[] +:configmap-name: cluster-monitoring-config +:namespace-name: openshift-monitoring +:component: alertmanagerMain +// end::CPM[] +// tag::UWM[] +:configmap-name: user-workload-monitoring-config +:namespace-name: openshift-user-workload-monitoring +:component: thanosRuler +// end::UWM[] -ifdef::openshift-dedicated,openshift-rosa[] +// tag::CPM[] +You can assign tolerations to any of the monitoring stack components to enable moving them to tainted nodes. +// end::CPM[] + +// tag::UWM[] You can assign tolerations to the components that monitor user-defined projects, to enable moving them to tainted worker nodes. Scheduling is not permitted on control plane or infrastructure nodes. -endif::openshift-dedicated,openshift-rosa[] +// end::UWM[] .Prerequisites +// tag::CPM[] +* You have access to the cluster as a user with the `cluster-admin` cluster role. 
+* You have created the `cluster-monitoring-config` `ConfigMap` object. +// end::CPM[] + +// tag::UWM[] ifndef::openshift-dedicated,openshift-rosa[] -* *If you are configuring core {product-title} monitoring components*: -** You have access to the cluster as a user with the `cluster-admin` cluster role. -** You have created the `cluster-monitoring-config` `ConfigMap` object. -* *If you are configuring components that monitor user-defined projects*: -** You have access to the cluster as a user with the `cluster-admin` cluster role, or as a user with the `user-workload-monitoring-config-edit` role in the `openshift-user-workload-monitoring` project. -** A cluster administrator has enabled monitoring for user-defined projects. +* You have access to the cluster as a user with the `cluster-admin` cluster role, or as a user with the `user-workload-monitoring-config-edit` role in the `openshift-user-workload-monitoring` project. +* A cluster administrator has enabled monitoring for user-defined projects. endif::openshift-dedicated,openshift-rosa[] ifdef::openshift-dedicated,openshift-rosa[] * You have access to the cluster as a user with the `dedicated-admin` role. * The `user-workload-monitoring-config` `ConfigMap` object exists in the `openshift-user-workload-monitoring` namespace. This object is created by default when the cluster is created. endif::openshift-dedicated,openshift-rosa[] +// end::UWM[] * You have installed the OpenShift CLI (`oc`). .Procedure -. Edit the `ConfigMap` object: -ifndef::openshift-dedicated,openshift-rosa[] -** *To assign tolerations to a component that monitors core {product-title} projects*: -.. Edit the `cluster-monitoring-config` `ConfigMap` object in the `openshift-monitoring` project: +. 
Edit the `{configmap-name}` config map in the `{namespace-name}` project: + -[source,terminal] +[source,terminal,subs="attributes+"] ---- -$ oc -n openshift-monitoring edit configmap cluster-monitoring-config +$ oc -n {namespace-name} edit configmap {configmap-name} ---- -.. Specify `tolerations` for the component: +. Specify `tolerations` for the component: + -[source,yaml] +[source,yaml,subs="attributes+"] ---- apiVersion: v1 kind: ConfigMap metadata: - name: cluster-monitoring-config - namespace: openshift-monitoring + name: {configmap-name} + namespace: {namespace-name} data: config.yaml: | : @@ -60,64 +73,18 @@ data: + Substitute `` and `` accordingly. + -For example, `oc adm taint nodes node1 key1=value1:NoSchedule` adds a taint to `node1` with the key `key1` and the value `value1`. This prevents monitoring components from deploying pods on `node1` unless a toleration is configured for that taint. The following example configures the `alertmanagerMain` component to tolerate the example taint: +For example, `oc adm taint nodes node1 key1=value1:NoSchedule` adds a taint to `node1` with the key `key1` and the value `value1`. This prevents monitoring components from deploying pods on `node1` unless a toleration is configured for that taint. The following example configures the `{component}` component to tolerate the example taint: + -[source,yaml,subs=quotes] +[source,yaml,subs="attributes+"] ---- apiVersion: v1 kind: ConfigMap metadata: - name: cluster-monitoring-config - namespace: openshift-monitoring + name: {configmap-name} + namespace: {namespace-name} data: config.yaml: | - alertmanagerMain: - tolerations: - - key: "key1" - operator: "Equal" - value: "value1" - effect: "NoSchedule" ----- - -** *To assign tolerations to a component that monitors user-defined projects*: -endif::openshift-dedicated,openshift-rosa[] -.. 
Edit the `user-workload-monitoring-config` `ConfigMap` object in the `openshift-user-workload-monitoring` project: -+ -[source,terminal] ----- -$ oc -n openshift-user-workload-monitoring edit configmap user-workload-monitoring-config ----- - -.. Specify `tolerations` for the component: -+ -[source,yaml] ----- -apiVersion: v1 -kind: ConfigMap -metadata: - name: user-workload-monitoring-config - namespace: openshift-user-workload-monitoring -data: - config.yaml: | - : - tolerations: - ----- -+ -Substitute `` and `` accordingly. -+ -For example, `oc adm taint nodes node1 key1=value1:NoSchedule` adds a taint to `node1` with the key `key1` and the value `value1`. This prevents monitoring components from deploying pods on `node1` unless a toleration is configured for that taint. The following example configures the `thanosRuler` component to tolerate the example taint: -+ -[source,yaml] ----- -apiVersion: v1 -kind: ConfigMap -metadata: - name: user-workload-monitoring-config - namespace: openshift-user-workload-monitoring -data: - config.yaml: | - thanosRuler: + {component}: tolerations: - key: "key1" operator: "Equal" @@ -126,3 +93,8 @@ data: ---- . Save the file to apply the changes. The pods affected by the new configuration are automatically redeployed. + +// Unset the source code block attributes just to be safe. 
+:!configmap-name: +:!namespace-name: +:!component: \ No newline at end of file diff --git a/modules/monitoring-attaching-additional-labels-to-your-time-series-and-alerts.adoc b/modules/monitoring-attaching-additional-labels-to-your-time-series-and-alerts.adoc index e947f69ecc..0e52543f4a 100644 --- a/modules/monitoring-attaching-additional-labels-to-your-time-series-and-alerts.adoc +++ b/modules/monitoring-attaching-additional-labels-to-your-time-series-and-alerts.adoc @@ -3,109 +3,68 @@ // * observability/monitoring/configuring-the-monitoring-stack.adoc :_mod-docs-content-type: PROCEDURE + [id="attaching-additional-labels-to-your-time-series-and-alerts_{context}"] = Attaching additional labels to your time series and alerts +// Set attributes to distinguish between cluster monitoring example (core platform monitoring - CPM) and user workload monitoring (UWM) examples + +// tag::CPM[] +:configmap-name: cluster-monitoring-config +:namespace-name: openshift-monitoring +:component: prometheusK8s +// end::CPM[] +// tag::UWM[] +:configmap-name: user-workload-monitoring-config +:namespace-name: openshift-user-workload-monitoring +:component: prometheus +// end::UWM[] + You can attach custom labels to all time series and alerts leaving Prometheus by using the external labels feature of Prometheus. .Prerequisites +// tag::CPM[] +* You have access to the cluster as a user with the `cluster-admin` cluster role. +* You have created the `cluster-monitoring-config` `ConfigMap` object. +// end::CPM[] +// tag::UWM[] ifndef::openshift-dedicated,openshift-rosa[] -* *If you are configuring core {product-title} monitoring components*: -** You have access to the cluster as a user with the `cluster-admin` cluster role. -** You have created the `cluster-monitoring-config` `ConfigMap` object. 
-* *If you are configuring components that monitor user-defined projects*: -** You have access to the cluster as a user with the `cluster-admin` cluster role, or as a user with the `user-workload-monitoring-config-edit` role in the `openshift-user-workload-monitoring` project. -** A cluster administrator has enabled monitoring for user-defined projects. +* You have access to the cluster as a user with the `cluster-admin` cluster role or as a user with the `user-workload-monitoring-config-edit` role in the `openshift-user-workload-monitoring` project. +* A cluster administrator has enabled monitoring for user-defined projects. endif::openshift-dedicated,openshift-rosa[] ifdef::openshift-dedicated,openshift-rosa[] * You have access to the cluster as a user with the `dedicated-admin` role. * The `user-workload-monitoring-config` `ConfigMap` object exists. This object is created by default when the cluster is created. endif::openshift-dedicated,openshift-rosa[] +// end::UWM[] * You have installed the OpenShift CLI (`oc`). .Procedure -. Edit the `ConfigMap` object: -ifndef::openshift-dedicated,openshift-rosa[] -** *To attach custom labels to all time series and alerts leaving the Prometheus instance that monitors core {product-title} projects*: -.. Edit the `cluster-monitoring-config` `ConfigMap` object in the `openshift-monitoring` project: +. Edit the `{configmap-name}` config map in the `{namespace-name}` project: + -[source,terminal] +[source,terminal,subs="attributes+"] ---- -$ oc -n openshift-monitoring edit configmap cluster-monitoring-config +$ oc -n {namespace-name} edit configmap {configmap-name} ---- -.. Define a map of labels you want to add for every metric under `data/config.yaml`: +. 
Define labels you want to add for every metric under `data/config.yaml`: + -[source,yaml] +[source,yaml,subs="attributes+"] ---- apiVersion: v1 kind: ConfigMap metadata: - name: cluster-monitoring-config - namespace: openshift-monitoring + name: {configmap-name} + namespace: {namespace-name} data: config.yaml: | - prometheusK8s: + {component}: externalLabels: : # <1> ---- -+ -<1> Substitute `: ` with a map of key-value pairs where `` is a unique name for the new label and `` is its value. -+ -[WARNING] -==== -* Do not use `prometheus` or `prometheus_replica` as key names, because they are reserved and will be overwritten. - -* Do not use `cluster` or `managed_cluster` as key names. Using them can cause issues where you are unable to see data in the developer dashboards. -==== -+ -For example, to add metadata about the region and environment to all time series and alerts, use the following example: -+ -[source,yaml] ----- -apiVersion: v1 -kind: ConfigMap -metadata: - name: cluster-monitoring-config - namespace: openshift-monitoring -data: - config.yaml: | - prometheusK8s: - externalLabels: - region: eu - environment: prod ----- - -.. Save the file to apply the changes. The new configuration is applied automatically. - -** *To attach custom labels to all time series and alerts leaving the Prometheus instance that monitors user-defined projects*: -endif::openshift-dedicated,openshift-rosa[] -.. Edit the `user-workload-monitoring-config` `ConfigMap` object in the `openshift-user-workload-monitoring` project: -+ -[source,terminal] ----- -$ oc -n openshift-user-workload-monitoring edit configmap user-workload-monitoring-config ----- - -.. 
Define a map of labels you want to add for every metric under `data/config.yaml`: -+ -[source,yaml] ----- -apiVersion: v1 -kind: ConfigMap -metadata: - name: user-workload-monitoring-config - namespace: openshift-user-workload-monitoring -data: - config.yaml: | - prometheus: - externalLabels: - <key>: <value> # <1> ----- -+ -<1> Substitute `<key>: <value>` with a map of key-value pairs where `<key>` is a unique name for the new label and `<value>` is its value. +<1> Substitute `<key>: <value>` with key-value pairs where `<key>` is a unique name for the new label and `<value>` is its value. + [WARNING] ==== @@ -113,27 +72,34 @@ data: * Do not use `cluster` or `managed_cluster` as key names. Using them can cause issues where you are unable to see data in the developer dashboards. ==== +// tag::UWM[] + [NOTE] ==== In the `openshift-user-workload-monitoring` project, Prometheus handles metrics and Thanos Ruler handles alerting and recording rules. Setting `externalLabels` for `prometheus` in the `user-workload-monitoring-config` `ConfigMap` object will only configure external labels for metrics and not for any rules. ==== +// end::UWM[] + -For example, to add metadata about the region and environment to all time series and alerts related to user-defined projects, use the following example: +For example, to add metadata about the region and environment to all time series and alerts, use the following example: + -[source,yaml] +[source,yaml,subs="attributes+"] ---- apiVersion: v1 kind: ConfigMap metadata: - name: user-workload-monitoring-config - namespace: openshift-user-workload-monitoring + name: {configmap-name} + namespace: {namespace-name} data: config.yaml: | - prometheus: + {component}: externalLabels: region: eu environment: prod ---- -.. Save the file to apply the changes. The pods affected by the new configuration are automatically redeployed. +. Save the file to apply the changes. The pods affected by the new configuration are automatically redeployed. + +// Unset the source code block attributes just to be safe.
+:!configmap-name: +:!namespace-name: +:!component: diff --git a/modules/monitoring-choosing-a-metrics-collection-profile.adoc b/modules/monitoring-choosing-a-metrics-collection-profile.adoc index 2ebf9722d2..af8199a5eb 100644 --- a/modules/monitoring-choosing-a-metrics-collection-profile.adoc +++ b/modules/monitoring-choosing-a-metrics-collection-profile.adoc @@ -6,6 +6,9 @@ [id="choosing-a-metrics-collection-profile_{context}"] = Choosing a metrics collection profile +:FeatureName: Metrics collection profile +include::snippets/technology-preview.adoc[] + To choose a metrics collection profile for core {product-title} monitoring components, edit the `cluster-monitoring-config` `ConfigMap` object. .Prerequisites diff --git a/modules/monitoring-common-terms.adoc b/modules/monitoring-common-terms.adoc index fb6bee41e9..e57dd7ac9e 100644 --- a/modules/monitoring-common-terms.adoc +++ b/modules/monitoring-common-terms.adoc @@ -3,7 +3,7 @@ // * observability/monitoring/monitoring-overview.adoc :_mod-docs-content-type: REFERENCE -[id="openshift-monitoring-common-terms_{context}"] +[id="monitoring-common-terms_{context}"] = Glossary of common terms for {product-title} monitoring This glossary defines common terms that are used in {product-title} architecture. 
diff --git a/modules/monitoring-configurable-monitoring-components.adoc b/modules/monitoring-configurable-monitoring-components.adoc index 173a0ad2e2..7c94c92dc0 100644 --- a/modules/monitoring-configurable-monitoring-components.adoc +++ b/modules/monitoring-configurable-monitoring-components.adoc @@ -2,53 +2,82 @@ // // * observability/monitoring/configuring-the-monitoring-stack.adoc +:_mod-docs-content-type: REFERENCE + [id="configurable-monitoring-components_{context}"] = Configurable monitoring components -This table shows the monitoring components you can configure and the keys used to specify the components in the -ifndef::openshift-dedicated,openshift-rosa[] -`cluster-monitoring-config` and -endif::openshift-dedicated,openshift-rosa[] -`user-workload-monitoring-config` `ConfigMap` objects. +// Set attributes to distinguish between cluster monitoring example (core platform monitoring - CPM) and user workload monitoring (UWM) examples. +// tag::CPM[] +:configmap-name: cluster-monitoring-config +:alertmanager: alertmanagerMain +:prometheus: prometheusK8s +:thanosname: Thanos Querier +:thanos: thanosQuerier +// end::CPM[] +// tag::UWM[] +:configmap-name: user-workload-monitoring-config +:alertmanager: alertmanager +:prometheus: prometheus +:thanosname: Thanos Ruler +:thanos: thanosRuler +// end::UWM[] +This table shows the monitoring components you can configure and the keys used to specify the components in the `{configmap-name}` config map. + +// tag::UWM[] ifdef::openshift-dedicated,openshift-rosa[] [WARNING] ==== -Do not modify the monitoring components in the `cluster-monitoring-config` `ConfigMap` object. Red Hat Site Reliability Engineers (SRE) use these components to monitor the core cluster components and Kubernetes services. +Do not modify the monitoring components in the `cluster-monitoring-config` `ConfigMap` object. Red{nbsp}Hat Site Reliability Engineers (SRE) use these components to monitor the core cluster components and Kubernetes services. 
==== endif::openshift-dedicated,openshift-rosa[] +// end::UWM[] + +// tag::CPM[] +.Configurable core platform monitoring components +// end::CPM[] +// tag::UWM[] +.Configurable monitoring components for user-defined projects +// end::UWM[] +[options="header"] +|==== +|Component |{configmap-name} config map key +|Prometheus Operator |`prometheusOperator` +|Prometheus |`{prometheus}` +|Alertmanager |`{alertmanager}` +|{thanosname} | `{thanos}` +// tag::CPM[] +|kube-state-metrics |`kubeStateMetrics` +|monitoring-plugin | `monitoringPlugin` +|openshift-state-metrics |`openshiftStateMetrics` +|Telemeter Client |`telemeterClient` +|Metrics Server |`metricsServer` +// end::CPM[] +|==== ifndef::openshift-dedicated,openshift-rosa[] -.Configurable monitoring components -[options="header"] -|==== -|Component |cluster-monitoring-config config map key |user-workload-monitoring-config config map key -|Prometheus Operator |`prometheusOperator` |`prometheusOperator` -|Prometheus |`prometheusK8s` |`prometheus` -|Alertmanager |`alertmanagerMain` | `alertmanager` -|kube-state-metrics |`kubeStateMetrics` | -|monitoring-plugin | `monitoringPlugin` | -|openshift-state-metrics |`openshiftStateMetrics` | -|Telemeter Client |`telemeterClient` | -|Metrics Server |`metricsServer` | -|Thanos Querier |`thanosQuerier` | -|Thanos Ruler | |`thanosRuler` -|==== - -[NOTE] +[WARNING] ==== -The Prometheus key is called `prometheusK8s` in the `cluster-monitoring-config` `ConfigMap` object and `prometheus` in the `user-workload-monitoring-config` `ConfigMap` object. +Different configuration changes to the `ConfigMap` object result in different outcomes: + +* The pods are not redeployed. Therefore, there is no service outage. + +* The affected pods are redeployed: + +** For single-node clusters, this results in a temporary service outage. + +** For multi-node clusters, because of high availability, the affected pods are gradually rolled out and the monitoring stack remains available.
+ +** Configuring and resizing a persistent volume always results in a service outage, regardless of high availability. + +Each procedure that requires a change in the config map includes its expected outcome. ==== endif::openshift-dedicated,openshift-rosa[] -ifdef::openshift-dedicated,openshift-rosa[] -.Configurable monitoring components -[options="header"] -|=== -|Component |user-workload-monitoring-config config map key -|Alertmanager |`alertmanager` -|Prometheus Operator |`prometheusOperator` -|Prometheus |`prometheus` -|Thanos Ruler |`thanosRuler` -|=== -endif::openshift-dedicated,openshift-rosa[] +// Unset the source code block attributes just to be safe. +:!configmap-name: +:!alertmanager: +:!prometheus: +:!thanosname: +:!thanos: diff --git a/modules/monitoring-configuring-a-persistent-volume-claim.adoc b/modules/monitoring-configuring-a-persistent-volume-claim.adoc index 68a6a40c3d..b420f282b6 100644 --- a/modules/monitoring-configuring-a-persistent-volume-claim.adoc +++ b/modules/monitoring-configuring-a-persistent-volume-claim.adoc @@ -3,143 +3,113 @@ // * observability/monitoring/configuring-the-monitoring-stack.adoc :_mod-docs-content-type: PROCEDURE -[id="configuring-a-persistent-volume-claim_{context}"] +[id="configuring-a-persistent-volume-claim_{context}"] = Configuring a persistent volume claim +// Set attributes to distinguish between cluster monitoring example (core platform monitoring - CPM) and user workload monitoring (UWM) examples + +// tag::CPM[] +:configmap-name: cluster-monitoring-config +:namespace-name: openshift-monitoring +:component: prometheusK8s +// end::CPM[] +// tag::UWM[] +:configmap-name: user-workload-monitoring-config +:namespace-name: openshift-user-workload-monitoring +:component: thanosRuler +// end::UWM[] + To use a persistent volume (PV) for monitoring components, you must configure a persistent volume claim (PVC). 
.Prerequisites +// tag::CPM[] +* You have access to the cluster as a user with the `cluster-admin` cluster role. +* You have created the `cluster-monitoring-config` `ConfigMap` object. +// end::CPM[] +// tag::UWM[] ifndef::openshift-dedicated,openshift-rosa[] -* *If you are configuring core {product-title} monitoring components*: -** You have access to the cluster as a user with the `cluster-admin` cluster role. -** You have created the `cluster-monitoring-config` `ConfigMap` object. -* *If you are configuring components that monitor user-defined projects*: -** You have access to the cluster as a user with the `cluster-admin` cluster role, or as a user with the `user-workload-monitoring-config-edit` role in the `openshift-user-workload-monitoring` project. -** A cluster administrator has enabled monitoring for user-defined projects. +* You have access to the cluster as a user with the `cluster-admin` cluster role, or as a user with the `user-workload-monitoring-config-edit` role in the `openshift-user-workload-monitoring` project. +* A cluster administrator has enabled monitoring for user-defined projects. endif::openshift-dedicated,openshift-rosa[] ifdef::openshift-dedicated,openshift-rosa[] * You have access to the cluster as a user with the `dedicated-admin` role. * The `user-workload-monitoring-config` `ConfigMap` object exists. This object is created by default when the cluster is created. endif::openshift-dedicated,openshift-rosa[] +// end::UWM[] * You have installed the OpenShift CLI (`oc`). .Procedure -. Edit the `ConfigMap` object: -ifndef::openshift-dedicated,openshift-rosa[] -** *To configure a PVC for a component that monitors core {product-title} projects*: -.. Edit the `cluster-monitoring-config` `ConfigMap` object in the `openshift-monitoring` project: +. 
Edit the `{configmap-name}` config map in the `{namespace-name}` project: + -[source,terminal] +[source,terminal,subs="attributes+"] ---- -$ oc -n openshift-monitoring edit configmap cluster-monitoring-config +$ oc -n {namespace-name} edit configmap {configmap-name} ---- -.. Add your PVC configuration for the component under `data/config.yaml`: +. Add your PVC configuration for the component under `data/config.yaml`: + -[source,yaml] +[source,yaml,subs="attributes+"] ---- apiVersion: v1 kind: ConfigMap metadata: - name: cluster-monitoring-config - namespace: openshift-monitoring + name: {configmap-name} + namespace: {namespace-name} data: config.yaml: | - : #<1> + : # <1> volumeClaimTemplate: spec: - storageClassName: #<2> + storageClassName: # <2> resources: requests: - storage: #<3> + storage: # <3> ---- -<1> Specify the core monitoring component for which you want to configure the PVC. +<1> Specify the monitoring component for which you want to configure the PVC. <2> Specify an existing storage class. If a storage class is not specified, the default storage class is used. <3> Specify the amount of required storage. + -See the link:https://kubernetes.io/docs/concepts/storage/persistent-volumes/#persistentvolumeclaims[Kubernetes documentation on PersistentVolumeClaims] for information on how to specify `volumeClaimTemplate`. 
+The following example configures a PVC that claims persistent storage for +// tag::CPM[] +Prometheus: +// end::CPM[] +// tag::UWM[] +Thanos Ruler: +// end::UWM[] + -The following example configures a PVC that claims persistent storage for the Prometheus instance that monitors core {product-title} components: -+ -[source,yaml] +.Example PVC configuration +[source,yaml,subs="attributes+"] ---- apiVersion: v1 kind: ConfigMap metadata: - name: cluster-monitoring-config - namespace: openshift-monitoring + name: {configmap-name} + namespace: {namespace-name} data: config.yaml: | - prometheusK8s: + {component}: volumeClaimTemplate: spec: storageClassName: my-storage-class resources: requests: +# tag::CPM[] storage: 40Gi ----- - -** *To configure a PVC for a component that monitors user-defined projects*: -endif::openshift-dedicated,openshift-rosa[] -.. Edit the `user-workload-monitoring-config` `ConfigMap` object in the `openshift-user-workload-monitoring` project: -+ -[source,terminal] ----- -$ oc -n openshift-user-workload-monitoring edit configmap user-workload-monitoring-config ----- - -.. Add your PVC configuration for the component under `data/config.yaml`: -+ -[source,yaml] ----- -apiVersion: v1 -kind: ConfigMap -metadata: - name: user-workload-monitoring-config - namespace: openshift-user-workload-monitoring -data: - config.yaml: | - : #<1> - volumeClaimTemplate: - spec: - storageClassName: #<2> - resources: - requests: - storage: #<3> ----- -<1> Specify the component for user-defined monitoring for which you want to configure the PVC. -<2> Specify an existing storage class. If a storage class is not specified, the default storage class is used. -<3> Specify the amount of required storage. -+ -See the link:https://kubernetes.io/docs/concepts/storage/persistent-volumes/#persistentvolumeclaims[Kubernetes documentation on PersistentVolumeClaims] for information on how to specify `volumeClaimTemplate`. 
-+ -The following example configures a PVC that claims persistent storage for Thanos Ruler: -+ -[source,yaml] ----- -apiVersion: v1 -kind: ConfigMap -metadata: - name: user-workload-monitoring-config - namespace: openshift-user-workload-monitoring -data: - config.yaml: | - thanosRuler: - volumeClaimTemplate: - spec: - storageClassName: my-storage-class - resources: - requests: +# end::CPM[] +# tag::UWM[] storage: 10Gi +# end::UWM[] ---- +// tag::UWM[] + [NOTE] ==== Storage requirements for the `thanosRuler` component depend on the number of rules that are evaluated and how many samples each rule generates. ==== +// end::UWM[] . Save the file to apply the changes. The pods affected by the new configuration are automatically redeployed and the new storage configuration is applied. + @@ -147,3 +117,8 @@ Storage requirements for the `thanosRuler` component depend on the number of rul ==== When you update the config map with a PVC configuration, the affected `StatefulSet` object is recreated, resulting in a temporary service outage. ==== + +// Unset the source code block attributes just to be safe. 
+:!configmap-name: +:!namespace-name: +:!component: diff --git a/modules/monitoring-configuring-alert-receivers.adoc b/modules/monitoring-configuring-alert-routing-console.adoc similarity index 83% rename from modules/monitoring-configuring-alert-receivers.adoc rename to modules/monitoring-configuring-alert-routing-console.adoc index c8d560794b..f1b7fae98f 100644 --- a/modules/monitoring-configuring-alert-receivers.adoc +++ b/modules/monitoring-configuring-alert-routing-console.adoc @@ -4,10 +4,15 @@ // * post_installation_configuration/configuring-alert-notifications.adoc :_mod-docs-content-type: PROCEDURE -[id="configuring-alert-receivers_{context}"] -= Configuring alert receivers +[id="configuring-alert-routing-console_{context}"] += Configuring alert routing with the {product-title} web console -You can configure alert receivers to ensure that you learn about important issues with your cluster. +You can configure alert routing through the {product-title} web console to ensure that you learn about important issues with your cluster. + +[NOTE] +==== +The {product-title} web console provides fewer settings to configure alert routing than the `alertmanager-main` secret. To configure alert routing with access to more configuration settings, see "Configuring alert routing for default platform alerts".
+==== .Prerequisites diff --git a/modules/monitoring-configuring-notifications-for-default-platform-alerts.adoc b/modules/monitoring-configuring-alert-routing-default-platform-alerts.adoc similarity index 91% rename from modules/monitoring-configuring-notifications-for-default-platform-alerts.adoc rename to modules/monitoring-configuring-alert-routing-default-platform-alerts.adoc index d029f9f22b..859e6c7d2b 100644 --- a/modules/monitoring-configuring-notifications-for-default-platform-alerts.adoc +++ b/modules/monitoring-configuring-alert-routing-default-platform-alerts.adoc @@ -3,14 +3,14 @@ // * observability/monitoring/managing-alerts.adoc :_mod-docs-content-type: PROCEDURE -[id="configuring-notifications-for-default-platform-alerts_{context}"] -= Configuring notifications for default platform alerts +[id="configuring-alert-routing-default-platform-alerts_{context}"] += Configuring alert routing for default platform alerts You can configure Alertmanager to send notifications. Customize where and how Alertmanager sends notifications about default platform alerts by editing the default configuration in the `alertmanager-main` secret in the `openshift-monitoring` namespace. -[IMPORTANT] +[NOTE] ==== -Alertmanager does not send notifications by default. It is recommended to configure Alertmanager to receive notifications by setting up notifications details in the `alertmanager-main` secret configuration file. +All features of a supported version of upstream Alertmanager are also supported in an {product-title} Alertmanager configuration. To check all the configuration options of a supported version of upstream Alertmanager, see link:https://prometheus.io/docs/alerting/0.27/configuration/[Alertmanager configuration] (Prometheus documentation). 
==== .Prerequisites diff --git a/modules/monitoring-creating-alert-routing-for-user-defined-projects.adoc b/modules/monitoring-configuring-alert-routing-for-user-defined-projects.adoc similarity index 78% rename from modules/monitoring-creating-alert-routing-for-user-defined-projects.adoc rename to modules/monitoring-configuring-alert-routing-for-user-defined-projects.adoc index 691f8d8e1c..3c6d6f5dde 100644 --- a/modules/monitoring-creating-alert-routing-for-user-defined-projects.adoc +++ b/modules/monitoring-configuring-alert-routing-for-user-defined-projects.adoc @@ -3,10 +3,9 @@ // * observability/monitoring/managing-alerts.adoc :_mod-docs-content-type: PROCEDURE -[id="creating-alert-routing-for-user-defined-projects_{context}"] -= Creating alert routing for user-defined projects +[id="configuring-alert-routing-for-user-defined-projects_{context}"] += Configuring alert routing for user-defined projects -[role="_abstract"] If you are a non-administrator user who has been given the `alert-routing-edit` cluster role, you can create or edit alert routing for user-defined projects. .Prerequisites @@ -43,13 +42,7 @@ spec: webhookConfigs: - url: https://example.org/post ---- -+ -[NOTE] -==== -For user-defined alerting rules, user-defined routing is scoped to the namespace in which the resource is defined. -For example, a routing configuration defined in the `AlertmanagerConfig` object for namespace `ns1` only applies to `PrometheusRules` resources in the same namespace. -==== -+ + . Save the file. . 
Apply the resource to the cluster: diff --git a/modules/monitoring-configuring-notifications-for-user-defined-alerts.adoc b/modules/monitoring-configuring-alert-routing-user-defined-alerts-secret.adoc similarity index 74% rename from modules/monitoring-configuring-notifications-for-user-defined-alerts.adoc rename to modules/monitoring-configuring-alert-routing-user-defined-alerts-secret.adoc index d8852a26d3..a2b4197754 100644 --- a/modules/monitoring-configuring-notifications-for-user-defined-alerts.adoc +++ b/modules/monitoring-configuring-alert-routing-user-defined-alerts-secret.adoc @@ -3,19 +3,25 @@ // * observability/monitoring/managing-alerts.adoc :_mod-docs-content-type: PROCEDURE -[id="configuring-notifications-for-user-defined-alerts_{context}"] -= Configuring notifications for user-defined alerts +[id="configuring-alert-routing-user-defined-alerts-secret_{context}"] += Configuring alert routing for user-defined projects with the Alertmanager secret If you have enabled a separate instance of Alertmanager that is dedicated to user-defined alert routing, you can customize where and how the instance sends notifications by editing the `alertmanager-user-workload` secret in the `openshift-user-workload-monitoring` namespace. +[NOTE] +==== +All features of a supported version of upstream Alertmanager are also supported in an {product-title} Alertmanager configuration. To check all the configuration options of a supported version of upstream Alertmanager, see link:https://prometheus.io/docs/alerting/0.27/configuration/[Alertmanager configuration] (Prometheus documentation). +==== + .Prerequisites +ifndef::openshift-dedicated,openshift-rosa[] +* You have access to the cluster as a user with the `cluster-admin` cluster role. +* You have enabled a separate instance of Alertmanager for user-defined alert routing. 
+endif::openshift-dedicated,openshift-rosa[] ifdef::openshift-rosa,openshift-dedicated[] * You have access to the cluster as a user with the `dedicated-admin` role. endif::[] -ifndef::openshift-rosa,openshift-dedicated[] -* You have access to the cluster as a user with the `cluster-admin` cluster role. -endif::[] * You have installed the OpenShift CLI (`oc`). .Procedure diff --git a/modules/monitoring-configuring-external-alertmanagers.adoc b/modules/monitoring-configuring-external-alertmanagers.adoc index c3918c134a..4b53594e87 100644 --- a/modules/monitoring-configuring-external-alertmanagers.adoc +++ b/modules/monitoring-configuring-external-alertmanagers.adoc @@ -3,79 +3,107 @@ // * observability/monitoring/configuring-the-monitoring-stack.adoc :_mod-docs-content-type: PROCEDURE + [id="monitoring-configuring-external-alertmanagers_{context}"] = Configuring external Alertmanager instances +// Set attributes to distinguish between cluster monitoring example (core platform monitoring - CPM) and user workload monitoring (UWM) examples + +// tag::CPM[] +:configmap-name: cluster-monitoring-config +:namespace-name: openshift-monitoring +:component: prometheusK8s +:component-name: Prometheus +// end::CPM[] +// tag::UWM[] +:configmap-name: user-workload-monitoring-config +:namespace-name: openshift-user-workload-monitoring +:component: thanosRuler +:component-name: Thanos Ruler +// end::UWM[] + The {product-title} monitoring stack includes a local Alertmanager instance that routes alerts from Prometheus. -ifndef::openshift-dedicated,openshift-rosa[] -You can add external Alertmanager instances to route alerts for core {product-title} projects or user-defined projects. -endif::openshift-dedicated,openshift-rosa[] -ifdef::openshift-dedicated,openshift-rosa[] + +// tag::CPM[] +You can add external Alertmanager instances to route alerts for core {product-title} projects. 
+// end::CPM[] +// tag::UWM[] You can add external Alertmanager instances to route alerts for user-defined projects. -endif::openshift-dedicated,openshift-rosa[] +// end::UWM[] If you add the same external Alertmanager configuration for multiple clusters and disable the local instance for each cluster, you can then manage alert routing for multiple clusters by using a single external Alertmanager instance. .Prerequisites +// tag::CPM[] +* You have access to the cluster as a user with the `cluster-admin` cluster role. +* You have created the `cluster-monitoring-config` `ConfigMap` object. +// end::CPM[] +// tag::UWM[] ifndef::openshift-dedicated,openshift-rosa[] -* *If you are configuring core {product-title} monitoring components in the `openshift-monitoring` project*: -** You have access to the cluster as a user with the `cluster-admin` cluster role. -** You have created the `cluster-monitoring-config` config map. -* *If you are configuring components that monitor user-defined projects*: -** You have access to the cluster as a user with the `cluster-admin` cluster role, or as a user with the `user-workload-monitoring-config-edit` role in the `openshift-user-workload-monitoring` project. -** A cluster administrator has enabled monitoring for user-defined projects. +* You have access to the cluster as a user with the `cluster-admin` cluster role or as a user with the `user-workload-monitoring-config-edit` role in the `openshift-user-workload-monitoring` project. +* A cluster administrator has enabled monitoring for user-defined projects. endif::openshift-dedicated,openshift-rosa[] ifdef::openshift-dedicated,openshift-rosa[] * You have access to the cluster as a user with the `dedicated-admin` role. * The `user-workload-monitoring-config` `ConfigMap` object exists. This object is created by default when the cluster is created. endif::openshift-dedicated,openshift-rosa[] +// end::UWM[] * You have installed the OpenShift CLI (`oc`). .Procedure -. 
Edit the `ConfigMap` object. -ifndef::openshift-dedicated,openshift-rosa[] -** *To configure additional Alertmanagers for routing alerts from core {product-title} projects*: -.. Edit the `cluster-monitoring-config` config map in the `openshift-monitoring` project: +. Edit the `{configmap-name}` config map in the `{namespace-name}` project: + -[source,terminal] +[source,terminal,subs="attributes+"] ---- -$ oc -n openshift-monitoring edit configmap cluster-monitoring-config +$ oc -n {namespace-name} edit configmap {configmap-name} ---- -.. Add an `additionalAlertmanagerConfigs:` section under `data/config.yaml/prometheusK8s`. - -.. Add the configuration details for additional Alertmanagers in this section: +. Add an `additionalAlertmanagerConfigs` section with configuration details under +// tag::CPM[] +`data/config.yaml/prometheusK8s`: +// end::CPM[] +// tag::UWM[] +`data/config.yaml/<component>`: +// end::UWM[] + -[source,yaml] +[source,yaml,subs="attributes+"] ---- apiVersion: v1 kind: ConfigMap metadata: - name: cluster-monitoring-config - namespace: openshift-monitoring + name: {configmap-name} + namespace: {namespace-name} data: config.yaml: | +# tag::CPM[] prometheusK8s: +# end::CPM[] +# tag::UWM[] + <component>: # <2> +# end::UWM[] additionalAlertmanagerConfigs: - - <alertmanager_specification> + - <alertmanager_specification> # <1> ---- -+ -For `<alertmanager_specification>`, substitute authentication and other configuration details for additional Alertmanager instances. +<1> Substitute `<alertmanager_specification>` with authentication and other configuration details for additional Alertmanager instances. Currently supported authentication methods are bearer token (`bearerToken`) and client TLS (`tlsConfig`). -The following sample config map configures an additional Alertmanager using a bearer token with client TLS authentication: +// tag::UWM[] +<2> Substitute `<component>` with one of two supported external Alertmanager components: `prometheus` or `thanosRuler`.
+// end::UWM[] + -[source,yaml] +The following sample config map configures an additional Alertmanager for {component-name} by using a bearer token with client TLS authentication: ++ +[source,yaml,subs="attributes+"] ---- apiVersion: v1 kind: ConfigMap metadata: - name: cluster-monitoring-config - namespace: openshift-monitoring + name: {configmap-name} + namespace: {namespace-name} data: config.yaml: | - prometheusK8s: + {component}: additionalAlertmanagerConfigs: - scheme: https pathPrefix: / @@ -99,69 +127,10 @@ data: - external-alertmanager1-remote2.com ---- -** *To configure additional Alertmanager instances for routing alerts from user-defined projects*: -endif::openshift-dedicated,openshift-rosa[] +. Save the file to apply the changes. The pods affected by the new configuration are automatically redeployed. -.. Edit the `user-workload-monitoring-config` config map in the `openshift-user-workload-monitoring` project: -+ -[source,terminal] ----- -$ oc -n openshift-user-workload-monitoring edit configmap user-workload-monitoring-config ----- - -.. Add a `/additionalAlertmanagerConfigs:` section under `data/config.yaml/`. - -.. Add the configuration details for additional Alertmanagers in this section: -+ -[source,yaml] ----- -apiVersion: v1 -kind: ConfigMap -metadata: - name: user-workload-monitoring-config - namespace: openshift-user-workload-monitoring -data: - config.yaml: | - : - additionalAlertmanagerConfigs: - - ----- -+ -For ``, substitute one of two supported external Alertmanager components: `prometheus` or `thanosRuler`. -+ -For ``, substitute authentication and other configuration details for additional Alertmanager instances. Currently supported authentication methods are bearer token (`bearerToken`) and client TLS (`tlsConfig`). 
The following sample config map configures an additional Alertmanager using Thanos Ruler with a bearer token and client TLS authentication: -+ -[source,yaml] ----- -apiVersion: v1 -kind: ConfigMap -metadata: - name: user-workload-monitoring-config - namespace: openshift-user-workload-monitoring -data: - config.yaml: | - thanosRuler: - additionalAlertmanagerConfigs: - - scheme: https - pathPrefix: / - timeout: "30s" - apiVersion: v1 - bearerToken: - name: alertmanager-bearer-token - key: token - tlsConfig: - key: - name: alertmanager-tls - key: tls.key - cert: - name: alertmanager-tls - key: tls.crt - ca: - name: alertmanager-tls - key: tls.ca - staticConfigs: - - external-alertmanager1-remote.com - - external-alertmanager1-remote2.com ----- - -. Save the file to apply the changes. The pods affected by the new configuration are automatically redeployed. \ No newline at end of file +// Unset the source code block attributes just to be safe. +:!configmap-name: +:!namespace-name: +:!component: +:!component-name: \ No newline at end of file diff --git a/modules/monitoring-configuring-metrics-collection-profiles.adoc b/modules/monitoring-configuring-metrics-collection-profiles.adoc index 63a30051f1..ac518fe893 100644 --- a/modules/monitoring-configuring-metrics-collection-profiles.adoc +++ b/modules/monitoring-configuring-metrics-collection-profiles.adoc @@ -4,17 +4,10 @@ :_mod-docs-content-type: CONCEPT [id="configuring-metrics-collection-profiles_{context}"] -= Configuring metrics collection profiles += About metrics collection profiles -[IMPORTANT] -==== -[subs="attributes+"] -Using a metrics collection profile is a Technology Preview feature only. Technology Preview features are not supported with Red Hat production service level agreements (SLAs) and might not be functionally complete. -Red Hat does not recommend using them in production. 
-These features provide early access to upcoming product features, enabling customers to test functionality and provide feedback during the development process. - -For more information about the support scope of Red Hat Technology Preview features, see link:https://access.redhat.com/support/offerings/techpreview[https://access.redhat.com/support/offerings/techpreview]. -==== +:FeatureName: Metrics collection profile +include::snippets/technology-preview.adoc[] By default, Prometheus collects metrics exposed by all default metrics targets in {product-title} components. However, you might want Prometheus to collect fewer metrics from a cluster in certain scenarios: @@ -26,9 +19,6 @@ You can use a metrics collection profile to collect either the default amount of When you collect minimal metrics data, basic monitoring features such as alerting continue to work. At the same time, the CPU and memory resources required by Prometheus decrease. -[id="about-metrics-collection-profiles_{context}"] -== About metrics collection profiles - You can enable one of two metrics collection profiles: * *full*: Prometheus collects metrics data exposed by all platform components. This setting is the default. 
diff --git a/modules/monitoring-configuring-pod-topology-spread-constraints.adoc b/modules/monitoring-configuring-pod-topology-spread-constraints.adoc index 812bc588a6..71ec3ea8cc 100644 --- a/modules/monitoring-configuring-pod-topology-spread-constraints.adoc +++ b/modules/monitoring-configuring-pod-topology-spread-constraints.adoc @@ -3,63 +3,76 @@ // * observability/monitoring/configuring-the-monitoring-stack.adoc :_mod-docs-content-type: PROCEDURE + [id="configuring-pod-topology-spread-constraints_{context}"] = Configuring pod topology spread constraints +// Set attributes to distinguish between cluster monitoring example (core platform monitoring - CPM) and user workload monitoring (UWM) examples + +// tag::CPM[] +:configmap-name: cluster-monitoring-config +:namespace-name: openshift-monitoring +:component: prometheusK8s +:component-name: Prometheus +:label: prometheus +// end::CPM[] +// tag::UWM[] +:configmap-name: user-workload-monitoring-config +:namespace-name: openshift-user-workload-monitoring +:component: thanosRuler +:component-name: Thanos Ruler +:label: thanos-ruler +// end::UWM[] + You can configure pod topology spread constraints for -ifndef::openshift-dedicated,openshift-rosa[] +// tag::CPM[] all the pods deployed by the {cmo-full} -endif::openshift-dedicated,openshift-rosa[] -ifdef::openshift-dedicated,openshift-rosa[] +// end::CPM[] +// tag::UWM[] all the pods for user-defined monitoring -endif::openshift-dedicated,openshift-rosa[] +// end::UWM[] to control how pod replicas are scheduled to nodes across zones. This ensures that the pods are highly available and run more efficiently, because workloads are spread across nodes in different data centers or hierarchical infrastructure zones. -You can configure pod topology spread constraints for monitoring pods by using -ifndef::openshift-dedicated,openshift-rosa[] -the `cluster-monitoring-config` or -endif::openshift-dedicated,openshift-rosa[] -the `user-workload-monitoring-config` config map. 
+You can configure pod topology spread constraints for monitoring pods by using the `{configmap-name}` config map. .Prerequisites +// tag::CPM[] +* You have access to the cluster as a user with the `cluster-admin` cluster role. +* You have created the `cluster-monitoring-config` `ConfigMap` object. +// end::CPM[] +// tag::UWM[] ifndef::openshift-dedicated,openshift-rosa[] -* *If you are configuring pods for core {product-title} monitoring:* -** You have access to the cluster as a user with the `cluster-admin` cluster role. -** You have created the `cluster-monitoring-config` `ConfigMap` object. -* *If you are configuring pods for user-defined monitoring:* -** You have access to the cluster as a user with the `cluster-admin` cluster role, or as a user with the `user-workload-monitoring-config-edit` role in the `openshift-user-workload-monitoring` project. -** A cluster administrator has enabled monitoring for user-defined projects. +* You have access to the cluster as a user with the `cluster-admin` cluster role or as a user with the `user-workload-monitoring-config-edit` role in the `openshift-user-workload-monitoring` project. +* A cluster administrator has enabled monitoring for user-defined projects. endif::openshift-dedicated,openshift-rosa[] + ifdef::openshift-dedicated,openshift-rosa[] * You have access to the cluster as a user with the `dedicated-admin` role. * The `user-workload-monitoring-config` `ConfigMap` object exists. This object is created by default when the cluster is created. endif::openshift-dedicated,openshift-rosa[] - +// end::UWM[] * You have installed the OpenShift CLI (`oc`). .Procedure -ifndef::openshift-dedicated,openshift-rosa[] -* *To configure pod topology spread constraints for core {product-title} monitoring:* - -. Edit the `cluster-monitoring-config` config map in the `openshift-monitoring` project: +. 
Edit the `{configmap-name}` config map in the `{namespace-name}` project: + -[source,terminal] +[source,terminal,subs="attributes+"] ---- -$ oc -n openshift-monitoring edit configmap cluster-monitoring-config +$ oc -n {namespace-name} edit configmap {configmap-name} ---- . Add the following settings under the `data/config.yaml` field to configure pod topology spread constraints: + -[source,yaml] +[source,yaml,subs="attributes+"] ---- apiVersion: v1 kind: ConfigMap metadata: - name: cluster-monitoring-config - namespace: openshift-monitoring + name: {configmap-name} + namespace: {namespace-name} data: config.yaml: | : # <1> @@ -82,87 +95,36 @@ Specify `ScheduleAnyway` if you want the scheduler to still schedule the pod but <5> Specify `labelSelector` to find matching pods. Pods that match this label selector are counted to determine the number of pods in their corresponding topology domain. + -.Example configuration for Prometheus -[source,yaml] +.Example configuration for {component-name} +[source,yaml,subs="attributes+"] ---- apiVersion: v1 kind: ConfigMap metadata: - name: cluster-monitoring-config - namespace: openshift-monitoring + name: {configmap-name} + namespace: {namespace-name} data: config.yaml: | - prometheusK8s: + {component}: topologySpreadConstraints: - maxSkew: 1 topologyKey: monitoring +# tag::CPM[] whenUnsatisfiable: DoNotSchedule - labelSelector: - matchLabels: - app.kubernetes.io/name: prometheus ----- - -. Save the file to apply the changes. The pods affected by the new configuration are automatically redeployed. - -* *To configure pod topology spread constraints for user-defined monitoring:* -endif::openshift-dedicated,openshift-rosa[] - -. Edit the `user-workload-monitoring-config` config map in the `openshift-user-workload-monitoring` project: -+ -[source,terminal] ----- -$ oc -n openshift-user-workload-monitoring edit configmap user-workload-monitoring-config ----- - -. 
Add the following settings under the `data/config.yaml` field to configure pod topology spread constraints: -+ -[source,yaml] ----- -apiVersion: v1 -kind: ConfigMap -metadata: - name: user-workload-monitoring-config - namespace: openshift-user-workload-monitoring -data: - config.yaml: | - : # <1> - topologySpreadConstraints: - - maxSkew: # <2> - topologyKey: # <3> - whenUnsatisfiable: # <4> - labelSelector: # <5> - ----- -<1> Specify a name of the component for which you want to set up pod topology spread constraints. -<2> Specify a numeric value for `maxSkew`, which defines the degree to which pods are allowed to be unevenly distributed. -<3> Specify a key of node labels for `topologyKey`. -Nodes that have a label with this key and identical values are considered to be in the same topology. -The scheduler tries to put a balanced number of pods into each domain. -<4> Specify a value for `whenUnsatisfiable`. -Available options are `DoNotSchedule` and `ScheduleAnyway`. -Specify `DoNotSchedule` if you want the `maxSkew` value to define the maximum difference allowed between the number of matching pods in the target topology and the global minimum. -Specify `ScheduleAnyway` if you want the scheduler to still schedule the pod but to give higher priority to nodes that might reduce the skew. -<5> Specify `labelSelector` to find matching pods. -Pods that match this label selector are counted to determine the number of pods in their corresponding topology domain. -+ -.Example configuration for Thanos Ruler -[source,yaml] ----- -apiVersion: v1 -kind: ConfigMap -metadata: - name: user-workload-monitoring-config - namespace: openshift-user-workload-monitoring -data: - config.yaml: | - thanosRuler: - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: monitoring +# end::CPM[] +# tag::UWM[] whenUnsatisfiable: ScheduleAnyway +# end::UWM[] labelSelector: matchLabels: - app.kubernetes.io/name: thanos-ruler + app.kubernetes.io/name: {label} ---- . 
Save the file to apply the changes. The pods affected by the new configuration are automatically redeployed. + +// Unset the source code block attributes just to be safe. +:!configmap-name: +:!namespace-name: +:!component: +:!component-name: +:!label: \ No newline at end of file diff --git a/modules/monitoring-configuring-remote-write-storage.adoc b/modules/monitoring-configuring-remote-write-storage.adoc index d7fbff7e71..4284c66e38 100644 --- a/modules/monitoring-configuring-remote-write-storage.adoc +++ b/modules/monitoring-configuring-remote-write-storage.adoc @@ -3,25 +3,40 @@ // * observability/monitoring/configuring-the-monitoring-stack.adoc :_mod-docs-content-type: PROCEDURE + [id="configuring-remote-write-storage_{context}"] = Configuring remote write storage +// Set attributes to distinguish between cluster monitoring example (core platform monitoring - CPM) and user workload monitoring (UWM) examples +// tag::CPM[] +:configmap-name: cluster-monitoring-config +:namespace-name: openshift-monitoring +:component: prometheusK8s +// end::CPM[] +// tag::UWM[] +:configmap-name: user-workload-monitoring-config +:namespace-name: openshift-user-workload-monitoring +:component: prometheus +// end::UWM[] + You can configure remote write storage to enable Prometheus to send ingested metrics to remote systems for long-term storage. Doing so has no impact on how or for how long Prometheus stores metrics. .Prerequisites +// tag::CPM[] +* You have access to the cluster as a user with the `cluster-admin` cluster role. +* You have created the `cluster-monitoring-config` `ConfigMap` object. +// end::CPM[] +// tag::UWM[] ifndef::openshift-dedicated,openshift-rosa[] -* *If you are configuring core {product-title} monitoring components:* -** You have access to the cluster as a user with the `cluster-admin` cluster role. -** You have created the `cluster-monitoring-config` `ConfigMap` object. 
-* *If you are configuring components that monitor user-defined projects:* -** You have access to the cluster as a user with the `cluster-admin` cluster role or as a user with the `user-workload-monitoring-config-edit` role in the `openshift-user-workload-monitoring` project. -** A cluster administrator has enabled monitoring for user-defined projects. +* You have access to the cluster as a user with the `cluster-admin` cluster role or as a user with the `user-workload-monitoring-config-edit` role in the `openshift-user-workload-monitoring` project. +* A cluster administrator has enabled monitoring for user-defined projects. endif::openshift-dedicated,openshift-rosa[] ifdef::openshift-dedicated,openshift-rosa[] * You have access to the cluster as a user with the `dedicated-admin` role. * The `user-workload-monitoring-config` `ConfigMap` object exists. This object is created by default when the cluster is created. endif::openshift-dedicated,openshift-rosa[] +// end::UWM[] * You have installed the OpenShift CLI (`oc`). * You have set up a remote write compatible endpoint (such as Thanos) and know the endpoint URL. See the link:https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage[Prometheus remote endpoints and storage documentation] for information about endpoints that are compatible with the remote write feature. + @@ -29,13 +44,7 @@ endif::openshift-dedicated,openshift-rosa[] ==== Red{nbsp}Hat only provides information for configuring remote write senders and does not offer guidance on configuring receiver endpoints. Customers are responsible for setting up their own endpoints that are remote-write compatible. Issues with endpoint receiver configurations are not included in Red{nbsp}Hat production support. ==== -* You have set up authentication credentials in a `Secret` object for the remote write endpoint. 
You must create the secret in the -ifndef::openshift-dedicated,openshift-rosa[] -same namespace as the Prometheus object for which you configure remote write: the `openshift-monitoring` namespace for default platform monitoring or the `openshift-user-workload-monitoring` namespace for user workload monitoring. -endif::openshift-dedicated,openshift-rosa[] -ifdef::openshift-dedicated,openshift-rosa[] -`openshift-user-workload-monitoring` namespace. -endif::openshift-dedicated,openshift-rosa[] +* You have set up authentication credentials in a `Secret` object for the remote write endpoint. You must create the secret in the `{namespace-name}` namespace. + [WARNING] ==== @@ -44,49 +53,46 @@ To reduce security risks, use HTTPS and authentication to send metrics to an end .Procedure -. Edit the `ConfigMap` object: -ifndef::openshift-dedicated,openshift-rosa[] -** *To configure remote write for the Prometheus instance that monitors core {product-title} projects*: -.. Edit the `cluster-monitoring-config` `ConfigMap` object in the `openshift-monitoring` project: +. Edit the `{configmap-name}` config map in the `{namespace-name}` project: + -[source,terminal] +[source,terminal,subs="attributes+"] ---- -$ oc -n openshift-monitoring edit configmap cluster-monitoring-config +$ oc -n {namespace-name} edit configmap {configmap-name} ---- -.. Add a `remoteWrite:` section under `data/config.yaml/prometheusK8s`, as shown in the following example: +. 
Add a `remoteWrite:` section under `data/config.yaml/{component}`, as shown in the following example: + -[source,yaml] +[source,yaml,subs="attributes+"] ---- apiVersion: v1 kind: ConfigMap metadata: - name: cluster-monitoring-config - namespace: openshift-monitoring + name: {configmap-name} + namespace: {namespace-name} data: config.yaml: | - prometheusK8s: + {component}: remoteWrite: - - url: "https://remote-write-endpoint.example.com" #<1> - <endpoint_authentication_credentials> #<2> + - url: "https://remote-write-endpoint.example.com" # <1> + <endpoint_authentication_credentials> # <2> ---- <1> The URL of the remote write endpoint. <2> The authentication method and credentials for the endpoint. Currently supported authentication methods are AWS Signature Version 4, authentication using HTTP in an `Authorization` request header, Basic authentication, OAuth 2.0, and TLS client. See _Supported remote write authentication settings_ for sample configurations of supported authentication methods. -.. Add write relabel configuration values after the authentication credentials: +. Add write relabel configuration values after the authentication credentials: + -[source,yaml] +[source,yaml,subs="attributes+"] ---- apiVersion: v1 kind: ConfigMap metadata: - name: cluster-monitoring-config - namespace: openshift-monitoring + name: {configmap-name} + namespace: {namespace-name} data: config.yaml: | - prometheusK8s: + {component}: remoteWrite: - url: "https://remote-write-endpoint.example.com" @@ -96,16 +102,16 @@ data: <1> Add configuration for metrics that you want to send to the remote endpoint.
+ .Example of forwarding a single metric called `my_metric` -[source,yaml] +[source,yaml,subs="attributes+"] ---- apiVersion: v1 kind: ConfigMap metadata: - name: cluster-monitoring-config - namespace: openshift-monitoring + name: {configmap-name} + namespace: {namespace-name} data: config.yaml: | - prometheusK8s: + {component}: remoteWrite: - url: "https://remote-write-endpoint.example.com" writeRelabelConfigs: @@ -115,104 +121,16 @@ data: ---- + .Example of forwarding metrics called `my_metric_1` and `my_metric_2` in `my_namespace` namespace -[source,yaml] +[source,yaml,subs="attributes+"] ---- apiVersion: v1 kind: ConfigMap metadata: - name: cluster-monitoring-config - namespace: openshift-monitoring + name: {configmap-name} + namespace: {namespace-name} data: config.yaml: | - prometheusK8s: - remoteWrite: - - url: "https://remote-write-endpoint.example.com" - writeRelabelConfigs: - - sourceLabels: [__name__,namespace] - regex: '(my_metric_1|my_metric_2);my_namespace' - action: keep ----- - -** *To configure remote write for the Prometheus instance that monitors user-defined projects*: -endif::openshift-dedicated,openshift-rosa[] -.. Edit the `user-workload-monitoring-config` `ConfigMap` object in the `openshift-user-workload-monitoring` project: -+ -[source,terminal] ----- -$ oc -n openshift-user-workload-monitoring edit configmap user-workload-monitoring-config ----- - -.. Add a `remoteWrite:` section under `data/config.yaml/prometheus`, as shown in the following example: -+ -[source,yaml] ----- -apiVersion: v1 -kind: ConfigMap -metadata: - name: user-workload-monitoring-config - namespace: openshift-user-workload-monitoring -data: - config.yaml: | - prometheus: - remoteWrite: - - url: "https://remote-write-endpoint.example.com" #<1> - #<2> ----- -<1> The URL of the remote write endpoint. -<2> The authentication method and credentials for the endpoint. 
-Currently supported authentication methods are AWS Signature Version 4, authentication using HTTP an `Authorization` request header, basic authentication, OAuth 2.0, and TLS client. -See _Supported remote write authentication settings_ below for sample configurations of supported authentication methods. - -.. Add write relabel configuration values after the authentication credentials: -+ -[source,yaml] ----- -apiVersion: v1 -kind: ConfigMap -metadata: - name: user-workload-monitoring-config - namespace: openshift-user-workload-monitoring -data: - config.yaml: | - prometheus: - remoteWrite: - - url: "https://remote-write-endpoint.example.com" - - writeRelabelConfigs: - - #<1> ----- -<1> Add configuration for metrics that you want to send to the remote endpoint. -+ -.Example of forwarding a single metric called `my_metric` -[source,yaml] ----- -apiVersion: v1 -kind: ConfigMap -metadata: - name: user-workload-monitoring-config - namespace: openshift-user-workload-monitoring -data: - config.yaml: | - prometheus: - remoteWrite: - - url: "https://remote-write-endpoint.example.com" - writeRelabelConfigs: - - sourceLabels: [__name__] - regex: 'my_metric' - action: keep ----- -+ -.Example of forwarding metrics called `my_metric_1` and `my_metric_2` in `my_namespace` namespace -[source,yaml] ----- -apiVersion: v1 -kind: ConfigMap -metadata: - name: user-workload-monitoring-config - namespace: openshift-user-workload-monitoring -data: - config.yaml: | - prometheus: + {component}: remoteWrite: - url: "https://remote-write-endpoint.example.com" writeRelabelConfigs: @@ -222,3 +140,8 @@ data: ---- . Save the file to apply the changes. The new configuration is applied automatically. + +// Unset the source code block attributes just to be safe. 
+:!configmap-name: +:!namespace-name: +:!component: \ No newline at end of file diff --git a/modules/monitoring-limiting-scrape-samples-in-user-defined-projects.adoc b/modules/monitoring-controlling-the-impact-of-unbound-attributes-in-user-defined-projects.adoc similarity index 100% rename from modules/monitoring-limiting-scrape-samples-in-user-defined-projects.adoc rename to modules/monitoring-controlling-the-impact-of-unbound-attributes-in-user-defined-projects.adoc diff --git a/modules/monitoring-creating-alerting-rules-for-user-defined-projects.adoc b/modules/monitoring-creating-alerting-rules-for-user-defined-projects.adoc index e69f2455ff..84364167cb 100644 --- a/modules/monitoring-creating-alerting-rules-for-user-defined-projects.adoc +++ b/modules/monitoring-creating-alerting-rules-for-user-defined-projects.adoc @@ -16,7 +16,7 @@ To help users understand the impact and cause of the alert, ensure that your ale .Prerequisites * You have enabled monitoring for user-defined projects. -* You are logged in as a user that has the `monitoring-rules-edit` cluster role for the project where you want to create an alerting rule. +* You are logged in as a cluster administrator or as a user that has the `monitoring-rules-edit` cluster role for the project where you want to create an alerting rule. * You have installed the OpenShift CLI (`oc`). 
.Procedure diff --git a/modules/monitoring-creating-cluster-id-labels-for-metrics.adoc b/modules/monitoring-creating-cluster-id-labels-for-metrics.adoc index b4e4aed3a1..1a180301b4 100644 --- a/modules/monitoring-creating-cluster-id-labels-for-metrics.adoc +++ b/modules/monitoring-creating-cluster-id-labels-for-metrics.adoc @@ -3,98 +3,103 @@ // * observability/monitoring/configuring-the-monitoring-stack.adoc :_mod-docs-content-type: PROCEDURE + [id="creating-cluster-id-labels-for-metrics_{context}"] = Creating cluster ID labels for metrics +// Set attributes to distinguish between cluster monitoring example (core platform monitoring - CPM) and user workload monitoring (UWM) examples +// tag::CPM[] +:configmap-name: cluster-monitoring-config +:namespace-name: openshift-monitoring +:component: prometheusK8s +// end::CPM[] +// tag::UWM[] +:configmap-name: user-workload-monitoring-config +:namespace-name: openshift-user-workload-monitoring +:component: prometheus +// end::UWM[] + +You can create cluster ID labels for metrics by adding the `write_relabel` settings for remote write storage in the `{configmap-name}` config map in the `{namespace-name}` namespace. + ifndef::openshift-dedicated,openshift-rosa[] -You can create cluster ID labels for metrics for default platform monitoring and for user workload monitoring. - -For default platform monitoring, you add cluster ID labels for metrics in the `write_relabel` settings for remote write storage in the `cluster-monitoring-config` config map in the `openshift-monitoring` namespace. - -For user workload monitoring, you edit the settings in the `user-workload-monitoring-config` config map in the `openshift-user-workload-monitoring` namespace. - +// tag::UWM[] [NOTE] ==== When Prometheus scrapes user workload targets that expose a `namespace` label, the system stores this label as `exported_namespace`. This behavior ensures that the final namespace label value is equal to the namespace of the target pod. 
You cannot override this default configuration by setting the value of the `honorLabels` field to `true` for `PodMonitor` or `ServiceMonitor` objects. ==== - -endif::openshift-dedicated,openshift-rosa[] - -ifdef::openshift-dedicated,openshift-rosa[] -You can create cluster ID labels for metrics by editing the settings in the `user-workload-monitoring-config` config map in the `openshift-user-workload-monitoring` namespace. +// end::UWM[] endif::openshift-dedicated,openshift-rosa[] .Prerequisites +// tag::CPM[] +* You have access to the cluster as a user with the `cluster-admin` cluster role. +* You have created the `cluster-monitoring-config` `ConfigMap` object. +// end::CPM[] +// tag::UWM[] ifndef::openshift-dedicated,openshift-rosa[] -* *If you are configuring default platform monitoring components:* -** You have access to the cluster as a user with the `cluster-admin` cluster role. -** You have created the `cluster-monitoring-config` `ConfigMap` object. -* *If you are configuring components that monitor user-defined projects:* -** You have access to the cluster as a user with the `cluster-admin` cluster role or as a user with the `user-workload-monitoring-config-edit` role in the `openshift-user-workload-monitoring` project. -** A cluster administrator has enabled monitoring for user-defined projects. +* You have access to the cluster as a user with the `cluster-admin` cluster role, or as a user with the `user-workload-monitoring-config-edit` role in the `openshift-user-workload-monitoring` project. +* A cluster administrator has enabled monitoring for user-defined projects. endif::openshift-dedicated,openshift-rosa[] ifdef::openshift-dedicated,openshift-rosa[] * You have access to the cluster as a user with the `dedicated-admin` role. * The `user-workload-monitoring-config` ConfigMap object exists. This object is created by default when the cluster is created. 
endif::openshift-dedicated,openshift-rosa[] +// end::UWM[] * You have installed the OpenShift CLI (`oc`). * You have configured remote write storage. .Procedure -. Edit the `ConfigMap` object: -ifndef::openshift-dedicated,openshift-rosa[] -** *To create cluster ID labels for core {product-title} metrics:* -.. Edit the `cluster-monitoring-config` `ConfigMap` object in the `openshift-monitoring` project: +. Edit the `{configmap-name}` config map in the `{namespace-name}` project: + -[source,terminal] +[source,terminal,subs="attributes+"] ---- -$ oc -n openshift-monitoring edit configmap cluster-monitoring-config +$ oc -n {namespace-name} edit configmap {configmap-name} ---- -.. In the `writeRelabelConfigs:` section under `data/config.yaml/prometheusK8s/remoteWrite`, add cluster ID relabel configuration values: +. In the `writeRelabelConfigs:` section under `data/config.yaml/{component}/remoteWrite`, add cluster ID relabel configuration values: + -[source,yaml] +[source,yaml,subs="attributes+"] ---- apiVersion: v1 kind: ConfigMap metadata: - name: cluster-monitoring-config - namespace: openshift-monitoring + name: {configmap-name} + namespace: {namespace-name} data: config.yaml: | - prometheusK8s: + {component}: remoteWrite: - url: "https://remote-write-endpoint.example.com" - writeRelabelConfigs: <1> - - <2> + writeRelabelConfigs: # <1> + - # <2> ---- <1> Add a list of write relabel configurations for metrics that you want to send to the remote endpoint. <2> Substitute the label configuration for the metrics sent to the remote write endpoint. 
+ -The following sample shows how to forward a metric with the cluster ID label `cluster_id` in default platform monitoring: +The following sample shows how to forward a metric with the cluster ID label `cluster_id`: + -[source,yaml] +[source,yaml,subs="attributes+"] ---- apiVersion: v1 kind: ConfigMap metadata: - name: cluster-monitoring-config - namespace: openshift-monitoring + name: {configmap-name} + namespace: {namespace-name} data: config.yaml: | - prometheusK8s: + {component}: remoteWrite: - url: "https://remote-write-endpoint.example.com" writeRelabelConfigs: - sourceLabels: - - __tmp_openshift_cluster_id__ <1> - targetLabel: cluster_id <2> - action: replace <3> + - __tmp_openshift_cluster_id__ # <1> + targetLabel: cluster_id # <2> + action: replace # <3> ---- <1> The system initially applies a temporary cluster ID source label named `+++__tmp_openshift_cluster_id__+++`. This temporary label gets replaced by the cluster ID label name that you specify. <2> Specify the name of the cluster ID label for metrics sent to remote write storage. @@ -103,58 +108,9 @@ For the label name, do not use `+++__tmp_openshift_cluster_id__+++`. The final r <3> The `replace` write relabel action replaces the temporary label with the target label for outgoing metrics. This action is the default and is applied if no action is specified. -** *To create cluster ID labels for user-defined project metrics:* -endif::openshift-dedicated,openshift-rosa[] -.. Edit the `user-workload-monitoring-config` `ConfigMap` object in the `openshift-user-workload-monitoring` project: -+ -[source,terminal] ----- -$ oc -n openshift-user-workload-monitoring edit configmap user-workload-monitoring-config ----- - -.. 
In the `writeRelabelConfigs:` section under `data/config.yaml/prometheus/remoteWrite`, add cluster ID relabel configuration values: -+ -[source,yaml] ----- -apiVersion: v1 -kind: ConfigMap -metadata: - name: user-workload-monitoring-config - namespace: openshift-user-workload-monitoring -data: - config.yaml: | - prometheus: - remoteWrite: - - url: "https://remote-write-endpoint.example.com" - - writeRelabelConfigs: <1> - - <2> ----- -<1> Add a list of write relabel configurations for metrics that you want to send to the remote endpoint. -<2> Substitute the label configuration for the metrics sent to the remote write endpoint. -+ -The following sample shows how to forward a metric with the cluster ID label `cluster_id` in user-workload monitoring: -+ -[source,yaml] ----- -apiVersion: v1 -kind: ConfigMap -metadata: - name: user-workload-monitoring-config - namespace: openshift-user-workload-monitoring -data: - config.yaml: | - prometheus: - remoteWrite: - - url: "https://remote-write-endpoint.example.com" - writeRelabelConfigs: - - sourceLabels: - - __tmp_openshift_cluster_id__ <1> - targetLabel: cluster_id <2> - action: replace <3> ----- -<1> The system initially applies a temporary cluster ID source label named `+++__tmp_openshift_cluster_id__+++`. This temporary label gets replaced by the cluster ID label name that you specify. -<2> Specify the name of the cluster ID label for metrics sent to remote write storage. If you use a label name that already exists for a metric, that value is overwritten with the name of this cluster ID label. For the label name, do not use `+++__tmp_openshift_cluster_id__+++`. The final relabeling step removes labels that use this name. -<3> The `replace` write relabel action replaces the temporary label with the target label for outgoing metrics. This action is the default and is applied if no action is specified. - . Save the file to apply the changes. The new configuration is applied automatically. 
+ +// Unset the source code block attributes just to be safe. +:!configmap-name: +:!namespace-name: +:!component: diff --git a/modules/monitoring-creating-cluster-monitoring-configmap.adoc b/modules/monitoring-creating-cluster-monitoring-configmap.adoc index 053fc78929..ace2e6bafb 100644 --- a/modules/monitoring-creating-cluster-monitoring-configmap.adoc +++ b/modules/monitoring-creating-cluster-monitoring-configmap.adoc @@ -6,7 +6,7 @@ [id="creating-cluster-monitoring-configmap_{context}"] = Creating a cluster monitoring config map -You can configure the core {product-title} monitoring components by creating the `cluster-monitoring-config` `ConfigMap` object in the `openshift-monitoring` project. The {cmo-first} then configures the core components of the monitoring stack. +You can configure the core {product-title} monitoring components by creating and updating the `cluster-monitoring-config` config map in the `openshift-monitoring` project. The {cmo-first} then configures the core components of the monitoring stack. .Prerequisites diff --git a/modules/monitoring-editing-silences.adoc b/modules/monitoring-editing-silences.adoc index be7588044b..c36592bf7f 100644 --- a/modules/monitoring-editing-silences.adoc +++ b/modules/monitoring-editing-silences.adoc @@ -3,8 +3,18 @@ // * observability/monitoring/managing-alerts.adoc :_mod-docs-content-type: PROCEDURE -[id="editing-silences_{context}"] -= Editing silences + +// The ultimate solution DOES NOT NEED separate IDs and titles, it is just needed for now so that the tests will not break + +// tag::ADM[] +[id="editing-silences-adm_{context}"] += Editing silences from the Administrator perspective +// end::ADM[] + +// tag::DEV[] +[id="editing-silences-dev_{context}"] += Editing silences from the Developer perspective +// end::DEV[] You can edit a silence, which expires the existing silence and creates a new one with the changed configuration. 
@@ -18,14 +28,23 @@ ifdef::openshift-dedicated,openshift-rosa[] endif::openshift-dedicated,openshift-rosa[] * If you are a non-administrator user, you have access to the cluster as a user with the following user roles: ** The `cluster-monitoring-view` cluster role, which allows you to access Alertmanager. +// tag::ADM[] ** The `monitoring-alertmanager-edit` role, which permits you to create and silence alerts in the *Administrator* perspective in the web console. +// end::ADM[] +// tag::DEV[] ** The `monitoring-rules-edit` cluster role, which permits you to create and silence alerts in the *Developer* perspective in the web console. +// end::DEV[] .Procedure -To edit a silence in the *Administrator* perspective: +// tag::ADM[] +. From the *Administrator* perspective of the {product-title} web console, go to *Observe* -> *Alerting* -> *Silences*. +// end::ADM[] -. Go to *Observe* -> *Alerting* -> *Silences*. +// tag::DEV[] +. From the *Developer* perspective of the {product-title} web console, go to *Observe* and go to the *Silences* tab. +. Select the project that you want to edit silences for from the *Project:* list. +// end::DEV[] . For the silence you want to modify, click {kebab} and select *Edit silence*. + @@ -33,13 +52,7 @@ Alternatively, you can click *Actions* and select *Edit silence* on the *Silence . On the *Edit silence* page, make changes and click *Silence*. Doing so expires the existing silence and creates one with the updated configuration. -To edit a silence in the *Developer* perspective: -. Go to *Observe* -> ** -> *Silences*. -. For the silence you want to modify, click {kebab} and select *Edit silence*. -+ -Alternatively, you can click *Actions* and select *Edit silence* on the *Silence details* page for a silence. -. On the *Edit silence* page, make changes and click *Silence*. Doing so expires the existing silence and creates one with the updated configuration. 
diff --git a/modules/monitoring-enabling-alert-routing-for-user-defined-projects.adoc b/modules/monitoring-enabling-alert-routing-for-user-defined-projects.adoc new file mode 100644 index 0000000000..1948302436 --- /dev/null +++ b/modules/monitoring-enabling-alert-routing-for-user-defined-projects.adoc @@ -0,0 +1,22 @@ +// Module included in the following assemblies: +// +// * observability/monitoring/enabling-alert-routing-for-user-defined-projects.adoc + +:_mod-docs-content-type: CONCEPT +[id="enabling-alert-routing-for-user-defined-projects_{context}"] += Enabling alert routing for user-defined projects + +In {product-title}, an administrator can enable alert routing for user-defined projects. +This process consists of the following steps: + +ifndef::openshift-dedicated,openshift-rosa[] +* Enable alert routing for user-defined projects: +** Use the default platform Alertmanager instance. +** Use a separate Alertmanager instance only for user-defined projects. +endif::openshift-dedicated,openshift-rosa[] +ifdef::openshift-dedicated,openshift-rosa[] +* Enable alert routing for user-defined projects to use a separate Alertmanager instance. +endif::openshift-dedicated,openshift-rosa[] +* Grant users permission to configure alert routing for user-defined projects. + +After you complete these steps, developers and other users can configure custom alerts and alert routing for their user-defined projects. 
\ No newline at end of file diff --git a/modules/monitoring-example-remote-write-authentication-settings.adoc b/modules/monitoring-example-remote-write-authentication-settings.adoc index f8271a0428..baceb4e8b8 100644 --- a/modules/monitoring-example-remote-write-authentication-settings.adoc +++ b/modules/monitoring-example-remote-write-authentication-settings.adoc @@ -3,28 +3,29 @@ // * observability/monitoring/configuring-the-monitoring-stack.adoc :_mod-docs-content-type: REFERENCE + [id="example-remote-write-authentication-settings_{context}"] = Example remote write authentication settings -// Set attributes to distinguish between cluster monitoring examples and user workload monitoring examples. -ifndef::openshift-dedicated,openshift-rosa[] +// Set attributes to distinguish between cluster monitoring example (core platform monitoring - CPM) and user workload monitoring (UWM) examples +// tag::CPM[] :configmap-name: cluster-monitoring-config :namespace-name: openshift-monitoring -:prometheus-instance: prometheusK8s -endif::openshift-dedicated,openshift-rosa[] -ifdef::openshift-dedicated,openshift-rosa[] +:component: prometheusK8s +// end::CPM[] +// tag::UWM[] :configmap-name: user-workload-monitoring-config :namespace-name: openshift-user-workload-monitoring -:prometheus-instance: prometheus -endif::openshift-dedicated,openshift-rosa[] +:component: prometheus +// end::UWM[] The following samples show different authentication settings you can use to connect to a remote write endpoint. Each sample also shows how to configure a corresponding `Secret` object that contains authentication credentials and other relevant settings. 
Each sample configures authentication for use with -ifndef::openshift-dedicated,openshift-rosa[] +// tag::CPM[] default platform monitoring -endif::openshift-dedicated,openshift-rosa[] -ifdef::openshift-dedicated,openshift-rosa[] -monitoring user-defined projects -endif::openshift-dedicated,openshift-rosa[] +// end::CPM[] +// tag::UWM[] +monitoring for user-defined projects +// end::UWM[] in the `{namespace-name}` namespace. [id="remote-write-sample-yaml-aws-sigv4_{context}"] @@ -58,7 +59,7 @@ metadata: namespace: {namespace-name} data: config.yaml: | - {prometheus-instance}: + {component}: remoteWrite: - url: "https://authorization.example.com/api/write" sigv4: @@ -111,7 +112,7 @@ metadata: namespace: {namespace-name} data: config.yaml: | - {prometheus-instance}: + {component}: remoteWrite: - url: "https://basicauth.example.com/api/write" basicAuth: @@ -156,7 +157,7 @@ metadata: data: config.yaml: | enableUserWorkload: true - {prometheus-instance}: + {component}: remoteWrite: - url: "https://authorization.example.com/api/write" authorization: @@ -200,7 +201,7 @@ metadata: namespace: {namespace-name} data: config.yaml: | - {prometheus-instance}: + {component}: remoteWrite: - url: "https://test.example.com/api/write" oauth2: @@ -258,7 +259,7 @@ metadata: namespace: {namespace-name} data: config.yaml: | - {prometheus-instance}: + {component}: remoteWrite: - url: "https://remote-write-endpoint.example.com" tlsConfig: @@ -280,5 +281,6 @@ data: <4> The key in the specified `Secret` object that contains the client key secret. // Unset the source code block attributes just to be safe. 
+:!configmap-name: :!namespace-name: -:!prometheus-instance: +:!component: diff --git a/modules/monitoring-example-remote-write-queue-configuration.adoc b/modules/monitoring-example-remote-write-queue-configuration.adoc index 24c8f1d252..dfa60e9c34 100644 --- a/modules/monitoring-example-remote-write-queue-configuration.adoc +++ b/modules/monitoring-example-remote-write-queue-configuration.adoc @@ -3,28 +3,29 @@ // * observability/monitoring/configuring-the-monitoring-stack.adoc :_mod-docs-content-type: REFERENCE + [id="example-remote-write-queue-configuration_{context}"] = Example remote write queue configuration -// Set attributes to distinguish between cluster monitoring examples and user workload monitoring examples. -ifndef::openshift-dedicated,openshift-rosa[] +// Set attributes to distinguish between cluster monitoring example (core platform monitoring - CPM) and user workload monitoring (UWM) examples +// tag::CPM[] :configmap-name: cluster-monitoring-config :namespace-name: openshift-monitoring -:prometheus-instance: prometheusK8s -endif::openshift-dedicated,openshift-rosa[] -ifdef::openshift-dedicated,openshift-rosa[] +:component: prometheusK8s +// end::CPM[] +// tag::UWM[] :configmap-name: user-workload-monitoring-config :namespace-name: openshift-user-workload-monitoring -:prometheus-instance: prometheus -endif::openshift-dedicated,openshift-rosa[] +:component: prometheus +// end::UWM[] You can use the `queueConfig` object for remote write to tune the remote write queue parameters. The following example shows the queue parameters with their default values for -ifndef::openshift-dedicated,openshift-rosa[] +// tag::CPM[] default platform monitoring -endif::openshift-dedicated,openshift-rosa[] -ifdef::openshift-dedicated,openshift-rosa[] +// end::CPM[] +// tag::UWM[] monitoring for user-defined projects -endif::openshift-dedicated,openshift-rosa[] +// end::UWM[] in the `{namespace-name}` namespace. 
.Example configuration of remote write parameters with default values @@ -37,7 +38,7 @@ metadata: namespace: {namespace-name} data: config.yaml: | - {prometheus-instance}: + {component}: remoteWrite: - url: "https://remote-write-endpoint.example.com" @@ -63,6 +64,7 @@ data: <9> The samples that are older than the `sampleAgeLimit` limit are dropped from the queue. If the value is undefined or set to `0s`, the parameter is ignored. // Unset the source code block attributes just to be safe. +:!configmap-name: :!namespace-name: -:!prometheus-instance: +:!component: diff --git a/modules/monitoring-expiring-silences.adoc b/modules/monitoring-expiring-silences.adoc index 2451560e2a..30d6351b01 100644 --- a/modules/monitoring-expiring-silences.adoc +++ b/modules/monitoring-expiring-silences.adoc @@ -3,8 +3,18 @@ // * observability/monitoring/managing-alerts.adoc :_mod-docs-content-type: PROCEDURE -[id="expiring-silences_{context}"] -= Expiring silences + +// The ultimate solution DOES NOT NEED separate IDs and titles, it is just needed for now so that the tests will not break + +// tag::ADM[] +[id="expiring-silences-adm_{context}"] += Expiring silences from the Administrator perspective +// end::ADM[] + +// tag::DEV[] +[id="expiring-silences-dev_{context}"] += Expiring silences from the Developer perspective +// end::DEV[] You can expire a single silence or multiple silences. Expiring a silence deactivates it permanently. @@ -24,14 +34,24 @@ ifdef::openshift-dedicated,openshift-rosa[] endif::openshift-dedicated,openshift-rosa[] * If you are a non-administrator user, you have access to the cluster as a user with the following user roles: ** The `cluster-monitoring-view` cluster role, which allows you to access Alertmanager. +// tag::ADM[] ** The `monitoring-alertmanager-edit` role, which permits you to create and silence alerts in the *Administrator* perspective in the web console. 
+// end::ADM[] +// tag::DEV[] ** The `monitoring-rules-edit` cluster role, which permits you to create and silence alerts in the *Developer* perspective in the web console. +// end::DEV[] .Procedure -To expire a silence or silences in the *Administrator* perspective: - +// tag::ADM[] . Go to *Observe* -> *Alerting* -> *Silences*. +// end::ADM[] + +// tag::DEV[] +. From the *Developer* perspective of the {product-title} web console, go to *Observe* and go to the *Silences* tab. + +. Select the project that you want to expire a silence for from the *Project:* list. +// end::DEV[] . For the silence or silences you want to expire, select the checkbox in the corresponding row. @@ -39,12 +59,4 @@ To expire a silence or silences in the *Administrator* perspective: + Alternatively, to expire a single silence you can click *Actions* and select *Expire silence* on the *Silence details* page for a silence. -To expire a silence in the *Developer* perspective: -. Go to *Observe* -> *<project_name>* -> *Silences*. - -. For the silence or silences you want to expire, select the checkbox in the corresponding row. - -. Click *Expire 1 silence* to expire a single selected silence or *Expire _<n>_ silences* to expire multiple selected silences, where _<n>_ is the number of silences you selected. -+ -Alternatively, to expire a single silence you can click *Actions* and select *Expire silence* on the *Silence details* page for a silence.
diff --git a/modules/monitoring-getting-detailed-information-about-a-target.adoc b/modules/monitoring-getting-detailed-information-about-a-target.adoc index e2a9ad9566..2c6a78da1e 100644 --- a/modules/monitoring-getting-detailed-information-about-a-target.adoc +++ b/modules/monitoring-getting-detailed-information-about-a-target.adoc @@ -6,7 +6,7 @@ [id="getting-detailed-information-about-a-target_{context}"] = Getting detailed information about a metrics target -In the *Administrator* perspective in the {product-title} web console, you can use the *Metrics targets* page to view, search, and filter the endpoints that are currently targeted for scraping, which helps you to identify and troubleshoot problems. For example, you can view the current status of targeted endpoints to see when {product-title} Monitoring is not able to scrape metrics from a targeted component. +You can use the {product-title} web console to view, search, and filter the endpoints that are currently targeted for scraping, which helps you to identify and troubleshoot problems. For example, you can view the current status of targeted endpoints to see when {product-title} monitoring is not able to scrape metrics from a targeted component. ifndef::openshift-dedicated,openshift-rosa[] The *Metrics targets* page shows targets for default {product-title} projects and for user-defined projects. @@ -26,26 +26,24 @@ endif::openshift-dedicated,openshift-rosa[] .Procedure -. In the *Administrator* perspective, select *Observe* -> *Targets*. The *Metrics targets* page opens with a list of all service endpoint targets that are being scraped for metrics. +. In the *Administrator* perspective of the {product-title} web console, go to *Observe* -> *Targets*. The *Metrics targets* page opens with a list of all service endpoint targets that are being scraped for metrics. + --- This page shows details about targets for default {product-title} and user-defined projects. 
This page lists the following information for each target: -* Service endpoint URL being scraped -* ServiceMonitor component being monitored -* The **up** or **down** status of the target -* Namespace -* Last scrape time -* Duration of the last scrape --- +** Service endpoint URL being scraped +** The `ServiceMonitor` resource being monitored +** The **up** or **down** status of the target +** Namespace +** Last scrape time +** Duration of the last scrape -. Optional: The list of metrics targets can be long. To find a specific target, do any of the following: +. Optional: To find a specific target, perform any of the following actions: + |=== |Option |Description |Filter the targets by status and source. -a|Select filters in the *Filter* list. +a|Choose filters in the *Filter* list. The following filtering options are available: @@ -54,7 +52,7 @@ The following filtering options are available: ** **Down**. The target is currently down and not being scraped for metrics. * **Source** filters: -** **Platform**. Platform-level targets relate only to default Red Hat OpenShift Service on AWS projects. These projects provide core Red Hat OpenShift Service on AWS functionality. +** **Platform**. Platform-level targets relate only to default {product-rosa} projects. These projects provide core {product-rosa} functionality. ** **User**. User targets relate to user-defined projects. These projects are user-created and can be customized. |Search for a target by name or label. |Enter a search term in the **Text** or **Label** field next to the search box. @@ -62,13 +60,12 @@ The following filtering options are available: |Sort the targets. |Click one or more of the **Endpoint Status**, **Namespace**, **Last Scrape**, and **Scrape Duration** column headers. |=== -. Click the URL in the **Endpoint** column for a target to navigate to its **Target details** page. This page provides information about the target, including the following: -+ --- +. 
Click the URL in the **Endpoint** column for a target to go to its **Target details** page. This page provides information about the target, including the following information: + ** The endpoint URL being scraped for metrics ** The current *Up* or *Down* status of the target ** A link to the namespace -** A link to the ServiceMonitor details +** A link to the `ServiceMonitor` resource details ** Labels attached to the target ** The most recent time that the target was scraped for metrics --- + diff --git a/modules/monitoring-getting-information-about-alerts-silences-and-alerting-rules.adoc b/modules/monitoring-getting-information-about-alerts-silences-and-alerting-rules.adoc index ccd2d8c9e9..12470bac57 100644 --- a/modules/monitoring-getting-information-about-alerts-silences-and-alerting-rules.adoc +++ b/modules/monitoring-getting-information-about-alerts-silences-and-alerting-rules.adoc @@ -3,20 +3,33 @@ // * observability/monitoring/managing-alerts.adoc :_mod-docs-content-type: PROCEDURE -[id="getting-information-about-alerts-silences-and-alerting-rules_{context}"] -= Getting information about alerts, silences, and alerting rules + +// The ultimate solution DOES NOT NEED separate IDs and titles, it is just needed for now so that the tests will not break + +// tag::ADM[] +[id="getting-information-about-alerts-silences-and-alerting-rules-adm_{context}"] += Getting information about alerts, silences, and alerting rules from the Administrator perspective +// end::ADM[] + +// tag::DEV[] +[id="getting-information-about-alerts-silences-and-alerting-rules-dev_{context}"] += Getting information about alerts, silences, and alerting rules from the Developer perspective +// end::DEV[] + +// Set attributes to distinguish between cluster monitoring example (core platform monitoring - CPM) and user workload monitoring (UWM) examples The Alerting UI provides detailed information about alerts and their governing alerting rules and silences. 
.Prerequisites -* You have access to the cluster as a developer or as a user with view permissions for the project that you are viewing alerts for. +* You have access to the cluster as a user with view permissions for the project that you are viewing alerts for. .Procedure -*To obtain information about alerts in the Administrator perspective*: +// tag::ADM[] +To obtain information about alerts: -. Open the {product-title} web console and go to the *Observe* -> *Alerting* -> *Alerts* page. +. From the *Administrator* perspective of the {product-title} web console, go to the *Observe* -> *Alerting* -> *Alerts* page. . Optional: Search for alerts by name by using the *Name* field in the search list. @@ -32,9 +45,9 @@ The Alerting UI provides detailed information about alerts and their governing a * A link to its governing alerting rule * Silences for the alert, if any exist -*To obtain information about silences in the Administrator perspective*: +To obtain information about silences: -. Go to the *Observe* -> *Alerting* -> *Silences* page. +. From the *Administrator* perspective of the {product-title} web console, go to the *Observe* -> *Alerting* -> *Silences* page. . Optional: Filter the silences by name using the *Search by name* field. @@ -50,9 +63,9 @@ The Alerting UI provides detailed information about alerts and their governing a * Silence state * Number and list of firing alerts -*To obtain information about alerting rules in the Administrator perspective*: +To obtain information about alerting rules: -. Go to the *Observe* -> *Alerting* -> *Alerting rules* page. +. From the *Administrator* perspective of the {product-title} web console, go to the *Observe* -> *Alerting* -> *Alerting rules* page. . Optional: Filter alerting rules by state, severity, and source by selecting filters in the *Filter* list. 
@@ -65,10 +78,12 @@ The Alerting UI provides detailed information about alerts and their governing a * The time for which the condition should be true for an alert to fire. * A graph for each alert governed by the alerting rule, showing the value with which the alert is firing. * A table of all alerts governed by the alerting rule. +// end::ADM[] -*To obtain information about alerts, silences, and alerting rules in the Developer perspective*: +// tag::DEV[] +To obtain information about alerts, silences, and alerting rules: -. Go to the *Observe* -> *<project_name>* -> *Alerts* page. +. From the *Developer* perspective of the {product-title} web console, go to the *Observe* -> *<project_name>* -> *Alerts* page. . View details for an alert, silence, or an alerting rule: @@ -88,3 +103,4 @@ The Alerting UI provides detailed information about alerts and their governing a ==== Only alerts, silences, and alerting rules relating to the selected project are displayed in the *Developer* perspective. ==== +// end::DEV[] \ No newline at end of file diff --git a/modules/monitoring-granting-users-permission-to-configure-alert-routing-for-user-defined-projects.adoc b/modules/monitoring-granting-users-permission-to-configure-alert-routing-for-user-defined-projects.adoc index c1d2e71a62..041ff50a79 100644 --- a/modules/monitoring-granting-users-permission-to-configure-alert-routing-for-user-defined-projects.adoc +++ b/modules/monitoring-granting-users-permission-to-configure-alert-routing-for-user-defined-projects.adoc @@ -6,7 +6,6 @@ [id="granting-users-permission-to-configure-alert-routing-for-user-defined-projects_{context}"] = Granting users permission to configure alert routing for user-defined projects -[role="_abstract"] You can grant users permission to configure alert routing for user-defined projects.
.Prerequisites diff --git a/modules/monitoring-granting-users-permission-to-monitor-user-defined-projects.adoc b/modules/monitoring-granting-users-permission-to-monitor-user-defined-projects.adoc index ec9f13cdec..674cc2ac97 100644 --- a/modules/monitoring-granting-users-permission-to-monitor-user-defined-projects.adoc +++ b/modules/monitoring-granting-users-permission-to-monitor-user-defined-projects.adoc @@ -4,7 +4,7 @@ :_mod-docs-content-type: CONCEPT [id="granting-users-permission-to-monitor-user-defined-projects_{context}"] -= Granting users permission to monitor user-defined projects += Granting users permissions for monitoring for user-defined projects As a cluster administrator, you can monitor all core {product-title} and user-defined projects. diff --git a/modules/monitoring-intro-enabling-monitoring-for-user-defined-projects.adoc b/modules/monitoring-intro-enabling-monitoring-for-user-defined-projects.adoc new file mode 100644 index 0000000000..e0f8198cfe --- /dev/null +++ b/modules/monitoring-intro-enabling-monitoring-for-user-defined-projects.adoc @@ -0,0 +1,11 @@ +// Module included in the following assemblies: +// +// * observability/monitoring/enabling-monitoring-for-user-defined-projects.adoc + +:_mod-docs-content-type: CONCEPT +[id="intro-enabling-monitoring-for-user-defined-projects_{context}"] += Enabling monitoring for user-defined projects + +In {product-title}, you can enable monitoring for user-defined projects in addition to the default platform monitoring. You can monitor your own projects in {product-title} without the need for an additional monitoring solution. Using this feature centralizes monitoring for core platform components and user-defined projects. 
+ +include::snippets/monitoring-custom-prometheus-note.adoc[] diff --git a/modules/monitoring-listing-alerting-rules-for-all-projects-in-a-single-view.adoc b/modules/monitoring-listing-alerting-rules-for-all-projects-in-a-single-view.adoc index 197d58772c..56c306ae30 100644 --- a/modules/monitoring-listing-alerting-rules-for-all-projects-in-a-single-view.adoc +++ b/modules/monitoring-listing-alerting-rules-for-all-projects-in-a-single-view.adoc @@ -26,7 +26,7 @@ endif::[] .Procedure -. In the *Administrator* perspective, navigate to *Observe* -> *Alerting* -> *Alerting rules*. +. From the *Administrator* perspective of the {product-title} web console, go to *Observe* -> *Alerting* -> *Alerting rules*. . Select the *Platform* and *User* sources in the *Filter* drop-down menu. + diff --git a/modules/monitoring-maintenance-and-support.adoc b/modules/monitoring-maintenance-and-support.adoc index ac299b862a..8e52994e74 100644 --- a/modules/monitoring-maintenance-and-support.adoc +++ b/modules/monitoring-maintenance-and-support.adoc @@ -5,7 +5,7 @@ [id="maintenance-and-support_{context}"] = Maintenance and support for monitoring -Not all configuration options for the monitoring stack are exposed. The only supported way of configuring {product-title} monitoring is by configuring the {cmo-first} using the options described in the "Config map reference for the {cmo-short}". *Do not use other configurations, as they are unsupported.* +Not all configuration options for the monitoring stack are exposed. The only supported way of configuring {product-title} monitoring is by configuring the {cmo-first} using the options described in the "Config map reference for the {cmo-short}". _Do not use other configurations, as they are unsupported._ Configuration paradigms might change across Prometheus releases, and such cases can only be handled gracefully if all configuration possibilities are controlled. 
If you use configurations other than those described in the "Config map reference for the {cmo-full}", your changes will disappear because the {cmo-short} automatically reconciles any differences and resets any unsupported changes back to the originally defined state by default and by design. diff --git a/modules/monitoring-managing-alerting-rules-for-user-defined-projects.adoc b/modules/monitoring-managing-alerting-rules-for-user-defined-projects.adoc index d144090983..a612517845 100644 --- a/modules/monitoring-managing-alerting-rules-for-user-defined-projects.adoc +++ b/modules/monitoring-managing-alerting-rules-for-user-defined-projects.adoc @@ -7,8 +7,6 @@ [id="managing-alerting-rules-for-user-defined-projects_{context}"] = Managing alerting rules for user-defined projects -{product-title} monitoring ships with a set of default alerting rules. As a cluster administrator, you can view the default alerting rules. - In {product-title}, you can view, edit, and remove alerting rules in user-defined projects. ifdef::openshift-rosa,openshift-dedicated[] diff --git a/modules/monitoring-managing-core-platform-alerting-rules.adoc b/modules/monitoring-managing-core-platform-alerting-rules.adoc index 60b7fa166b..2f65acfd0f 100644 --- a/modules/monitoring-managing-core-platform-alerting-rules.adoc +++ b/modules/monitoring-managing-core-platform-alerting-rules.adoc @@ -6,7 +6,7 @@ [id="managing-core-platform-alerting-rules_{context}"] = Managing alerting rules for core platform monitoring -{product-title} {product-version} monitoring ships with a large set of default alerting rules for platform metrics. +{product-title} monitoring includes a large set of default alerting rules for platform metrics. As a cluster administrator, you can customize this set of rules in two ways: * Modify the settings for existing platform alerting rules by adjusting thresholds or by adding and modifying labels.
diff --git a/modules/monitoring-modifying-retention-time-and-size-for-prometheus-metrics-data.adoc b/modules/monitoring-modifying-retention-time-and-size-for-prometheus-metrics-data.adoc index 476111a1e2..028744396c 100644 --- a/modules/monitoring-modifying-retention-time-and-size-for-prometheus-metrics-data.adoc +++ b/modules/monitoring-modifying-retention-time-and-size-for-prometheus-metrics-data.adoc @@ -3,52 +3,31 @@ // * observability/monitoring/configuring-the-monitoring-stack.adoc :_mod-docs-content-type: PROCEDURE + [id="modifying-retention-time-and-size-for-prometheus-metrics-data_{context}"] -= Modifying the retention time and size for Prometheus metrics data += Modifying retention time and size for Prometheus metrics data -By default, Prometheus retains metrics data for the following durations: +// Set attributes to distinguish between cluster monitoring example (core platform monitoring - CPM) and user workload monitoring (UWM) examples -ifndef::openshift-dedicated,openshift-rosa[] -* *Core platform monitoring*: 15 days -* *Monitoring for user-defined projects*: 24 hours -endif::openshift-dedicated,openshift-rosa[] +// tag::CPM[] +:configmap-name: cluster-monitoring-config +:namespace-name: openshift-monitoring +:component: prometheusK8s +// end::CPM[] +// tag::UWM[] +:configmap-name: user-workload-monitoring-config +:namespace-name: openshift-user-workload-monitoring +:component: prometheus +// end::UWM[] -ifdef::openshift-dedicated,openshift-rosa[] -* *Core platform monitoring*: 11 days -* *Monitoring for user-defined projects*: 24 hours -endif::openshift-dedicated,openshift-rosa[] - -You can modify the retention time for -ifndef::openshift-dedicated,openshift-rosa[] -Prometheus -endif::openshift-dedicated,openshift-rosa[] -ifdef::openshift-dedicated,openshift-rosa[] -the Prometheus instance that monitors user-defined projects, -endif::openshift-dedicated,openshift-rosa[] -to change how soon the data is deleted. 
You can also set the maximum amount of disk space the retained metrics data uses. If the data reaches this size limit, Prometheus deletes the oldest data first until the disk space used is again below the limit. - -Note the following behaviors of these data retention settings: - -* The size-based retention policy applies to all data block directories in the `/prometheus` directory, including persistent blocks, write-ahead log (WAL) data, and m-mapped chunks. -* Data in the `/wal` and `/head_chunks` directories counts toward the retention size limit, but Prometheus never purges data from these directories based on size- or time-based retention policies. -Thus, if you set a retention size limit lower than the maximum size set for the `/wal` and `/head_chunks` directories, you have configured the system not to retain any data blocks in the `/prometheus` data directories. -* The size-based retention policy is applied only when Prometheus cuts a new data block, which occurs every two hours after the WAL contains at least three hours of data. -ifndef::openshift-dedicated,openshift-rosa[] -* If you do not explicitly define values for either `retention` or `retentionSize`, retention time defaults to 15 days for core platform monitoring and 24 hours for user-defined project monitoring. Retention size is not set. -endif::openshift-dedicated,openshift-rosa[] -ifdef::openshift-dedicated,openshift-rosa[] -* If you do not explicitly define values for either `retention` or `retentionSize`, retention time defaults to 11 days for core platform monitoring and 24 hours for user-defined project monitoring. Retention size is not set. -endif::openshift-dedicated,openshift-rosa[] -* If you define values for both `retention` and `retentionSize`, both values apply. -If any data blocks exceed the defined retention time or the defined size limit, Prometheus purges these data blocks. 
-* If you define a value for `retentionSize` and do not define `retention`, only the `retentionSize` value applies. -* If you do not define a value for `retentionSize` and only define a value for `retention`, only the `retention` value applies. -ifndef::openshift-dedicated,openshift-rosa[] -* If you set the `retentionSize` or `retention` value to `0`, the default settings apply. The default settings set retention time to 15 days for core platform monitoring and 24 hours for user-defined project monitoring. By default, retention size is not set. -endif::openshift-dedicated,openshift-rosa[] -ifdef::openshift-dedicated,openshift-rosa[] -* If you set the `retentionSize` or `retention` value to `0`, the default settings apply. The default settings set retention time to 11 days for core platform monitoring and 24 hours for user-defined project monitoring. By default, retention size is not set. -endif::openshift-dedicated,openshift-rosa[] +By default, Prometheus retains metrics data for +// tag::CPM[] +15 days for core platform monitoring. +// end::CPM[] +// tag::UWM[] +24 hours for monitoring for user-defined projects. +// end::UWM[] +You can modify the retention time for the Prometheus instance to change when the data is deleted. You can also set the maximum amount of disk space the retained metrics data uses. [NOTE] ==== @@ -57,110 +36,69 @@ Data compaction occurs every two hours. Therefore, a persistent volume (PV) migh .Prerequisites +// tag::CPM[] +* You have access to the cluster as a user with the `cluster-admin` cluster role. +* You have created the `cluster-monitoring-config` `ConfigMap` object. +// end::CPM[] +// tag::UWM[] ifndef::openshift-dedicated,openshift-rosa[] -* *If you are configuring core {product-title} monitoring components*: -** You have access to the cluster as a user with the `cluster-admin` cluster role. -** You have created the `cluster-monitoring-config` `ConfigMap` object. 
-* *If you are configuring components that monitor user-defined projects*: -** You have access to the cluster as a user with the `cluster-admin` cluster role, or as a user with the `user-workload-monitoring-config-edit` role in the `openshift-user-workload-monitoring` project. -** A cluster administrator has enabled monitoring for user-defined projects. +* You have access to the cluster as a user with the `cluster-admin` cluster role, or as a user with the `user-workload-monitoring-config-edit` role in the `openshift-user-workload-monitoring` project. +* A cluster administrator has enabled monitoring for user-defined projects. endif::openshift-dedicated,openshift-rosa[] ifdef::openshift-dedicated,openshift-rosa[] * You have access to the cluster as a user with the `dedicated-admin` role. * The `user-workload-monitoring-config` `ConfigMap` object exists. This object is created by default when the cluster is created. endif::openshift-dedicated,openshift-rosa[] +// end::UWM[] * You have installed the OpenShift CLI (`oc`). .Procedure -. Edit the `ConfigMap` object: -ifndef::openshift-dedicated,openshift-rosa[] -** *To modify the retention time and size for the Prometheus instance that monitors core {product-title} projects*: -.. Edit the `cluster-monitoring-config` `ConfigMap` object in the `openshift-monitoring` project: +. Edit the `{configmap-name}` config map in the `{namespace-name}` project: + -[source,terminal] +[source,terminal,subs="attributes+"] ---- -$ oc -n openshift-monitoring edit configmap cluster-monitoring-config +$ oc -n {namespace-name} edit configmap {configmap-name} ---- -.. Add the retention time and size configuration under `data/config.yaml`: +. 
Add the retention time and size configuration under `data/config.yaml`: + -[source,yaml] +[source,yaml,subs="attributes+"] ---- apiVersion: v1 kind: ConfigMap metadata: - name: cluster-monitoring-config - namespace: openshift-monitoring + name: {configmap-name} + namespace: {namespace-name} data: config.yaml: | - prometheusK8s: - retention: <1> - retentionSize: <2> + {component}: + retention: # <1> + retentionSize: # <2> ---- -+ <1> The retention time: a number directly followed by `ms` (milliseconds), `s` (seconds), `m` (minutes), `h` (hours), `d` (days), `w` (weeks), or `y` (years). You can also combine time values for specific times, such as `1h30m15s`. <2> The retention size: a number directly followed by `B` (bytes), `KB` (kilobytes), `MB` (megabytes), `GB` (gigabytes), `TB` (terabytes), `PB` (petabytes), or `EB` (exabytes). + -The following example sets the retention time to 24 hours and the retention size to 10 gigabytes for the Prometheus instance that monitors core {product-title} components: +The following example sets the retention time to 24 hours and the retention size to 10 gigabytes for the Prometheus instance: + -[source,yaml] +.Example of setting retention time for Prometheus +[source,yaml,subs="attributes+"] ---- apiVersion: v1 kind: ConfigMap metadata: - name: cluster-monitoring-config - namespace: openshift-monitoring + name: {configmap-name} + namespace: {namespace-name} data: config.yaml: | - prometheusK8s: - retention: 24h - retentionSize: 10GB ----- - -** *To modify the retention time and size for the Prometheus instance that monitors user-defined projects*: -endif::openshift-dedicated,openshift-rosa[] -.. Edit the `user-workload-monitoring-config` `ConfigMap` object in the `openshift-user-workload-monitoring` project: -+ -[source,terminal] ----- -$ oc -n openshift-user-workload-monitoring edit configmap user-workload-monitoring-config ----- - -..
Add the retention time and size configuration under `data/config.yaml`: -+ -[source,yaml] ----- -apiVersion: v1 -kind: ConfigMap -metadata: - name: user-workload-monitoring-config - namespace: openshift-user-workload-monitoring -data: - config.yaml: | - prometheus: - retention: <1> - retentionSize: <2> ----- -+ -<1> The retention time: a number directly followed by `ms` (milliseconds), `s` (seconds), `m` (minutes), `h` (hours), `d` (days), `w` (weeks), or `y` (years). -You can also combine time values for specific times, such as `1h30m15s`. -<2> The retention size: a number directly followed by `B` (bytes), `KB` (kilobytes), `MB` (megabytes), `GB` (gigabytes), `TB` (terabytes), `PB` (petabytes), or `EB` (exabytes). -+ -The following example sets the retention time to 24 hours and the retention size to 10 gigabytes for the Prometheus instance that monitors user-defined projects: -+ -[source,yaml] ----- -apiVersion: v1 -kind: ConfigMap -metadata: - name: user-workload-monitoring-config - namespace: openshift-user-workload-monitoring -data: - config.yaml: | - prometheus: + {component}: retention: 24h retentionSize: 10GB ---- . Save the file to apply the changes. The pods affected by the new configuration are automatically redeployed. + +// Unset the source code block attributes just to be safe. 
+:!configmap-name: +:!namespace-name: +:!component: \ No newline at end of file diff --git a/modules/monitoring-understanding-monitoring-stack-in-ha-clusters.adoc b/modules/monitoring-monitoring-stack-in-ha-clusters.adoc similarity index 90% rename from modules/monitoring-understanding-monitoring-stack-in-ha-clusters.adoc rename to modules/monitoring-monitoring-stack-in-ha-clusters.adoc index 778717ed9e..a0246553c4 100644 --- a/modules/monitoring-understanding-monitoring-stack-in-ha-clusters.adoc +++ b/modules/monitoring-monitoring-stack-in-ha-clusters.adoc @@ -3,8 +3,8 @@ // * observability/monitoring/monitoring-overview.adoc :_mod-docs-content-type: CONCEPT -[id="understanding-monitoring-stack-in-ha-clusters_{context}"] -= Understanding the monitoring stack in high-availability clusters +[id="monitoring-stack-in-ha-clusters_{context}"] += The monitoring stack in high-availability clusters By default, in multi-node clusters, the following components run in high-availability (HA) mode to prevent data loss and service interruption: diff --git a/modules/monitoring-moving-monitoring-components-to-different-nodes.adoc b/modules/monitoring-moving-monitoring-components-to-different-nodes.adoc index fd865e3a7d..d35c95cea3 100644 --- a/modules/monitoring-moving-monitoring-components-to-different-nodes.adoc +++ b/modules/monitoring-moving-monitoring-components-to-different-nodes.adoc @@ -3,36 +3,57 @@ // * observability/monitoring/configuring-the-monitoring-stack.adoc :_mod-docs-content-type: PROCEDURE + [id="moving-monitoring-components-to-different-nodes_{context}"] = Moving monitoring components to different nodes -ifndef::openshift-dedicated,openshift-rosa[] -To specify the nodes in your cluster on which monitoring stack components will run, configure the `nodeSelector` constraint in the component's `ConfigMap` object to match labels assigned to the nodes. 
+// Set attributes to distinguish between cluster monitoring example (core platform monitoring - CPM) and user workload monitoring (UWM) examples. +// tag::CPM[] +:configmap-name: cluster-monitoring-config +:namespace-name: openshift-monitoring +// end::CPM[] +// tag::UWM[] +:configmap-name: user-workload-monitoring-config +:namespace-name: openshift-user-workload-monitoring +// end::UWM[] + +// tag::CPM[] +To specify the nodes in your cluster on which monitoring stack components will run, configure the `nodeSelector` constraint for the components in the `cluster-monitoring-config` config map to match labels assigned to the nodes. [NOTE] ==== You cannot add a node selector constraint directly to an existing scheduled pod. ==== -endif::openshift-dedicated,openshift-rosa[] +// end::CPM[] -ifdef::openshift-dedicated,openshift-rosa[] -You can move any of the components that monitor workloads for user-defined projects to specific worker nodes. It is not permitted to move components to control plane or infrastructure nodes. -endif::openshift-dedicated,openshift-rosa[] +// tag::UWM[] +You can move any of the components that monitor workloads for user-defined projects to specific worker nodes. + +[WARNING] +==== +It is not permitted to move components to control plane or infrastructure nodes. +==== +// end::UWM[] .Prerequisites + +// tag::CPM[] +* You have access to the cluster as a user with the `cluster-admin` cluster role. +* You have created the `cluster-monitoring-config` `ConfigMap` object. +* You have installed the OpenShift CLI (`oc`). +// end::CPM[] + +// tag::UWM[] ifndef::openshift-dedicated,openshift-rosa[] -* *If you are configuring core {product-title} monitoring components*: -** You have access to the cluster as a user with the `cluster-admin` cluster role. -** You have created the `cluster-monitoring-config` `ConfigMap` object. 
-* *If you are configuring components that monitor user-defined projects*: -** You have access to the cluster as a user with the `cluster-admin` cluster role or as a user with the `user-workload-monitoring-config-edit` role in the `openshift-user-workload-monitoring` project. -** A cluster administrator has enabled monitoring for user-defined projects. +* You have access to the cluster as a user with the `cluster-admin` cluster role or as a user with the `user-workload-monitoring-config-edit` role in the `openshift-user-workload-monitoring` project. +* A cluster administrator has enabled monitoring for user-defined projects. endif::openshift-dedicated,openshift-rosa[] ifdef::openshift-dedicated,openshift-rosa[] * You have access to the cluster as a user with the `dedicated-admin` role. * The `user-workload-monitoring-config` `ConfigMap` object exists. This object is created by default when the cluster is created. endif::openshift-dedicated,openshift-rosa[] * You have installed the OpenShift CLI (`oc`). +// end::UWM[] .Procedure @@ -40,75 +61,38 @@ endif::openshift-dedicated,openshift-rosa[] + [source,terminal] ---- -$ oc label nodes <node_name> <node_label> +$ oc label nodes <node_name> <node_label> <1> ---- -. Edit the `ConfigMap` object: -ifndef::openshift-dedicated,openshift-rosa[] -** *To move a component that monitors core {product-title} projects*: +<1> Replace `<node_name>` with the name of the node where you want to add the label. +Replace `<node_label>` with the label that you want to add. -.. Edit the `cluster-monitoring-config` `ConfigMap` object in the `openshift-monitoring` project: +. Edit the `{configmap-name}` `ConfigMap` object in the `{namespace-name}` project: + -[source,terminal] +[source,terminal,subs="attributes+"] ---- -$ oc -n openshift-monitoring edit configmap cluster-monitoring-config +$ oc -n {namespace-name} edit configmap {configmap-name} ---- -.. Specify the node labels for the `nodeSelector` constraint for the component under `data/config.yaml`: +.
Specify the node labels for the `nodeSelector` constraint for the component under `data/config.yaml`: + -[source,yaml] +[source,yaml,subs="attributes+"] ---- apiVersion: v1 kind: ConfigMap metadata: - name: cluster-monitoring-config - namespace: openshift-monitoring + name: {configmap-name} + namespace: {namespace-name} data: config.yaml: | - <component>: <1> + # ... + <component>: #<1> nodeSelector: - <node_label_1> <2> - <node_label_2> <3> - <...> + <node_label_1> #<2> + <node_label_2> #<3> + # ... ---- <1> Substitute `<component>` with the appropriate monitoring stack component name. -<2> Substitute `<node_label_1>` with the label you added to the node. -<3> Optional: Specify additional labels. -If you specify additional labels, the pods for the component are only scheduled on the nodes that contain all of the specified labels. -+ -[NOTE] -==== -If monitoring components remain in a `Pending` state after configuring the `nodeSelector` constraint, check the pod events for errors relating to taints and tolerations. -==== - -** *To move a component that monitors user-defined projects*: -endif::openshift-dedicated,openshift-rosa[] - -.. Edit the `user-workload-monitoring-config` `ConfigMap` object in the `openshift-user-workload-monitoring` project: -+ -[source,terminal] ----- -$ oc -n openshift-user-workload-monitoring edit configmap user-workload-monitoring-config ----- - -.. Specify the node labels for the `nodeSelector` constraint for the component under `data/config.yaml`: -+ -[source,yaml] ----- -apiVersion: v1 -kind: ConfigMap -metadata: - name: user-workload-monitoring-config - namespace: openshift-user-workload-monitoring -data: - config.yaml: | - <component>: <1> - nodeSelector: - <node_label_1> <2> - <node_label_2> <3> - <...> ----- -<1> Substitute `<component>` with the appropriate monitoring stack component name. -<2> Substitute `<node_label_1>` with the label you added to the node. +<2> Substitute `<node_label_1>` with the label you added to the node. <3> Optional: Specify additional labels. If you specify additional labels, the pods for the component are only scheduled on the nodes that contain all of the specified labels.
+ @@ -118,3 +102,7 @@ If monitoring components remain in a `Pending` state after configuring the `node ==== . Save the file to apply the changes. The components specified in the new configuration are automatically moved to the new nodes, and the pods affected by the new configuration are redeployed. + +// Unset the source code block attributes just to be safe. +:!configmap-name: +:!namespace-name: diff --git a/modules/monitoring-optimizing-alerting-for-user-defined-projects.adoc b/modules/monitoring-optimizing-alerting-for-user-defined-projects.adoc index 17b2b1ecbf..cd2065ba06 100644 --- a/modules/monitoring-optimizing-alerting-for-user-defined-projects.adoc +++ b/modules/monitoring-optimizing-alerting-for-user-defined-projects.adoc @@ -3,7 +3,7 @@ // * observability/monitoring/managing-alerts.adoc :_mod-docs-content-type: CONCEPT -[id="Optimizing-alerting-for-user-defined-projects_{context}"] +[id="optimizing-alerting-for-user-defined-projects_{context}"] = Optimizing alerting for user-defined projects You can optimize alerting for your own projects by considering the following recommendations when creating alerting rules: diff --git a/modules/monitoring-querying-metrics-for-all-projects-as-an-administrator.adoc b/modules/monitoring-querying-metrics-for-all-projects-with-mon-dashboard.adoc similarity index 86% rename from modules/monitoring-querying-metrics-for-all-projects-as-an-administrator.adoc rename to modules/monitoring-querying-metrics-for-all-projects-with-mon-dashboard.adoc index e1bdf4bf8d..3ac1e7f0de 100644 --- a/modules/monitoring-querying-metrics-for-all-projects-as-an-administrator.adoc +++ b/modules/monitoring-querying-metrics-for-all-projects-with-mon-dashboard.adoc @@ -4,8 +4,12 @@ // * virt/support/virt-prometheus-queries.adoc :_mod-docs-content-type: PROCEDURE -[id="querying-metrics-for-all-projects-as-an-administrator_{context}"] -= Querying metrics for all projects as a cluster administrator 
+[id="querying-metrics-for-all-projects-with-mon-dashboard_{context}"] += Querying metrics for all projects with the {product-title} web console + +// The following section will be included in the administrator section, hence there is no need to include "administrator" in the title + +You can use the {product-title} metrics query browser to run Prometheus Query Language (PromQL) queries to examine metrics visualized on a plot. This functionality provides information about the state of a cluster and any user-defined workloads that you are monitoring. As a ifndef::openshift-dedicated,openshift-rosa[] @@ -69,7 +73,7 @@ Use the keyboard arrows to select one of these suggested items and then press En * By default, the query table shows an expanded view that lists every metric and its current value. Click the *˅* down arrowhead to minimize the expanded view for a query. ==== -. Optional: The page URL now contains the queries you ran. To use this set of queries again in the future, save this URL. +. Optional: Save the page URL to use this set of queries again in the future. . Explore the visualized metrics. Initially, all metrics from all enabled queries are shown on the plot.
Select which metrics are shown by performing any of the following actions: + diff --git a/modules/monitoring-querying-metrics-for-user-defined-projects-as-a-developer.adoc b/modules/monitoring-querying-metrics-for-user-defined-projects-with-mon-dashboard.adoc similarity index 84% rename from modules/monitoring-querying-metrics-for-user-defined-projects-as-a-developer.adoc rename to modules/monitoring-querying-metrics-for-user-defined-projects-with-mon-dashboard.adoc index 152a39e904..dd2c7cbd5d 100644 --- a/modules/monitoring-querying-metrics-for-user-defined-projects-as-a-developer.adoc +++ b/modules/monitoring-querying-metrics-for-user-defined-projects-with-mon-dashboard.adoc @@ -4,10 +4,12 @@ // * virt/support/virt-prometheus-queries.adoc :_mod-docs-content-type: PROCEDURE -[id="querying-metrics-for-user-defined-projects-as-a-developer_{context}"] -= Querying metrics for user-defined projects as a developer +[id="querying-metrics-for-user-defined-projects-with-mon-dashboard_{context}"] += Querying metrics for user-defined projects with the {product-title} web console -You can access metrics for a user-defined project as a developer or as a user with view permissions for the project. +You can use the {product-title} metrics query browser to run Prometheus Query Language (PromQL) queries to examine metrics visualized on a plot. This functionality provides information about any user-defined workloads that you are monitoring. + +As a developer, you must specify a project name when querying metrics. You must have the required privileges to view metrics for the selected project. The Metrics UI includes predefined queries, for example, CPU, memory, bandwidth, or network packet. These queries are restricted to the selected project. You can also run custom Prometheus Query Language (PromQL) queries for the project. @@ -30,7 +32,7 @@ endif::openshift-dedicated,openshift-rosa[] . 
In the *Developer* perspective of the {product-title} web console, click *Observe* and go to the *Metrics* tab. -. Select the project that you want to view metrics for in the *Project:* list. +. Select the project that you want to view metrics for from the *Project:* list. . To add one or more queries, perform any of the following actions: + @@ -62,7 +64,7 @@ Use the keyboard arrows to select one of these suggested items and then press En * By default, the query table shows an expanded view that lists every metric and its current value. Click the *˅* down arrowhead to minimize the expanded view for a query. ==== -. Optional: The page URL now contains the queries you ran. To use this set of queries again in the future, save this URL. +. Optional: Save the page URL to use this set of queries again in the future. . Explore the visualized metrics. Initially, all metrics from all enabled queries are shown on the plot. Select which metrics are shown by performing any of the following actions: + diff --git a/modules/monitoring-removing-alerting-rules-for-user-defined-projects.adoc b/modules/monitoring-removing-alerting-rules-for-user-defined-projects.adoc index fd21d052db..99f5993765 100644 --- a/modules/monitoring-removing-alerting-rules-for-user-defined-projects.adoc +++ b/modules/monitoring-removing-alerting-rules-for-user-defined-projects.adoc @@ -11,7 +11,7 @@ You can remove alerting rules for user-defined projects. .Prerequisites * You have enabled monitoring for user-defined projects. -* You are logged in as a user that has the `monitoring-rules-edit` cluster role for the project where you want to create an alerting rule. +* You are logged in as a cluster administrator or as a user that has the `monitoring-rules-edit` cluster role for the project where you want to remove an alerting rule. * You have installed the OpenShift CLI (`oc`).
.Procedure diff --git a/modules/monitoring-resizing-a-persistent-volume.adoc b/modules/monitoring-resizing-a-persistent-volume.adoc index 229eefa28a..6e2d60be54 100644 --- a/modules/monitoring-resizing-a-persistent-volume.adoc +++ b/modules/monitoring-resizing-a-persistent-volume.adoc @@ -3,10 +3,30 @@ // * observability/monitoring/configuring-the-monitoring-stack.adoc :_mod-docs-content-type: PROCEDURE + [id="resizing-a-persistent-volume_{context}"] = Resizing a persistent volume -You can resize a persistent volume (PV) for monitoring components, such as Prometheus, Thanos Ruler, or Alertmanager. You need to manually expand a persistent volume claim (PVC), and then update the config map in which the component is configured. +// Set attributes to distinguish between cluster monitoring example (core platform monitoring - CPM) and user workload monitoring (UWM) examples + +// tag::CPM[] +:configmap-name: cluster-monitoring-config +:namespace-name: openshift-monitoring +:component: prometheusK8s +// end::CPM[] +// tag::UWM[] +:configmap-name: user-workload-monitoring-config +:namespace-name: openshift-user-workload-monitoring +:component: thanosRuler +// end::UWM[] + +// tag::CPM[] +You can resize a persistent volume (PV) for monitoring components, such as Prometheus or Alertmanager. +// end::CPM[] +// tag::UWM[] +You can resize a persistent volume (PV) for the instances of Prometheus, Thanos Ruler, and Alertmanager. +// end::UWM[] +You need to manually expand a persistent volume claim (PVC), and then update the config map in which the component is configured. [IMPORTANT] ==== @@ -14,128 +34,87 @@ You can only expand the size of the PVC. Shrinking the storage size is not possi ==== .Prerequisites - +// tag::CPM[] +* You have access to the cluster as a user with the `cluster-admin` cluster role. +* You have created the `cluster-monitoring-config` `ConfigMap` object. +* You have configured at least one PVC for core {product-title} monitoring components. 
+// end::CPM[] +// tag::UWM[] +* You have access to the cluster as a user with the `cluster-admin` cluster role, or as a user with the `user-workload-monitoring-config-edit` role in the `openshift-user-workload-monitoring` project. +* A cluster administrator has enabled monitoring for user-defined projects. +* You have configured at least one PVC for components that monitor user-defined projects. +// end::UWM[] * You have installed the OpenShift CLI (`oc`). -* *If you are configuring core {product-title} monitoring components*: -** You have access to the cluster as a user with the `cluster-admin` cluster role. -** You have created the `cluster-monitoring-config` `ConfigMap` object. -** You have configured at least one PVC for core {product-title} monitoring components. -* *If you are configuring components that monitor user-defined projects*: -** You have access to the cluster as a user with the `cluster-admin` cluster role, or as a user with the `user-workload-monitoring-config-edit` role in the `openshift-user-workload-monitoring` project. -** A cluster administrator has enabled monitoring for user-defined projects. -** You have configured at least one PVC for components that monitor user-defined projects. .Procedure . Manually expand a PVC with the updated storage request. For more information, see "Expanding persistent volume claims (PVCs) with a file system" in _Expanding persistent volumes_. -. Edit the `ConfigMap` object: -** *If you are configuring core {product-title} monitoring components*: -.. Edit the `cluster-monitoring-config` `ConfigMap` object in the `openshift-monitoring` project: +. Edit the `{configmap-name}` config map in the `{namespace-name}` project: + -[source,terminal] +[source,terminal,subs="attributes+"] ---- -$ oc -n openshift-monitoring edit configmap cluster-monitoring-config +$ oc -n {namespace-name} edit configmap {configmap-name} ---- -.. Add a new storage size for the PVC configuration for the component under `data/config.yaml`: +. 
Add a new storage size for the PVC configuration for the component under `data/config.yaml`: + -[source,yaml] +[source,yaml,subs="attributes+"] ---- apiVersion: v1 kind: ConfigMap metadata: - name: cluster-monitoring-config - namespace: openshift-monitoring + name: {configmap-name} + namespace: {namespace-name} data: config.yaml: | - : #<1> + : # <1> volumeClaimTemplate: spec: resources: requests: - storage: #<2> + storage: # <2> ---- <1> The component for which you want to change the storage size. <2> Specify the new size for the storage volume. It must be greater than the previous value. + -The following example sets the new PVC request to 100 gigabytes for the Prometheus instance that monitors core {product-title} components: +The following example sets the new PVC request to +// tag::CPM[] +100 gigabytes for the Prometheus instance: +// end::CPM[] +// tag::UWM[] +20 gigabytes for Thanos Ruler: +// end::UWM[] + -[source,yaml] +.Example storage configuration for `{component}` +[source,yaml,subs="attributes+"] ---- apiVersion: v1 kind: ConfigMap metadata: - name: cluster-monitoring-config - namespace: openshift-monitoring + name: {configmap-name} + namespace: {namespace-name} data: config.yaml: | - prometheusK8s: + {component}: volumeClaimTemplate: spec: resources: requests: +# tag::CPM[] storage: 100Gi ----- - -** *If you are configuring components that monitor user-defined projects*: -+ -[NOTE] -==== -You can resize the volumes for the Thanos Ruler and for instances of Alertmanager and Prometheus that monitor user-defined projects. -==== -+ -.. Edit the `user-workload-monitoring-config` `ConfigMap` object in the `openshift-user-workload-monitoring` project: -+ -[source,terminal] ----- -$ oc -n openshift-user-workload-monitoring edit configmap user-workload-monitoring-config ----- - -.. 
Update the PVC configuration for the monitoring component under `data/config.yaml`: -+ -[source,yaml] ----- -apiVersion: v1 -kind: ConfigMap -metadata: - name: user-workload-monitoring-config - namespace: openshift-user-workload-monitoring -data: - config.yaml: | - : #<1> - volumeClaimTemplate: - spec: - resources: - requests: - storage: #<2> ----- -<1> The component for which you want to change the storage size. -<2> Specify the new size for the storage volume. It must be greater than the previous value. -+ -The following example sets the new PVC request to 20 gigabytes for Thanos Ruler: -+ -[source,yaml] ----- -apiVersion: v1 -kind: ConfigMap -metadata: - name: user-workload-monitoring-config - namespace: openshift-user-workload-monitoring -data: - config.yaml: | - thanosRuler: - volumeClaimTemplate: - spec: - resources: - requests: +# end::CPM[] +# tag::UWM[] storage: 20Gi +# end::UWM[] ---- +// tag::UWM[] + [NOTE] ==== Storage requirements for the `thanosRuler` component depend on the number of rules that are evaluated and how many samples each rule generates. ==== +// end::UWM[] . Save the file to apply the changes. The pods affected by the new configuration are automatically redeployed. + @@ -143,3 +122,8 @@ Storage requirements for the `thanosRuler` component depend on the number of rul ==== When you update the config map with a new storage size, the affected `StatefulSet` object is recreated, resulting in a temporary service outage. ==== + +// Unset the source code block attributes just to be safe. 
+:!configmap-name: +:!namespace-name: +:!component: diff --git a/modules/monitoring-resources-reference-for-the-cluster-monitoring-operator.adoc b/modules/monitoring-resources-reference-for-the-cluster-monitoring-operator.adoc index 09b3544535..072b187e85 100644 --- a/modules/monitoring-resources-reference-for-the-cluster-monitoring-operator.adoc +++ b/modules/monitoring-resources-reference-for-the-cluster-monitoring-operator.adoc @@ -9,8 +9,8 @@ This document describes the following resources deployed and managed by the Cluster Monitoring Operator (CMO): -* link:#cmo-routes-resources[Routes] -* link:#cmo-services-resources[Services] +* link:#cmo-routes-resources_{context}[Routes] +* link:#cmo-services-resources_{context}[Services] Use this information when you want to configure API endpoint connections to retrieve, send, or query metrics data. @@ -23,7 +23,7 @@ To avoid these issues, follow these recommendations: * Avoid querying endpoints frequently. Limit queries to a maximum of one every 30 seconds. * Do not try to retrieve all metrics data via the `/federate` endpoint. Query it only when you want to retrieve a limited, aggregated data set. For example, retrieving fewer than 1,000 samples for each request helps minimize the risk of performance degradation. ==== -[id="cmo-routes-resources"] +[id="cmo-routes-resources_{context}"] == CMO routes resources === openshift-monitoring/alertmanager-main @@ -50,7 +50,7 @@ Expose the `/api` endpoints of the `thanos-querier` service via a router. Expose the `/api` endpoints of the `thanos-ruler` service via a router. 
-[id="cmo-services-resources"] +[id="cmo-services-resources_{context}"] == CMO services resources === openshift-monitoring/prometheus-operator-admission-webhook diff --git a/modules/monitoring-retention-time-and-size-for-prometheus-metrics-data.adoc b/modules/monitoring-retention-time-and-size-for-prometheus-metrics-data.adoc new file mode 100644 index 0000000000..0bb05c1806 --- /dev/null +++ b/modules/monitoring-retention-time-and-size-for-prometheus-metrics-data.adoc @@ -0,0 +1,33 @@ +// Module included in the following assemblies: +// +// * observability/monitoring/configuring-the-monitoring-stack.adoc + +:_mod-docs-content-type: CONCEPT + +[id="retention-time-and-size-for-prometheus-metrics-data_{context}"] += Retention time and size for Prometheus metrics + +By default, Prometheus retains metrics data for the following durations: + +* *Core platform monitoring*: 15 days +* *Monitoring for user-defined projects*: 24 hours + +You can modify the retention time for the Prometheus instance to change how soon the data is deleted. You can also set the maximum amount of disk space the retained metrics data uses. If the data reaches this size limit, Prometheus deletes the oldest data first until the disk space used is again below the limit. + +Note the following behaviors of these data retention settings: + +* The size-based retention policy applies to all data block directories in the `/prometheus` directory, including persistent blocks, write-ahead log (WAL) data, and m-mapped chunks. +* Data in the `/wal` and `/head_chunks` directories counts toward the retention size limit, but Prometheus never purges data from these directories based on size- or time-based retention policies. +Thus, if you set a retention size limit lower than the maximum size set for the `/wal` and `/head_chunks` directories, you have configured the system not to retain any data blocks in the `/prometheus` data directories. 
+* The size-based retention policy is applied only when Prometheus cuts a new data block, which occurs every two hours after the WAL contains at least three hours of data. +* If you do not explicitly define values for either `retention` or `retentionSize`, retention time defaults to 15 days for core platform monitoring and 24 hours for user-defined project monitoring. Retention size is not set. +* If you define values for both `retention` and `retentionSize`, both values apply. +If any data blocks exceed the defined retention time or the defined size limit, Prometheus purges these data blocks. +* If you define a value for `retentionSize` and do not define `retention`, only the `retentionSize` value applies. +* If you do not define a value for `retentionSize` and only define a value for `retention`, only the `retention` value applies. +* If you set the `retentionSize` or `retention` value to `0`, the default settings apply. The default settings set retention time to 15 days for core platform monitoring and 24 hours for user-defined project monitoring. By default, retention size is not set. + +[NOTE] +==== +Data compaction occurs every two hours. Therefore, a persistent volume (PV) might fill up before compaction, potentially exceeding the `retentionSize` limit. In such cases, the `KubePersistentVolumeFillingUp` alert fires until the space on a PV is lower than the `retentionSize` limit. +==== diff --git a/modules/monitoring-reviewing-monitoring-dashboards-admin.adoc b/modules/monitoring-reviewing-monitoring-dashboards-admin.adoc index 13a9abb419..e28ae308c0 100644 --- a/modules/monitoring-reviewing-monitoring-dashboards-admin.adoc +++ b/modules/monitoring-reviewing-monitoring-dashboards-admin.adoc @@ -24,10 +24,10 @@ endif::openshift-dedicated,openshift-rosa[] . Choose a dashboard in the *Dashboard* list. Some dashboards, such as *etcd* and *Prometheus* dashboards, produce additional sub-menus when selected. . 
Optional: Select a time range for the graphs in the *Time Range* list. -+ + ** Select a pre-defined time period. -+ -** Set a custom time range by selecting *Custom time range* in the *Time Range* list. + +** Set a custom time range by clicking *Custom time range* in the *Time Range* list. + .. Input or select the *From* and *To* dates and times. + @@ -35,4 +35,4 @@ endif::openshift-dedicated,openshift-rosa[] . Optional: Select a *Refresh Interval*. -. Hover over each of the graphs within a dashboard to display detailed information about specific items. +. Hover over each of the graphs within a dashboard to display detailed information about specific items. \ No newline at end of file diff --git a/modules/monitoring-reviewing-monitoring-dashboards-developer.adoc b/modules/monitoring-reviewing-monitoring-dashboards-developer.adoc index 044d50840a..8e2ee61481 100644 --- a/modules/monitoring-reviewing-monitoring-dashboards-developer.adoc +++ b/modules/monitoring-reviewing-monitoring-dashboards-developer.adoc @@ -6,7 +6,12 @@ [id="reviewing-monitoring-dashboards-developer_{context}"] = Reviewing monitoring dashboards as a developer -In the *Developer* perspective, you can view dashboards relating to a selected project. You must have access to monitor a project to view dashboard information for it. +In the *Developer* perspective, you can view dashboards relating to a selected project. + +[NOTE] +==== +In the *Developer* perspective, you can view dashboards for only one project at a time. +==== .Prerequisites @@ -27,10 +32,10 @@ All dashboards produce additional sub-menus when selected, except *Kubernetes / ==== + . Optional: Select a time range for the graphs in the *Time Range* list. -+ + ** Select a pre-defined time period. -+ -** Set a custom time range by selecting *Custom time range* in the *Time Range* list. + +** Set a custom time range by clicking *Custom time range* in the *Time Range* list. + .. Input or select the *From* and *To* dates and times. 
+ diff --git a/modules/monitoring-searching-alerts-silences-and-alerting-rules.adoc b/modules/monitoring-searching-alerts-silences-and-alerting-rules.adoc index 214aa7534e..1ccd821978 100644 --- a/modules/monitoring-searching-alerts-silences-and-alerting-rules.adoc +++ b/modules/monitoring-searching-alerts-silences-and-alerting-rules.adoc @@ -8,7 +8,7 @@ You can filter the alerts, silences, and alerting rules that are displayed in the Alerting UI. This section provides a description of each of the available filtering options. -[discrete] +[id="understanding-alert-filters_{context}"] == Understanding alert filters In the *Administrator* perspective, the *Alerts* page in the Alerting UI provides details about alerts relating to default {product-title} and user-defined projects. The page includes a summary of severity, state, and source for each alert. The time at which an alert went into its current state is also shown. @@ -31,7 +31,7 @@ You can filter by alert state, severity, and source. By default, only *Platform* ** *Platform*. Platform-level alerts relate only to default {product-title} projects. These projects provide core {product-title} functionality. ** *User*. User alerts relate to user-defined projects. These alerts are user-created and are customizable. User-defined workload monitoring can be enabled postinstallation to provide observability into your own workloads. -[discrete] +[id="understanding-silence-filters_{context}"] == Understanding silence filters In the *Administrator* perspective, the *Silences* page in the Alerting UI provides details about silences applied to alerts in default {product-title} and user-defined projects. The page includes a summary of the state of each silence and the time at which a silence ends. @@ -43,7 +43,7 @@ You can filter by silence state. By default, only *Active* and *Pending* silence ** *Pending*. The silence has been scheduled and it is not yet active. ** *Expired*. 
The silence has expired and notifications will be sent if the conditions for an alert are true. -[discrete] +[id="understanding-alerting-rule-filters_{context}"] == Understanding alerting rule filters In the *Administrator* perspective, the *Alerting rules* page in the Alerting UI provides details about alerting rules relating to default {product-title} and user-defined projects. The page includes a summary of the state, severity, and source for each alerting rule. @@ -67,7 +67,7 @@ You can filter alerting rules by alert state, severity, and source. By default, ** *Platform*. Platform-level alerting rules relate only to default {product-title} projects. These projects provide core {product-title} functionality. ** *User*. User-defined workload alerting rules relate to user-defined projects. These alerting rules are user-created and are customizable. User-defined workload monitoring can be enabled postinstallation to provide observability into your own workloads. -[discrete] +[id="searching-filtering-alerts-dev-perspective_{context}"] == Searching and filtering alerts, silences, and alerting rules in the Developer perspective In the *Developer* perspective, the *Alerts* page in the Alerting UI provides a combined view of alerts and silences relating to the selected project. A link to the governing alerting rule is provided for each displayed alert. 
diff --git a/modules/monitoring-setting-log-levels-for-monitoring-components.adoc b/modules/monitoring-setting-log-levels-for-monitoring-components.adoc index d21b6635e7..d4c873db78 100644 --- a/modules/monitoring-setting-log-levels-for-monitoring-components.adoc +++ b/modules/monitoring-setting-log-levels-for-monitoring-components.adoc @@ -3,22 +3,33 @@ // * observability/monitoring/configuring-the-monitoring-stack.adoc :_mod-docs-content-type: PROCEDURE + [id="setting-log-levels-for-monitoring-components_{context}"] = Setting log levels for monitoring components -You can configure the log level for -ifndef::openshift-dedicated,openshift-rosa[] -Alertmanager, Prometheus Operator, Prometheus, Thanos Querier, and Thanos Ruler. -endif::openshift-dedicated,openshift-rosa[] -ifdef::openshift-dedicated,openshift-rosa[] -Alertmanager, Prometheus Operator, Prometheus, and Thanos Ruler. -endif::openshift-dedicated,openshift-rosa[] +// Set attributes to distinguish between cluster monitoring example (core platform monitoring - CPM) and user workload monitoring (UWM) examples -The following log levels can be applied to the relevant component in the -ifndef::openshift-dedicated,openshift-rosa[] -`cluster-monitoring-config` and -endif::openshift-dedicated,openshift-rosa[] -`user-workload-monitoring-config` `ConfigMap` objects: +// tag::CPM[] +:configmap-name: cluster-monitoring-config +:namespace-name: openshift-monitoring +:prometheus: prometheusK8s +:alertmanager: alertmanagerMain +:thanos: thanosQuerier +:component-name: Thanos Querier +// end::CPM[] +// tag::UWM[] +:configmap-name: user-workload-monitoring-config +:namespace-name: openshift-user-workload-monitoring +:prometheus: prometheus +:alertmanager: alertmanager +:thanos: thanosRuler +:component-name: Thanos Ruler +// end::UWM[] + +You can configure the log level for Alertmanager, Prometheus Operator, Prometheus, and {component-name}. 
+ + +The following log levels can be applied to the relevant component in the `{configmap-name}` `ConfigMap` object: * `debug`. Log debug, informational, warning, and error messages. * `info`. Log informational, warning, and error messages. @@ -29,103 +40,84 @@ The default log level is `info`. .Prerequisites +// tag::CPM[] +* You have access to the cluster as a user with the `cluster-admin` cluster role. +* You have created the `cluster-monitoring-config` `ConfigMap` object. +// end::CPM[] +// tag::UWM[] ifndef::openshift-dedicated,openshift-rosa[] -* *If you are setting a log level for Alertmanager, Prometheus Operator, Prometheus, or Thanos Querier in the `openshift-monitoring` project*: -** You have access to the cluster as a user with the `cluster-admin` cluster role. -** You have created the `cluster-monitoring-config` `ConfigMap` object. -* *If you are setting a log level for Prometheus Operator, Prometheus, or Thanos Ruler in the `openshift-user-workload-monitoring` project*: -** You have access to the cluster as a user with the `cluster-admin` cluster role, or as a user with the `user-workload-monitoring-config-edit` role in the `openshift-user-workload-monitoring` project. -** A cluster administrator has enabled monitoring for user-defined projects. +* You have access to the cluster as a user with the `cluster-admin` cluster role or as a user with the `user-workload-monitoring-config-edit` role in the `openshift-user-workload-monitoring` project. +* A cluster administrator has enabled monitoring for user-defined projects. endif::openshift-dedicated,openshift-rosa[] + ifdef::openshift-dedicated,openshift-rosa[] * You have access to the cluster as a user with the `dedicated-admin` role. * The `user-workload-monitoring-config` `ConfigMap` object exists. This object is created by default when the cluster is created. endif::openshift-dedicated,openshift-rosa[] +// end::UWM[] * You have installed the OpenShift CLI (`oc`). .Procedure -. 
Edit the `ConfigMap` object: -ifndef::openshift-dedicated,openshift-rosa[] -** *To set a log level for a component in the `openshift-monitoring` project*: -.. Edit the `cluster-monitoring-config` `ConfigMap` object in the `openshift-monitoring` project: +. Edit the `{configmap-name}` config map in the `{namespace-name}` project: + -[source,terminal] +[source,terminal,subs="attributes+"] ---- -$ oc -n openshift-monitoring edit configmap cluster-monitoring-config +$ oc -n {namespace-name} edit configmap {configmap-name} ---- -.. Add `logLevel: <log_level>` for a component under `data/config.yaml`: +. Add `logLevel: <log_level>` for a component under `data/config.yaml`: + -[source,yaml] +[source,yaml,subs="attributes+"] ---- apiVersion: v1 kind: ConfigMap metadata: - name: cluster-monitoring-config - namespace: openshift-monitoring + name: {configmap-name} + namespace: {namespace-name} data: config.yaml: | - <component>: <1> - logLevel: <log_level> <2> + <component>: # <1> + logLevel: <log_level> # <2> ---- <1> The monitoring stack component for which you are setting a log level. -For default platform monitoring, available component values are `prometheusK8s`, `alertmanagerMain`, `prometheusOperator`, and `thanosQuerier`. +Available component values are `{prometheus}`, `{alertmanager}`, `prometheusOperator`, and `{thanos}`. <2> The log level to set for the component. The available values are `error`, `warn`, `info`, and `debug`. The default value is `info`. -** *To set a log level for a component in the `openshift-user-workload-monitoring` project*: -endif::openshift-dedicated,openshift-rosa[] - -.. Edit the `user-workload-monitoring-config` `ConfigMap` object in the `openshift-user-workload-monitoring` project: -+ -[source,terminal] ----- -$ oc -n openshift-user-workload-monitoring edit configmap user-workload-monitoring-config ----- - -..
Add `logLevel: ` for a component under `data/config.yaml`: -+ -[source,yaml] ----- -apiVersion: v1 -kind: ConfigMap -metadata: - name: user-workload-monitoring-config - namespace: openshift-user-workload-monitoring -data: - config.yaml: | - : <1> - logLevel: <2> ----- -<1> The monitoring stack component for which you are setting a log level. -For user workload monitoring, available component values are `alertmanager`, `prometheus`, `prometheusOperator`, and `thanosRuler`. -<2> The log level to apply to the component. The available values are `error`, `warn`, `info`, and `debug`. The default value is `info`. - . Save the file to apply the changes. The pods affected by the new configuration are automatically redeployed. -. Confirm that the log-level has been applied by reviewing the deployment or pod configuration in the related project. The following example checks the log level in the `prometheus-operator` deployment in the `openshift-user-workload-monitoring` project: +. Confirm that the log level has been applied by reviewing the deployment or pod configuration in the related project. +The following example checks the log level for the `prometheus-operator` deployment: + -[source,terminal] +[source,terminal,subs="attributes+"] ---- -$ oc -n openshift-user-workload-monitoring get deploy prometheus-operator -o yaml | grep "log-level" +$ oc -n {namespace-name} get deploy prometheus-operator -o yaml | grep "log-level" ---- + .Example output -[source,terminal] +[source,terminal,subs="attributes+"] ---- - --log-level=debug ---- -. Check that the pods for the component are running. The following example lists the status of pods in the `openshift-user-workload-monitoring` project: +. Check that the pods for the component are running. 
The following example lists the status of pods: + -[source,terminal] +[source,terminal,subs="attributes+"] ---- -$ oc -n openshift-user-workload-monitoring get pods +$ oc -n {namespace-name} get pods ---- + [NOTE] ==== If an unrecognized `logLevel` value is included in the `ConfigMap` object, the pods for the component might not restart successfully. ==== + +// Unset the source code block attributes just to be safe. +:!configmap-name: +:!namespace-name: +:!prometheus: +:!alertmanager: +:!thanos: +:!component-name: diff --git a/modules/monitoring-setting-query-log-file-for-prometheus.adoc b/modules/monitoring-setting-query-log-file-for-prometheus.adoc index 5aef6e5a24..5171d28100 100644 --- a/modules/monitoring-setting-query-log-file-for-prometheus.adoc +++ b/modules/monitoring-setting-query-log-file-for-prometheus.adoc @@ -3,14 +3,26 @@ // * observability/monitoring/configuring-the-monitoring-stack.adoc :_mod-docs-content-type: PROCEDURE + [id="setting-query-log-file-for-prometheus_{context}"] = Enabling the query log file for Prometheus -[role="_abstract"] +// Set attributes to distinguish between cluster monitoring example (core platform monitoring - CPM) and user workload monitoring (UWM) examples + +// tag::CPM[] +:configmap-name: cluster-monitoring-config +:namespace-name: openshift-monitoring +:component: prometheusK8s +:pod: prometheus-k8s-0 +// end::CPM[] +// tag::UWM[] +:configmap-name: user-workload-monitoring-config +:namespace-name: openshift-user-workload-monitoring +:component: prometheus +:pod: prometheus-user-workload-0 +// end::UWM[] + You can configure Prometheus to write all queries that have been run by the engine to a log file. -ifndef::openshift-dedicated,openshift-rosa[] -You can do so for default platform monitoring and for user-defined workload monitoring. 
-endif::openshift-dedicated,openshift-rosa[] [IMPORTANT] ==== @@ -19,61 +31,89 @@ Because log rotation is not supported, only enable this feature temporarily when .Prerequisites +// tag::CPM[] +* You have access to the cluster as a user with the `cluster-admin` cluster role. +* You have created the `cluster-monitoring-config` `ConfigMap` object. +// end::CPM[] +// tag::UWM[] ifndef::openshift-dedicated,openshift-rosa[] -* *If you are enabling the query log file feature for Prometheus in the `openshift-monitoring` project*: -** You have access to the cluster as a user with the `cluster-admin` cluster role. -** You have created the `cluster-monitoring-config` `ConfigMap` object. -* *If you are enabling the query log file feature for Prometheus in the `openshift-user-workload-monitoring` project*: -** You have access to the cluster as a user with the `cluster-admin` cluster role, or as a user with the `user-workload-monitoring-config-edit` role in the `openshift-user-workload-monitoring` project. -** A cluster administrator has enabled monitoring for user-defined projects. +* You have access to the cluster as a user with the `cluster-admin` cluster role or as a user with the `user-workload-monitoring-config-edit` role in the `openshift-user-workload-monitoring` project. +* A cluster administrator has enabled monitoring for user-defined projects. endif::openshift-dedicated,openshift-rosa[] + ifdef::openshift-dedicated,openshift-rosa[] * You have access to the cluster as a user with the `dedicated-admin` role. * The `user-workload-monitoring-config` `ConfigMap` object exists. This object is created by default when the cluster is created. endif::openshift-dedicated,openshift-rosa[] +// end::UWM[] * You have installed the OpenShift CLI (`oc`). .Procedure -ifndef::openshift-dedicated,openshift-rosa[] -** *To set the query log file for Prometheus in the `openshift-monitoring` project*: -. 
Edit the `cluster-monitoring-config` `ConfigMap` object in the `openshift-monitoring` project: +. Edit the `{configmap-name}` config map in the `{namespace-name}` project: + -[source,terminal] +[source,terminal,subs="attributes+"] ---- -$ oc -n openshift-monitoring edit configmap cluster-monitoring-config +$ oc -n {namespace-name} edit configmap {configmap-name} ---- + +. Add the `queryLogFile` parameter for Prometheus under `data/config.yaml`: + -. Add `queryLogFile: <path>` for `prometheusK8s` under `data/config.yaml`: -+ -[source,yaml] +[source,yaml,subs="attributes+"] ---- apiVersion: v1 kind: ConfigMap metadata: - name: cluster-monitoring-config - namespace: openshift-monitoring + name: {configmap-name} + namespace: {namespace-name} data: config.yaml: | - prometheusK8s: - queryLogFile: <path> <1> + {component}: + queryLogFile: <path> # <1> ---- -<1> The full path to the file in which queries will be logged. -+ +<1> Add the full path to the file in which queries will be logged. + . Save the file to apply the changes. The pods affected by the new configuration are automatically redeployed. + +. Verify that the pods for the component are running. The following sample command lists the status of pods: + -. Verify that the pods for the component are running. The following sample command lists the status of pods in the `openshift-monitoring` project: +[source,terminal,subs="attributes+"] +---- +$ oc -n {namespace-name} get pods +---- + +// tag::CPM[] +.Example output [source,terminal] ---- -$ oc -n openshift-monitoring get pods +... +prometheus-operator-567c9bc75c-96wkj 2/2 Running 0 62m +prometheus-k8s-0 6/6 Running 1 57m +prometheus-k8s-1 6/6 Running 1 57m +thanos-querier-56c76d7df4-2xkpc 6/6 Running 0 57m +thanos-querier-56c76d7df4-j5p29 6/6 Running 0 57m +... ---- -+ +// end::CPM[] +// tag::UWM[] +.Example output +[source,terminal] +---- +...
+prometheus-operator-776fcbbd56-2nbfm 2/2 Running 0 132m +prometheus-user-workload-0 5/5 Running 1 132m +prometheus-user-workload-1 5/5 Running 1 132m +thanos-ruler-user-workload-0 3/3 Running 0 132m +thanos-ruler-user-workload-1 3/3 Running 0 132m +... +---- +// end::UWM[] + . Read the query log: + -[source,terminal] +[source,terminal,subs="attributes+"] ---- -$ oc -n openshift-monitoring exec prometheus-k8s-0 -- cat <path> +$ oc -n {namespace-name} exec {pod} -- cat <path> ---- + [IMPORTANT] @@ -81,48 +121,8 @@ $ oc -n openshift-monitoring exec prometheus-k8s-0 -- cat <path> Revert the setting in the config map after you have examined the logged query information. ==== -** *To set the query log file for Prometheus in the `openshift-user-workload-monitoring` project*: -endif::openshift-dedicated,openshift-rosa[] -. Edit the `user-workload-monitoring-config` `ConfigMap` object in the `openshift-user-workload-monitoring` project: -+ -[source,terminal] ----- -$ oc -n openshift-user-workload-monitoring edit configmap user-workload-monitoring-config ----- -+ -. Add `queryLogFile: <path>` for `prometheus` under `data/config.yaml`: -+ -[source,yaml] ----- -apiVersion: v1 -kind: ConfigMap -metadata: - name: user-workload-monitoring-config - namespace: openshift-user-workload-monitoring -data: - config.yaml: | - prometheus: - queryLogFile: <path> <1> ----- -<1> The full path to the file in which queries will be logged. -+ -. Save the file to apply the changes. The pods affected by the new configuration are automatically redeployed. -+ -. Verify that the pods for the component are running. The following example command lists the status of pods in the `openshift-user-workload-monitoring` project: -+ -[source,terminal] ----- -$ oc -n openshift-user-workload-monitoring get pods ----- -+ -.
Read the query log: -+ -[source,terminal] ----- -$ oc -n openshift-user-workload-monitoring exec prometheus-user-workload-0 -- cat ----- -+ -[IMPORTANT] -==== -Revert the setting in the config map after you have examined the logged query information. -==== +// Unset the source code block attributes just to be safe. +:!configmap-name: +:!namespace-name: +:!component: +:!pod: diff --git a/modules/monitoring-silencing-alerts.adoc b/modules/monitoring-silencing-alerts.adoc index d6674aa83f..0a20e4f2fa 100644 --- a/modules/monitoring-silencing-alerts.adoc +++ b/modules/monitoring-silencing-alerts.adoc @@ -3,13 +3,28 @@ // * observability/monitoring/managing-alerts.adoc :_mod-docs-content-type: PROCEDURE -[id="silencing-alerts_{context}"] -= Silencing alerts + +// The ultimate solution DOES NOT NEED separate IDs and titles, it is just needed for now so that the tests will not break + +// tag::ADM[] +[id="silencing-alerts-adm_{context}"] += Silencing alerts from the Administrator perspective +// end::ADM[] + +// tag::DEV[] +[id="silencing-alerts-dev_{context}"] += Silencing alerts from the Developer perspective +// end::DEV[] You can silence a specific alert or silence alerts that match a specification that you define. .Prerequisites +// tag::ADM[] +* You have access to the cluster as a user with the `cluster-admin` role. +// end::ADM[] + +// tag::DEV[] ifndef::openshift-dedicated,openshift-rosa[] * If you are a cluster administrator, you have access to the cluster as a user with the `cluster-admin` role. endif::openshift-dedicated,openshift-rosa[] @@ -20,12 +35,14 @@ endif::openshift-dedicated,openshift-rosa[] ** The `cluster-monitoring-view` cluster role, which allows you to access Alertmanager. ** The `monitoring-alertmanager-edit` role, which permits you to create and silence alerts in the *Administrator* perspective in the web console. 
** The `monitoring-rules-edit` cluster role, which permits you to create and silence alerts in the *Developer* perspective in the web console. +// end::DEV[] .Procedure -To silence a specific alert in the *Administrator* perspective: +// tag::ADM[] +To silence a specific alert: -. Go to *Observe* -> *Alerting* -> *Alerts* in the {product-title} web console. +. From the *Administrator* perspective of the {product-title} web console, go to *Observe* -> *Alerting* -> *Alerts*. . For the alert that you want to silence, click {kebab} and select *Silence alert* to open the *Silence alert* page with a default configuration for the chosen alert. @@ -38,11 +55,30 @@ You must add a comment before saving a silence. . To save the silence, click *Silence*. -To silence a specific alert in the *Developer* perspective: +To silence a set of alerts: -. Go to *Observe* -> ** -> *Alerts* in the {product-title} web console. +. From the *Administrator* perspective of the {product-title} web console, go to *Observe* -> *Alerting* -> *Silences*. -. If necessary, expand the details for the alert by selecting a greater than symbol (*>*) next to the alert name. +. Click *Create silence*. + +. On the *Create silence* page, set the schedule, duration, and label details for an alert. ++ +[NOTE] +==== +You must add a comment before saving a silence. +==== + +. To create silences for alerts that match the labels that you entered, click *Silence*. +// end::ADM[] + +// tag::DEV[] +To silence a specific alert: + +. From the *Developer* perspective of the {product-title} web console, go to *Observe* and go to the *Alerts* tab. + +. Select the project that you want to silence an alert for from the *Project:* list. + +. If necessary, expand the details for the alert by clicking a greater than symbol (*>*) next to the alert name. . Click the alert message in the expanded view to open the *Alert details* page for the alert. @@ -57,24 +93,11 @@ You must add a comment before saving a silence. . 
To save the silence, click *Silence*. -To silence a set of alerts by creating a silence configuration in the *Administrator* perspective: +To silence a set of alerts: -. Go to *Observe* -> *Alerting* -> *Silences* in the {product-title} web console. +. From the *Developer* perspective of the {product-title} web console, go to *Observe* and go to the *Silences* tab. -. Click *Create silence*. - -. On the *Create silence* page, set the schedule, duration, and label details for an alert. -+ -[NOTE] -==== -You must add a comment before saving a silence. -==== - -. To create silences for alerts that match the labels that you entered, click *Silence*. - -To silence a set of alerts by creating a silence configuration in the *Developer* perspective: - -. Go to *Observe* -> ** -> *Silences* in the {product-title} web console. +. Select the project that you want to silence alerts for from the *Project:* list. . Click *Create silence*. @@ -86,3 +109,4 @@ You must add a comment before saving a silence. ==== . To create silences for alerts that match the labels that you entered, click *Silence*. 
+// end::DEV[] \ No newline at end of file diff --git a/modules/monitoring-specifying-limits-and-requests-for-monitoring-components.adoc b/modules/monitoring-specifying-limits-and-requests-for-monitoring-components.adoc index 7287f7ecec..89f9a0da4c 100644 --- a/modules/monitoring-specifying-limits-and-requests-for-monitoring-components.adoc +++ b/modules/monitoring-specifying-limits-and-requests-for-monitoring-components.adoc @@ -3,52 +3,69 @@ // * observability/monitoring/configuring-the-monitoring-stack.adoc :_mod-docs-content-type: PROCEDURE + [id="specifying-limits-and-resource-requests-for-monitoring-components_{context}"] -= Specifying limits and requests for monitoring components += Specifying limits and requests -To configure CPU and memory resources, specify values for resource limits and requests in the appropriate `ConfigMap` object for the namespace in which the monitoring component is located: +// Set attributes to distinguish between cluster monitoring example (core platform monitoring - CPM) and user workload monitoring (UWM) examples. +// tag::CPM[] +:configmap-name: cluster-monitoring-config +:namespace-name: openshift-monitoring +:alertmanager: alertmanagerMain +:prometheus: prometheusK8s +:thanos: thanosQuerier +// end::CPM[] +// tag::UWM[] +:configmap-name: user-workload-monitoring-config +:namespace-name: openshift-user-workload-monitoring +:alertmanager: alertmanager +:prometheus: prometheus +:thanos: thanosRuler +// end::UWM[] -* The `cluster-monitoring-config` config map in the `openshift-monitoring` namespace for core platform monitoring -* The `user-workload-monitoring-config` config map in the `openshift-user-workload-monitoring` namespace for components that monitor user-defined projects +To configure CPU and memory resources, specify values for resource limits and requests in the `{configmap-name}` `ConfigMap` object in the `{namespace-name}` namespace. 
.Prerequisites -* *If you are configuring core platform monitoring components*: -** You have access to the cluster as a user with the `cluster-admin` cluster role. -** You have created a `ConfigMap` object named `cluster-monitoring-config`. -* *If you are configuring components that monitor user-defined projects*: -** You have access to the cluster as a user with the `cluster-admin` cluster role, or as a user with the `user-workload-monitoring-config-edit` role in the `openshift-user-workload-monitoring` project. +// tag::CPM[] +* You have access to the cluster as a user with the `cluster-admin` cluster role. +* You have created the `ConfigMap` object named `cluster-monitoring-config`. +// end::CPM[] + +// tag::UWM[] +* You have access to the cluster as a user with the `cluster-admin` cluster role, or as a user with the `user-workload-monitoring-config-edit` role in the `openshift-user-workload-monitoring` project. +// end::UWM[] * You have installed the OpenShift CLI (`oc`). .Procedure -. To configure core platform monitoring components, edit the `cluster-monitoring-config` config map object in the `openshift-monitoring` namespace: +. Edit the `{configmap-name}` config map in the `{namespace-name}` project: + -[source,terminal] +[source,terminal,subs="attributes+"] ---- -$ oc -n openshift-monitoring edit configmap cluster-monitoring-config +$ oc -n {namespace-name} edit configmap {configmap-name} ---- -. Add values to define resource limits and requests for each core platform monitoring component you want to configure. +. Add values to define resource limits and requests for each component you want to configure. + [IMPORTANT] ==== -Make sure that the value set for a limit is always higher than the value set for a request. +Ensure that the value set for a limit is always higher than the value set for a request. Otherwise, an error will occur, and the container will not run. 
==== + -.Example +.Example of setting resource limits and requests + -[source,yaml] +[source,yaml,subs="attributes+"] ---- apiVersion: v1 kind: ConfigMap metadata: - name: cluster-monitoring-config - namespace: openshift-monitoring + name: {configmap-name} + namespace: {namespace-name} data: config.yaml: | - alertmanagerMain: + {alertmanager}: resources: limits: cpu: 500m @@ -56,7 +73,7 @@ data: requests: cpu: 200m memory: 500Mi - prometheusK8s: + {prometheus}: resources: limits: cpu: 500m @@ -64,6 +81,15 @@ data: requests: cpu: 200m memory: 500Mi + {thanos}: + resources: + limits: + cpu: 500m + memory: 1Gi + requests: + cpu: 200m + memory: 500Mi +# tag::CPM[] prometheusOperator: resources: limits: @@ -104,14 +130,6 @@ data: requests: cpu: 200m memory: 500Mi - thanosQuerier: - resources: - limits: - cpu: 500m - memory: 1Gi - requests: - cpu: 200m - memory: 500Mi nodeExporter: resources: limits: @@ -136,10 +154,14 @@ data: requests: cpu: 20m memory: 50Mi +# end::CPM[] ---- . Save the file to apply the changes. The pods affected by the new configuration are automatically redeployed. -[role="_additional-resources"] -.Additional resources -* link:https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#requests-and-limits[Kubernetes requests and limits documentation] +// Unset the source code block attributes just to be safe. 
+:!configmap-name: +:!namespace-name: +:!alertmanager: +:!prometheus: +:!thanos: \ No newline at end of file diff --git a/modules/monitoring-supported-remote-write-authentication-settings.adoc b/modules/monitoring-supported-remote-write-authentication-settings.adoc index 864c4e70a2..55aff7883b 100644 --- a/modules/monitoring-supported-remote-write-authentication-settings.adoc +++ b/modules/monitoring-supported-remote-write-authentication-settings.adoc @@ -3,7 +3,7 @@ // * observability/monitoring/configuring-the-monitoring-stack.adoc :_mod-docs-content-type: REFERENCE -[id="supported_remote_write_authentication_settings_{context}"] +[id="supported-remote-write-authentication-settings_{context}"] = Supported remote write authentication settings You can use different methods to authenticate with a remote write endpoint. Currently supported authentication methods are AWS Signature Version 4, basic authentication, authorization, OAuth 2.0, and TLS client. The following table provides details about supported authentication methods for use with remote write. diff --git a/modules/monitoring-understanding-alert-routing-for-user-defined-projects.adoc b/modules/monitoring-understanding-alert-routing-for-user-defined-projects.adoc index a53525e3e5..00373dfc23 100644 --- a/modules/monitoring-understanding-alert-routing-for-user-defined-projects.adoc +++ b/modules/monitoring-understanding-alert-routing-for-user-defined-projects.adoc @@ -13,7 +13,7 @@ endif::openshift-dedicated,openshift-rosa[] ifdef::openshift-dedicated,openshift-rosa[] As a `dedicated-admin`, you can enable alert routing for user-defined projects. endif::openshift-dedicated,openshift-rosa[] -With this feature, you can allow users with the **alert-routing-edit** role to configure alert notification routing and receivers for user-defined projects. +With this feature, you can allow users with the `alert-routing-edit` cluster role to configure alert notification routing and receivers for user-defined projects. 
ifndef::openshift-dedicated,openshift-rosa[] These notifications are routed by the default Alertmanager instance or, if enabled, an optional Alertmanager instance dedicated to user-defined monitoring. endif::openshift-dedicated,openshift-rosa[] @@ -36,7 +36,7 @@ endif::openshift-dedicated,openshift-rosa[] [NOTE] ==== -The following are limitations of alert routing for user-defined projects: +Review the following limitations of alert routing for user-defined projects: * For user-defined alerting rules, user-defined routing is scoped to the namespace in which the resource is defined. For example, a routing configuration in namespace `ns1` only applies to `PrometheusRules` resources in the same namespace. diff --git a/modules/monitoring-understanding-the-monitoring-stack.adoc b/modules/monitoring-understanding-the-monitoring-stack.adoc index 87b1359ec5..e6922b3648 100644 --- a/modules/monitoring-understanding-the-monitoring-stack.adoc +++ b/modules/monitoring-understanding-the-monitoring-stack.adoc @@ -11,11 +11,7 @@ [id="understanding-the-monitoring-stack_{context}"] = Understanding the monitoring stack -The {product-title} -ifdef::openshift-rosa[] -(ROSA) -endif::openshift-rosa[] -monitoring stack is based on the link:https://prometheus.io/[Prometheus] open source project and its wider ecosystem. The monitoring stack includes the following: +The monitoring stack includes the following components: * *Default platform monitoring components*. 
ifndef::openshift-dedicated,openshift-rosa[] diff --git a/modules/monitoring-using-node-selectors-to-move-monitoring-components.adoc b/modules/monitoring-using-node-selectors-to-move-monitoring-components.adoc index 766176960e..ecbadb1b46 100644 --- a/modules/monitoring-using-node-selectors-to-move-monitoring-components.adoc +++ b/modules/monitoring-using-node-selectors-to-move-monitoring-components.adoc @@ -9,16 +9,15 @@ By using the `nodeSelector` constraint with labeled nodes, you can move any of the monitoring stack components to specific nodes. By doing so, you can control the placement and distribution of the monitoring components across a cluster. -By controlling placement and distribution of monitoring components, you can optimize system resource use, improve performance, and segregate workloads based on specific requirements or policies. +By controlling placement and distribution of monitoring components, you can optimize system resource use, improve performance, and separate workloads based on specific requirements or policies. -[id="how-node-selectors-work-with-other-constraints_{context}"] +[discrete] == How node selectors work with other constraints - If you move monitoring components by using node selector constraints, be aware that other constraints to control pod scheduling might exist for a cluster: * Topology spread constraints might be in place to control pod placement. -* Hard anti-affinity rules are in place for Prometheus, Thanos Querier, Alertmanager, and other monitoring components to ensure that multiple pods for these components are always spread across different nodes and are therefore always highly available. +* Hard anti-affinity rules are in place for Prometheus, Alertmanager, and other monitoring components to ensure that multiple pods for these components are always spread across different nodes and are therefore always highly available. 
When scheduling pods onto nodes, the pod scheduler tries to satisfy all existing constraints when determining pod placement. That is, all constraints compound when the pod scheduler determines which pods will be placed on which nodes. diff --git a/modules/monitoring-using-pod-topology-spread-constraints-for-monitoring.adoc b/modules/monitoring-using-pod-topology-spread-constraints-for-monitoring.adoc index 9eb16490f8..240c4c20b4 100644 --- a/modules/monitoring-using-pod-topology-spread-constraints-for-monitoring.adoc +++ b/modules/monitoring-using-pod-topology-spread-constraints-for-monitoring.adoc @@ -4,16 +4,12 @@ :_mod-docs-content-type: CONCEPT [id="using-pod-topology-spread-constraints-for-monitoring_{context}"] -= Using pod topology spread constraints for monitoring += About pod topology spread constraints for monitoring -You can use pod topology spread constraints to control how -ifndef::openshift-dedicated,openshift-rosa[] -the monitoring pods -endif::openshift-dedicated,openshift-rosa[] -ifdef::openshift-dedicated,openshift-rosa[] -the pods for user-defined monitoring -endif::openshift-dedicated,openshift-rosa[] -are spread across a network topology when {product-title} pods are deployed in multiple availability zones. +You can use pod topology spread constraints to control how the monitoring pods are spread across a network topology when {product-title} pods are deployed in multiple availability zones. Pod topology spread constraints are suitable for controlling pod scheduling within hierarchical topologies in which nodes are spread across different infrastructure levels, such as regions and zones within those regions. -Additionally, by being able to schedule pods in different zones, you can improve network latency in certain scenarios. \ No newline at end of file +Additionally, by being able to schedule pods in different zones, you can improve network latency in certain scenarios. 
+ +You can configure pod topology spread constraints for all the pods deployed by the {cmo-full} to control how pod replicas are scheduled to nodes across zones. This ensures that the pods are highly available and run more efficiently, because workloads are spread across nodes in different data centers or hierarchical infrastructure zones. + diff --git a/networking/metallb/metallb-troubleshoot-support.adoc b/networking/metallb/metallb-troubleshoot-support.adoc index 837db22619..a16d895d1e 100644 --- a/networking/metallb/metallb-troubleshoot-support.adoc +++ b/networking/metallb/metallb-troubleshoot-support.adoc @@ -25,7 +25,7 @@ include::modules/nw-metallb-metrics.adoc[leveloffset=+1] .Additional resources -* See xref:../../observability/monitoring/managing-metrics.adoc#about-querying-metrics_managing-metrics[Querying metrics] for information about using the monitoring dashboard. +* See xref:../../observability/monitoring/accessing-metrics/accessing-metrics-as-an-administrator.adoc#querying-metrics-for-all-projects-with-mon-dashboard_accessing-metrics-as-an-administrator[Querying metrics for all projects with the monitoring dashboard] for information about using the monitoring dashboard. 
// Collecting data include::modules/nw-metallb-collecting-data.adoc[leveloffset=+1] diff --git a/networking/networking_operators/ingress-operator.adoc b/networking/networking_operators/ingress-operator.adoc index cdeebd15d8..1eab471946 100644 --- a/networking/networking_operators/ingress-operator.adoc +++ b/networking/networking_operators/ingress-operator.adoc @@ -62,7 +62,7 @@ ifndef::openshift-rosa,openshift-dedicated[] * xref:../../nodes/cma/nodes-cma-autoscaling-custom-install.adoc#nodes-cma-autoscaling-custom-install_nodes-cma-autoscaling-custom-install[Installing the custom metrics autoscaler] -* xref:../../observability/monitoring/enabling-monitoring-for-user-defined-projects.adoc#enabling-monitoring-for-user-defined-projects_enabling-monitoring-for-user-defined-projects[Enabling monitoring for user-defined projects] +* xref:../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#enabling-monitoring-for-user-defined-projects-uwm_preparing-to-configure-the-monitoring-stack-uwm[Enabling monitoring for user-defined projects] * xref:../../nodes/cma/nodes-cma-autoscaling-custom-trigger-auth.adoc#nodes-cma-autoscaling-custom-trigger-auth[Understanding custom metrics autoscaler trigger authentications] diff --git a/networking/networking_operators/sr-iov-operator/configuring-sriov-operator.adoc b/networking/networking_operators/sr-iov-operator/configuring-sriov-operator.adoc index db67049c50..41f55cfbd7 100644 --- a/networking/networking_operators/sr-iov-operator/configuring-sriov-operator.adoc +++ b/networking/networking_operators/sr-iov-operator/configuring-sriov-operator.adoc @@ -19,9 +19,8 @@ include::modules/sriov-operator-metrics.adoc[leveloffset=+2] [role="_additional-resources"] .Additional resources -* xref:../../../observability/monitoring/managing-metrics.adoc#about-querying-metrics_managing-metrics[Querying metrics] -* 
xref:../../../observability/monitoring/managing-metrics.adoc#querying-metrics-for-all-projects-as-an-administrator_managing-metrics[Querying metrics for all projects as a cluster administrator] -* xref:../../../observability/monitoring/managing-metrics.adoc#querying-metrics-for-user-defined-projects-as-a-developer_managing-metrics[Querying metrics for user-defined projects as a developer] +* xref:../../../observability/monitoring/accessing-metrics/accessing-metrics-as-an-administrator.adoc#querying-metrics-for-all-projects-with-mon-dashboard_accessing-metrics-as-an-administrator[Querying metrics for all projects with the monitoring dashboard] +* xref:../../../observability/monitoring/accessing-metrics/accessing-metrics-as-a-developer.adoc#querying-metrics-for-user-defined-projects-with-mon-dashboard_accessing-metrics-as-a-developer[Querying metrics for user-defined projects as a developer] [id="configuring-sriov-operator-next-steps"] == Next steps diff --git a/networking/ptp/ptp-cloud-events-consumer-dev-reference-v2.adoc b/networking/ptp/ptp-cloud-events-consumer-dev-reference-v2.adoc index e483b1084c..5e99fc5620 100644 --- a/networking/ptp/ptp-cloud-events-consumer-dev-reference-v2.adoc +++ b/networking/ptp/ptp-cloud-events-consumer-dev-reference-v2.adoc @@ -50,6 +50,6 @@ include::modules/cnf-monitoring-fast-events-metrics.adoc[leveloffset=+1] [role="_additional-resources"] .Additional resources -* xref:../../observability/monitoring/managing-metrics.adoc#managing-metrics[Managing metrics] +* xref:../../observability/monitoring/accessing-metrics/accessing-metrics-as-a-developer.adoc#accessing-metrics-as-a-developer[Accessing metrics as a developer] include::modules/nw-ptp-operator-metrics-reference.adoc[leveloffset=+1] diff --git a/networking/ptp/ptp-cloud-events-consumer-dev-reference.adoc b/networking/ptp/ptp-cloud-events-consumer-dev-reference.adoc index 0855cf1ecf..a429a3f3e2 100644 --- a/networking/ptp/ptp-cloud-events-consumer-dev-reference.adoc +++ 
b/networking/ptp/ptp-cloud-events-consumer-dev-reference.adoc @@ -53,6 +53,6 @@ include::modules/cnf-monitoring-fast-events-metrics.adoc[leveloffset=+1] [role="_additional-resources"] .Additional resources -* xref:../../observability/monitoring/managing-metrics.adoc#managing-metrics[Managing metrics] +* xref:../../observability/monitoring/accessing-metrics/accessing-metrics-as-a-developer.adoc#accessing-metrics-as-a-developer[Accessing metrics as a developer] include::modules/nw-ptp-operator-metrics-reference.adoc[leveloffset=+1] diff --git a/observability/distr_tracing/distr_tracing_tempo/distr-tracing-tempo-configuring.adoc b/observability/distr_tracing/distr_tracing_tempo/distr-tracing-tempo-configuring.adoc index 2d66a2dcf6..85ffc53b57 100644 --- a/observability/distr_tracing/distr_tracing_tempo/distr-tracing-tempo-configuring.adoc +++ b/observability/distr_tracing/distr_tracing_tempo/distr-tracing-tempo-configuring.adoc @@ -67,6 +67,6 @@ include::modules/distr-tracing-tempo-configuring-tempostack-metrics-and-alerts.a [role="_additional-resources"] .Additional resources -* xref:../../../observability/monitoring/enabling-monitoring-for-user-defined-projects.adoc#enabling-monitoring-for-user-defined-projects[Enabling monitoring for user-defined projects] +* xref:../../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#enabling-monitoring-for-user-defined-projects-uwm_preparing-to-configure-the-monitoring-stack-uwm[Enabling monitoring for user-defined projects] include::modules/distr-tracing-tempo-configuring-tempooperator-metrics-and-alerts.adoc[leveloffset=+2] diff --git a/observability/logging/logging_alerts/custom-logging-alerts.adoc b/observability/logging/logging_alerts/custom-logging-alerts.adoc index 027049a27b..6c7cb78279 100644 --- a/observability/logging/logging_alerts/custom-logging-alerts.adoc +++ b/observability/logging/logging_alerts/custom-logging-alerts.adoc @@ -30,8 +30,12 @@ 
include::modules/logging-enabling-loki-alerts.adoc[leveloffset=+1] [role="_additional-resources"] [id="additional-resources_custom-logging-alerts"] == Additional resources +ifdef::openshift-dedicated,openshift-rosa[] * xref:../../../observability/monitoring/monitoring-overview.adoc#about-openshift-monitoring[About {product-title} monitoring] - +endif::openshift-dedicated,openshift-rosa[] +ifndef::openshift-dedicated,openshift-rosa[] +* xref:../../../observability/monitoring/about-ocp-monitoring/about-ocp-monitoring.adoc#about-ocp-monitoring[About {product-title} monitoring] +endif::openshift-dedicated,openshift-rosa[] ifdef::openshift-enterprise[] * xref:../../../post_installation_configuration/configuring-alert-notifications.adoc#configuring-alert-notifications[Configuring alert notifications] endif::[] diff --git a/observability/logging/logging_alerts/default-logging-alerts.adoc b/observability/logging/logging_alerts/default-logging-alerts.adoc index 5c6200089c..5f8d0836c4 100644 --- a/observability/logging/logging_alerts/default-logging-alerts.adoc +++ b/observability/logging/logging_alerts/default-logging-alerts.adoc @@ -19,4 +19,9 @@ include::modules/cluster-logging-elasticsearch-rules.adoc[leveloffset=+1] [role="_additional-resources"] [id="additional-resources_default-logging-alerts"] == Additional resources +ifdef::openshift-dedicated,openshift-rosa[] * xref:../../../observability/monitoring/managing-alerts.adoc#modifying-core-platform-alerting-rules_managing-alerts[Modifying core platform alerting rules] +endif::openshift-dedicated,openshift-rosa[] +ifndef::openshift-dedicated,openshift-rosa[] +* xref:../../../observability/monitoring/managing-alerts/managing-alerts-as-an-administrator.adoc#modifying-core-platform-alerting-rules_managing-alerts-as-an-administrator[Modifying core platform alerting rules] +endif::openshift-dedicated,openshift-rosa[] diff --git a/observability/logging/troubleshooting/troubleshooting-logging-alerts.adoc 
b/observability/logging/troubleshooting/troubleshooting-logging-alerts.adoc index 66d130f11f..d2190f18b7 100644 --- a/observability/logging/troubleshooting/troubleshooting-logging-alerts.adoc +++ b/observability/logging/troubleshooting/troubleshooting-logging-alerts.adoc @@ -12,7 +12,12 @@ include::modules/es-cluster-health-is-red.adoc[leveloffset=+1] [role="_additional-resources"] .Additional resources +ifdef::openshift-dedicated,openshift-rosa[] * xref:../../../observability/monitoring/reviewing-monitoring-dashboards.adoc#reviewing-monitoring-dashboards[Reviewing monitoring dashboards] +endif::openshift-dedicated,openshift-rosa[] +ifndef::openshift-dedicated,openshift-rosa[] +* xref:../../../observability/monitoring/accessing-metrics/accessing-metrics-as-an-administrator.adoc#reviewing-monitoring-dashboards-admin_accessing-metrics-as-an-administrator[Reviewing monitoring dashboards as a cluster administrator] +endif::openshift-dedicated,openshift-rosa[] * link:https://www.elastic.co/guide/en/elasticsearch/reference/7.13/fix-common-cluster-issues.html#fix-red-yellow-cluster-status[Fix a red or yellow cluster status] [id="elasticsearch-cluster-health-is-yellow"] diff --git a/observability/monitoring/about-ocp-monitoring/_attributes b/observability/monitoring/about-ocp-monitoring/_attributes new file mode 120000 index 0000000000..20cc1dcb77 --- /dev/null +++ b/observability/monitoring/about-ocp-monitoring/_attributes @@ -0,0 +1 @@ +../../_attributes/ \ No newline at end of file diff --git a/observability/monitoring/about-ocp-monitoring/about-ocp-monitoring.adoc b/observability/monitoring/about-ocp-monitoring/about-ocp-monitoring.adoc new file mode 100644 index 0000000000..c4db245903 --- /dev/null +++ b/observability/monitoring/about-ocp-monitoring/about-ocp-monitoring.adoc @@ -0,0 +1,26 @@ +:_mod-docs-content-type: ASSEMBLY +include::_attributes/common-attributes.adoc[] +[id="about-ocp-monitoring"] += About {product-title} monitoring +:context: about-ocp-monitoring 
+ +toc::[] + +ifndef::openshift-dedicated,openshift-rosa[] +{product-title} includes a preconfigured, preinstalled, and self-updating monitoring stack that provides monitoring for core platform components. You also have the option to xref:../../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#enabling-monitoring-for-user-defined-projects-uwm_preparing-to-configure-the-monitoring-stack-uwm[enable monitoring for user-defined projects]. + +A cluster administrator can xref:../../../observability/monitoring/configuring-core-platform-monitoring/preparing-to-configure-the-monitoring-stack.adoc#preparing-to-configure-the-monitoring-stack[configure the monitoring stack] with the supported configurations. {product-title} delivers monitoring best practices out of the box. + +A set of alerts is included by default to immediately notify administrators about issues with a cluster. Default dashboards in the {product-title} web console include visual representations of cluster metrics to help you to quickly understand the state of your cluster. With the {product-title} web console, you can xref:../../../observability/monitoring/accessing-metrics/accessing-metrics-as-an-administrator.adoc#accessing-metrics-as-an-administrator[access metrics] and xref:../../../observability/monitoring/managing-alerts/managing-alerts-as-an-administrator.adoc#managing-alerts-as-an-administrator[manage alerts]. + +After installing {product-title}, cluster administrators can optionally enable monitoring for user-defined projects. By using this feature, cluster administrators, developers, and other users can specify how services and pods are monitored in their own projects.
+As a cluster administrator, you can find answers to common problems such as user metrics unavailability and high consumption of disk space by Prometheus in xref:../../../observability/monitoring/troubleshooting-monitoring-issues.adoc#troubleshooting-monitoring-issues[Troubleshooting monitoring issues]. +endif::openshift-dedicated,openshift-rosa[] + +ifdef::openshift-dedicated,openshift-rosa[] +In {product-title}, you can monitor your own projects in isolation from Red{nbsp}Hat Site Reliability Engineering (SRE) platform metrics. You can monitor your own projects without the need for an additional monitoring solution. +endif::openshift-dedicated,openshift-rosa[] + + + + diff --git a/observability/monitoring/about-ocp-monitoring/images b/observability/monitoring/about-ocp-monitoring/images new file mode 120000 index 0000000000..847b03ed05 --- /dev/null +++ b/observability/monitoring/about-ocp-monitoring/images @@ -0,0 +1 @@ +../../images/ \ No newline at end of file diff --git a/observability/monitoring/about-ocp-monitoring/key-concepts.adoc b/observability/monitoring/about-ocp-monitoring/key-concepts.adoc new file mode 100644 index 0000000000..51388771d2 --- /dev/null +++ b/observability/monitoring/about-ocp-monitoring/key-concepts.adoc @@ -0,0 +1,131 @@ +:_mod-docs-content-type: ASSEMBLY +include::_attributes/common-attributes.adoc[] +[id="key-concepts"] += Understanding the monitoring stack - key concepts +:context: key-concepts + +toc::[] + +Get familiar with the {product-title} monitoring concepts and terms. Learn about how you can improve performance and scale of your cluster, store and record data, manage metrics and alerts, and more. + +[id="about-performance-and-scalability_{context}"] +== About performance and scalability + +You can optimize the performance and scale of your clusters. 
+You can configure the default monitoring stack by performing any of the following actions: + +* Control the placement and distribution of monitoring components: +** Use node selectors to move components to specific nodes. +** Assign tolerations to enable moving components to tainted nodes. +* Use pod topology spread constraints. +* Set the body size limit for metrics scraping. +* Manage CPU and memory resources. +* Use metrics collection profiles. + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/configuring-core-platform-monitoring/configuring-performance-and-scalability.adoc#configuring-performance-and-scalability[Configuring performance and scalability for core platform monitoring] +* xref:../../../observability/monitoring/configuring-user-workload-monitoring/configuring-performance-and-scalability-uwm.adoc#configuring-performance-and-scalability-uwm[Configuring performance and scalability for user workload monitoring] + +include::modules/monitoring-using-node-selectors-to-move-monitoring-components.adoc[leveloffset=+2] + +include::modules/monitoring-using-pod-topology-spread-constraints-for-monitoring.adoc[leveloffset=+2] + +include::modules/monitoring-about-specifying-limits-and-requests-for-monitoring-components.adoc[leveloffset=+2] + +include::modules/monitoring-configuring-metrics-collection-profiles.adoc[leveloffset=+2] + +[id="about-storing-and-recording-data_{context}"] +== About storing and recording data + +You can store and record data to help you protect the data and use them for troubleshooting. +You can configure the default monitoring stack by performing any of the following actions: + +* Configure persistent storage: +** Protect your metrics and alerting data from data loss by storing them in a persistent volume (PV). As a result, they can survive pods being restarted or recreated. +** Avoid getting duplicate notifications and losing silences for alerts when the Alertmanager pods are restarted. 
+* Modify the retention time and size for Prometheus and Thanos Ruler metrics data. +* Configure logging to help you troubleshoot issues with your cluster: +** Configure audit logs for Metrics Server. +** Set log levels for monitoring. +** Enable the query logging for Prometheus and Thanos Querier. + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/configuring-core-platform-monitoring/storing-and-recording-data.adoc#storing-and-recording-data[Storing and recording data for core platform monitoring] +* xref:../../../observability/monitoring/configuring-user-workload-monitoring/storing-and-recording-data-uwm.adoc#storing-and-recording-data-uwm[Storing and recording data for user workload monitoring] + +include::modules/monitoring-retention-time-and-size-for-prometheus-metrics-data.adoc[leveloffset=+2] + +// Understanding metrics +include::modules/monitoring-understanding-metrics.adoc[leveloffset=+1] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/configuring-core-platform-monitoring/configuring-metrics.adoc#configuring-metrics[Configuring metrics for core platform monitoring] +* xref:../../../observability/monitoring/configuring-user-workload-monitoring/configuring-metrics-uwm.adoc#configuring-metrics-uwm[Configuring metrics for user workload monitoring] +* xref:../../../observability/monitoring/accessing-metrics/accessing-metrics-as-an-administrator.adoc#accessing-metrics-as-an-administrator[Accessing metrics as an administrator] +* xref:../../../observability/monitoring/accessing-metrics/accessing-metrics-as-a-developer.adoc#accessing-metrics-as-a-developer[Accessing metrics as a developer] + +include::modules/monitoring-controlling-the-impact-of-unbound-attributes-in-user-defined-projects.adoc[leveloffset=+2] +include::modules/monitoring-adding-cluster-id-labels-to-metrics.adoc[leveloffset=+2] + +//About monitoring dashboards 
+[id="about-monitoring-dashboards_{context}"] +== About monitoring dashboards + +{product-title} provides a set of monitoring dashboards that help you understand the state of cluster components and user-defined workloads. + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/accessing-metrics/accessing-metrics-as-an-administrator.adoc#reviewing-monitoring-dashboards-admin_accessing-metrics-as-an-administrator[Reviewing monitoring dashboards as a cluster administrator] +* xref:../../../observability/monitoring/accessing-metrics/accessing-metrics-as-a-developer.adoc#reviewing-monitoring-dashboards-developer_accessing-metrics-as-a-developer[Reviewing monitoring dashboards as a developer] + +include::modules/monitoring-about-monitoring-dashboards.adoc[leveloffset=+2] + +//Managing alerts +include::modules/monitoring-about-managing-alerts.adoc[leveloffset=+1] + +[role="_additional-resources"] +.Additional resources + + +* xref:../../../observability/monitoring/configuring-core-platform-monitoring/configuring-alerts-and-notifications.adoc#configuring-alerts-and-notifications[Configuring alerts and notifications for core platform monitoring] +* xref:../../../observability/monitoring/configuring-user-workload-monitoring/configuring-alerts-and-notifications-uwm.adoc#configuring-alerts-and-notifications-uwm[Configuring alerts and notifications for user workload monitoring] +* xref:../../../observability/monitoring/managing-alerts/managing-alerts-as-an-administrator.adoc#managing-alerts-as-an-administrator[Managing alerts as an Administrator] +* xref:../../../observability/monitoring/managing-alerts/managing-alerts-as-a-developer.adoc#managing-alerts-as-a-developer[Managing alerts as a Developer] + +include::modules/monitoring-managing-silences.adoc[leveloffset=+2] + +include::modules/monitoring-managing-core-platform-alerting-rules.adoc[leveloffset=+2] 
+include::modules/monitoring-tips-for-optimizing-alerting-rules-for-core-platform-monitoring.adoc[leveloffset=+2] + +include::modules/monitoring-about-creating-alerting-rules-for-user-defined-projects.adoc[leveloffset=+2] +include::modules/monitoring-managing-alerting-rules-for-user-defined-projects.adoc[leveloffset=+2] +include::modules/monitoring-optimizing-alerting-for-user-defined-projects.adoc[leveloffset=+2] + +include::modules/monitoring-searching-alerts-silences-and-alerting-rules.adoc[leveloffset=+2] + + +// Overview of setting up alert routing for user-defined projects +include::modules/monitoring-understanding-alert-routing-for-user-defined-projects.adoc[leveloffset=+1] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#enabling-alert-routing-for-user-defined-projects_preparing-to-configure-the-monitoring-stack-uwm[Enabling alert routing for user-defined projects] + +// Sending notifications to external systems +include::modules/monitoring-sending-notifications-to-external-systems.adoc[leveloffset=+1] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/configuring-core-platform-monitoring/configuring-alerts-and-notifications.adoc#configuring-alert-notifications_configuring-alerts-and-notifications[Configuring alert notifications for core platform monitoring] +* xref:../../../observability/monitoring/configuring-user-workload-monitoring/configuring-alerts-and-notifications-uwm.adoc#configuring-alert-notifications_configuring-alerts-and-notifications-uwm[Configuring alert notifications for user workload monitoring] + + diff --git a/observability/monitoring/about-ocp-monitoring/modules b/observability/monitoring/about-ocp-monitoring/modules new file mode 120000 index 0000000000..36719b9de7 --- /dev/null +++ b/observability/monitoring/about-ocp-monitoring/modules @@ -0,0 
+1 @@ +../../modules/ \ No newline at end of file diff --git a/observability/monitoring/about-ocp-monitoring/monitoring-stack-architecture.adoc b/observability/monitoring/about-ocp-monitoring/monitoring-stack-architecture.adoc new file mode 100644 index 0000000000..2f84137c38 --- /dev/null +++ b/observability/monitoring/about-ocp-monitoring/monitoring-stack-architecture.adoc @@ -0,0 +1,54 @@ +:_mod-docs-content-type: ASSEMBLY +include::_attributes/common-attributes.adoc[] +[id="monitoring-stack-architecture"] += Monitoring stack architecture +:context: monitoring-stack-architecture + +toc::[] + +The {product-title} +ifdef::openshift-rosa[] +(ROSA) +endif::openshift-rosa[] +monitoring stack is based on the link:https://prometheus.io/[Prometheus] open source project and its wider ecosystem. The monitoring stack includes default monitoring components and components for monitoring user-defined projects. + +// Understanding the monitoring stack +include::modules/monitoring-understanding-the-monitoring-stack.adoc[leveloffset=+1] +ifndef::openshift-dedicated,openshift-rosa[] +//Default monitoring components +include::modules/monitoring-default-monitoring-components.adoc[leveloffset=+1] +include::modules/monitoring-default-monitoring-targets.adoc[leveloffset=+2] +[role="_additional-resources"] +.Additional resources +* xref:../../../observability/monitoring/accessing-metrics/accessing-metrics-as-an-administrator.adoc#getting-detailed-information-about-a-target_accessing-metrics-as-an-administrator[Getting detailed information about a metrics target] +endif::openshift-dedicated,openshift-rosa[] + +//Components for monitoring user-defined projects +include::modules/monitoring-components-for-monitoring-user-defined-projects.adoc[leveloffset=+1] +include::modules/monitoring-targets-for-user-defined-projects.adoc[leveloffset=+2] + +//The monitoring stack in high-availability clusters +include::modules/monitoring-monitoring-stack-in-ha-clusters.adoc[leveloffset=+1] 
+[role="_additional-resources"] +.Additional resources +* xref:../../../operators/operator_sdk/osdk-ha-sno.adoc#osdk-ha-sno[High-availability or single-node cluster detection and support] +* xref:../../../observability/monitoring/configuring-core-platform-monitoring/storing-and-recording-data.adoc#configuring-persistent-storage_storing-and-recording-data[Configuring persistent storage] +* xref:../../../observability/monitoring/configuring-core-platform-monitoring/configuring-performance-and-scalability.adoc#configuring-performance-and-scalability[Configuring performance and scalability] + +//Glossary of common terms for OCP monitoring +include::modules/monitoring-common-terms.adoc[leveloffset=+1] + +ifndef::openshift-dedicated,openshift-rosa[] +[role="_additional-resources"] +[id="additional-resources_{context}"] +== Additional resources +* xref:../../../support/remote_health_monitoring/about-remote-health-monitoring.adoc#about-remote-health-monitoring[About remote health monitoring] +* xref:../../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#granting-users-permission-to-monitor-user-defined-projects_preparing-to-configure-the-monitoring-stack-uwm[Granting users permissions for monitoring for user-defined projects] +* xref:../../../security/tls-security-profiles.adoc#tls-security-profiles[Configuring TLS security profiles] +endif::openshift-dedicated,openshift-rosa[] + + + + + + diff --git a/observability/monitoring/about-ocp-monitoring/snippets b/observability/monitoring/about-ocp-monitoring/snippets new file mode 120000 index 0000000000..5a3f5add14 --- /dev/null +++ b/observability/monitoring/about-ocp-monitoring/snippets @@ -0,0 +1 @@ +../../snippets/ \ No newline at end of file diff --git a/observability/monitoring/accessing-metrics/_attributes b/observability/monitoring/accessing-metrics/_attributes new file mode 120000 index 0000000000..20cc1dcb77 --- /dev/null +++ 
b/observability/monitoring/accessing-metrics/_attributes @@ -0,0 +1 @@ +../../_attributes/ \ No newline at end of file diff --git a/observability/monitoring/accessing-metrics/accessing-metrics-as-a-developer.adoc b/observability/monitoring/accessing-metrics/accessing-metrics-as-a-developer.adoc new file mode 100644 index 0000000000..348aaf79e1 --- /dev/null +++ b/observability/monitoring/accessing-metrics/accessing-metrics-as-a-developer.adoc @@ -0,0 +1,37 @@ +:_mod-docs-content-type: ASSEMBLY +include::_attributes/common-attributes.adoc[] +[id="accessing-metrics-as-a-developer"] += Accessing metrics as a developer +:context: accessing-metrics-as-a-developer + +toc::[] + +You can access metrics to monitor the performance of your cluster workloads. + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#understanding-metrics_key-concepts[Understanding metrics] + +//Viewing a list of available metrics +include::modules/monitoring-viewing-a-list-of-available-metrics.adoc[leveloffset=+1] + +//Querying metrics for user-defined projects with the OCP web console +include::modules/monitoring-querying-metrics-for-user-defined-projects-with-mon-dashboard.adoc[leveloffset=+1] + +[role="_additional-resources"] +.Additional resources + +* link:https://prometheus.io/docs/prometheus/latest/querying/basics/[Querying Prometheus] (Prometheus documentation) + +//Reviewing monitoring dashboards as a developer +include::modules/monitoring-reviewing-monitoring-dashboards-developer.adoc[leveloffset=+1] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#about-monitoring-dashboards_key-concepts[About monitoring dashboards] +* xref:../../../applications/odc-monitoring-project-and-application-metrics-using-developer-perspective.adoc#monitoring-project-and-application-metrics-using-developer-perspective[Monitoring project 
and application metrics using the Developer perspective] + + + diff --git a/observability/monitoring/accessing-metrics/accessing-metrics-as-an-administrator.adoc b/observability/monitoring/accessing-metrics/accessing-metrics-as-an-administrator.adoc new file mode 100644 index 0000000000..d552bf30d1 --- /dev/null +++ b/observability/monitoring/accessing-metrics/accessing-metrics-as-an-administrator.adoc @@ -0,0 +1,37 @@ +:_mod-docs-content-type: ASSEMBLY +include::_attributes/common-attributes.adoc[] +[id="accessing-metrics-as-an-administrator"] += Accessing metrics as an administrator +:context: accessing-metrics-as-an-administrator + +toc::[] + +You can access metrics to monitor the performance of cluster components and your workloads. + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#understanding-metrics_key-concepts[Understanding metrics] + +//Viewing a list of available metrics +include::modules/monitoring-viewing-a-list-of-available-metrics.adoc[leveloffset=+1] + +//Querying metrics for all projects with the OCP web console +include::modules/monitoring-querying-metrics-for-all-projects-with-mon-dashboard.adoc[leveloffset=+1] + +[role="_additional-resources"] +.Additional resources + +* link:https://prometheus.io/docs/prometheus/latest/querying/basics/[Querying Prometheus] (Prometheus documentation) + +//Getting detailed information about a metrics target +include::modules/monitoring-getting-detailed-information-about-a-target.adoc[leveloffset=+1] + +//Reviewing monitoring dashboards as a cluster administrator +include::modules/monitoring-reviewing-monitoring-dashboards-admin.adoc[leveloffset=+1] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#about-monitoring-dashboards_key-concepts[About monitoring dashboards] + diff --git 
a/observability/monitoring/accessing-metrics/accessing-monitoring-apis-by-using-the-cli.adoc b/observability/monitoring/accessing-metrics/accessing-monitoring-apis-by-using-the-cli.adoc new file mode 100644 index 0000000000..ac3bd4e24c --- /dev/null +++ b/observability/monitoring/accessing-metrics/accessing-monitoring-apis-by-using-the-cli.adoc @@ -0,0 +1,52 @@ +:_mod-docs-content-type: ASSEMBLY +include::_attributes/common-attributes.adoc[] +[id="accessing-monitoring-apis-by-using-the-cli"] += Accessing monitoring APIs by using the CLI +:context: accessing-monitoring-apis-by-using-the-cli + +toc::[] + +In {product-title}, you can access web service APIs for some monitoring components from the command-line interface (CLI). + +[IMPORTANT] +==== +In certain situations, accessing API endpoints can degrade the performance and scalability of your cluster, especially if you use endpoints to retrieve, send, or query large amounts of metrics data. + +To avoid these issues, consider the following recommendations: + +* Avoid querying endpoints frequently. Limit queries to a maximum of one every 30 seconds. +* Do not retrieve all metrics data through the `/federate` endpoint for Prometheus. Query the endpoint only when you want to retrieve a limited, aggregated data set. For example, retrieving fewer than 1,000 samples for each request helps minimize the risk of performance degradation. 
+==== + +// About accessing monitoring web service APIs +include::modules/monitoring-about-accessing-monitoring-web-service-apis.adoc[leveloffset=+1] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/accessing-metrics/accessing-metrics-as-an-administrator.adoc#reviewing-monitoring-dashboards-admin_accessing-metrics-as-an-administrator[Reviewing monitoring dashboards as a cluster administrator] +* xref:../../../observability/monitoring/accessing-metrics/accessing-metrics-as-a-developer.adoc#reviewing-monitoring-dashboards-developer_accessing-metrics-as-a-developer[Reviewing monitoring dashboards as a developer] + +// Accessing a monitoring web service API +include::modules/monitoring-accessing-third-party-monitoring-web-service-apis.adoc[leveloffset=+1] + +// Querying metrics by using the federation endpoint for Prometheus +include::modules/monitoring-querying-metrics-by-using-the-federation-endpoint-for-prometheus.adoc[leveloffset=+1] + +// Accessing metrics from outside the cluster for custom applications +include::modules/accessing-metrics-outside-cluster.adoc[leveloffset=+1] + +// Resources reference for the Cluster Monitoring Operator +include::modules/monitoring-resources-reference-for-the-cluster-monitoring-operator.adoc[leveloffset=+1] + +[role="_additional-resources"] +[id="additional-resources_{context}"] +== Additional resources + +* xref:../../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#enabling-monitoring-for-user-defined-projects-uwm_preparing-to-configure-the-monitoring-stack-uwm[Enabling monitoring for user-defined projects] +* xref:../../../observability/monitoring/configuring-core-platform-monitoring/configuring-metrics.adoc#configuring-remote-write-storage_configuring-metrics[Configuring remote write storage for core platform monitoring] +* 
xref:../../../observability/monitoring/configuring-user-workload-monitoring/configuring-metrics-uwm.adoc#configuring-remote-write-storage_configuring-metrics-uwm[Configuring remote write storage for monitoring of user-defined projects] +* xref:../../../observability/monitoring/accessing-metrics/accessing-metrics-as-an-administrator.adoc#accessing-metrics-as-an-administrator[Accessing metrics as an administrator] +* xref:../../../observability/monitoring/accessing-metrics/accessing-metrics-as-a-developer.adoc#accessing-metrics-as-a-developer[Accessing metrics as a developer] +* xref:../../../observability/monitoring/managing-alerts/managing-alerts-as-an-administrator.adoc#managing-alerts-as-an-administrator[Managing alerts as an Administrator] +* xref:../../../observability/monitoring/managing-alerts/managing-alerts-as-a-developer.adoc#managing-alerts-as-a-developer[Managing alerts as a Developer] diff --git a/observability/monitoring/accessing-metrics/images b/observability/monitoring/accessing-metrics/images new file mode 120000 index 0000000000..847b03ed05 --- /dev/null +++ b/observability/monitoring/accessing-metrics/images @@ -0,0 +1 @@ +../../images/ \ No newline at end of file diff --git a/observability/monitoring/accessing-metrics/modules b/observability/monitoring/accessing-metrics/modules new file mode 120000 index 0000000000..36719b9de7 --- /dev/null +++ b/observability/monitoring/accessing-metrics/modules @@ -0,0 +1 @@ +../../modules/ \ No newline at end of file diff --git a/observability/monitoring/accessing-metrics/snippets b/observability/monitoring/accessing-metrics/snippets new file mode 120000 index 0000000000..5a3f5add14 --- /dev/null +++ b/observability/monitoring/accessing-metrics/snippets @@ -0,0 +1 @@ +../../snippets/ \ No newline at end of file diff --git a/observability/monitoring/accessing-third-party-monitoring-apis.adoc b/observability/monitoring/accessing-third-party-monitoring-apis.adoc index afa25fea4e..4d45fe011b 100644 --- 
a/observability/monitoring/accessing-third-party-monitoring-apis.adoc +++ b/observability/monitoring/accessing-third-party-monitoring-apis.adoc @@ -2,12 +2,11 @@ [id="accessing-third-party-monitoring-apis"] = Accessing monitoring APIs by using the CLI include::_attributes/common-attributes.adoc[] -:context: accessing-monitoring-apis-by-using-the-cli +:context: accessing-third-party-monitoring-apis toc::[] -[role="_abstract"] -In {product-title} {product-version}, you can access web service APIs for some monitoring components from the command line interface (CLI). +In {product-title}, you can access web service APIs for some monitoring components from the command line interface (CLI). [IMPORTANT] ==== @@ -22,6 +21,7 @@ To avoid these issues, follow these recommendations: // Accessing service APIs for third-party monitoring components include::modules/monitoring-about-accessing-monitoring-web-service-apis.adoc[leveloffset=+1] +[role="_additional-resources"] .Additional resources * xref:../../observability/monitoring/reviewing-monitoring-dashboards.adoc#reviewing-monitoring-dashboards[Reviewing monitoring dashboards] @@ -36,13 +36,12 @@ include::modules/accessing-metrics-outside-cluster.adoc[leveloffset=+1] // Resources reference for accessing API endpoints include::modules/monitoring-resources-reference-for-the-cluster-monitoring-operator.adoc[leveloffset=+1] - [role="_additional-resources"] -[id="additional-resources_accessing-monitoring-apis-by-using-the-cli"] +[id="additional-resources_{context}"] == Additional resources ifndef::openshift-dedicated,openshift-rosa[] -* xref:../../observability/monitoring/enabling-monitoring-for-user-defined-projects.adoc#enabling-monitoring-for-user-defined-projects_enabling-monitoring-for-user-defined-projects[Enabling monitoring for user-defined projects] +* 
xref:../../observability/monitoring/enabling-monitoring-for-user-defined-projects.adoc#enabling-monitoring-for-user-defined-projects-uwm_enabling-monitoring-for-user-defined-projects[Enabling monitoring for user-defined projects] endif::openshift-dedicated,openshift-rosa[] * xref:../../observability/monitoring/configuring-the-monitoring-stack.adoc#configuring-remote-write-storage_configuring-the-monitoring-stack[Configuring remote write storage] * xref:../../observability/monitoring/managing-metrics.adoc#managing-metrics[Managing metrics] diff --git a/observability/monitoring/common-monitoring-configuration-scenarios.adoc b/observability/monitoring/common-monitoring-configuration-scenarios.adoc index 3b6714f6cc..03cbdb91b4 100644 --- a/observability/monitoring/common-monitoring-configuration-scenarios.adoc +++ b/observability/monitoring/common-monitoring-configuration-scenarios.adoc @@ -28,7 +28,7 @@ Any other configuration options listed here are optional. * xref:../../observability/monitoring/configuring-the-monitoring-stack.adoc#creating-cluster-monitoring-configmap_configuring-the-monitoring-stack[Create the `cluster-monitoring-config` `ConfigMap` object] if it does not exist. * xref:../../observability/monitoring/managing-alerts.adoc#sending-notifications-to-external-systems_managing-alerts[Configure alert receivers] so that Alertmanager can send alerts to an external notification system such as email, Slack, or PagerDuty. -* xref:../../observability/monitoring/managing-alerts.adoc#configuring-notifications-for-default-platform-alerts_managing-alerts[Configure notifications for default platform alerts]. +* xref:../../observability/monitoring/managing-alerts.adoc#configuring-alert-routing-default-platform-alerts_managing-alerts[Configure notifications for default platform alerts]. 
* For shorter term data retention, xref:../../observability/monitoring/configuring-the-monitoring-stack.adoc#configuring-persistent-storage_configuring-the-monitoring-stack[configure persistent storage] for Prometheus and Alertmanager to store metrics and alert data. Specify the metrics data retention parameters for Prometheus and Thanos Ruler. + @@ -57,7 +57,7 @@ With the monitoring stack configured to suit your needs, Prometheus collects met You can go to the *Observe* pages in the {product-title} web console to view and query collected metrics, manage alerts, identify performance bottlenecks, and scale resources as needed: * xref:../../observability/monitoring/reviewing-monitoring-dashboards.adoc#reviewing-monitoring-dashboards[View dashboards] to visualize collected metrics, troubleshoot alerts, and monitor other information about your cluster. -* xref:../../observability/monitoring/managing-metrics.adoc#about-querying-metrics_managing-metrics[Query collected metrics] by creating PromQL queries or using predefined queries. +* xref:../../observability/monitoring/managing-metrics.adoc#querying-metrics-for-all-projects-with-mon-dashboard_managing-metrics[Query collected metrics] by creating PromQL queries or using predefined queries. [id="configuring-monitoring-for-user-defined-projects-getting-started_{context}"] == Configuring monitoring for user-defined projects: Getting started @@ -67,20 +67,20 @@ Non-administrator users such as developers can then monitor their own projects o Cluster administrators typically complete the following activities to configure user-defined projects so that users can view collected metrics, query these metrics, and receive alerts for their own projects: -* xref:../../observability/monitoring/enabling-monitoring-for-user-defined-projects.adoc#enabling-monitoring-for-user-defined-projects_enabling-monitoring-for-user-defined-projects[Enable user-defined projects]. 
+* xref:../../observability/monitoring/enabling-monitoring-for-user-defined-projects.adoc#enabling-monitoring-for-user-defined-projects-uwm_enabling-monitoring-for-user-defined-projects[Enable user-defined projects]. * xref:../../observability/monitoring/enabling-monitoring-for-user-defined-projects.adoc#granting-users-permission-to-monitor-user-defined-projects_enabling-monitoring-for-user-defined-projects[Assign the `monitoring-rules-view`, `monitoring-rules-edit`, or `monitoring-edit` cluster roles] to grant non-administrator users permissions to monitor user-defined projects. -* xref:../../observability/monitoring/enabling-monitoring-for-user-defined-projects.adoc#granting-users-permission-to-configure-monitoring-for-user-defined-projects_enabling-monitoring-for-user-defined-projects[Assign the `user-workload-monitoring-config-edit` role] to grant non-administrator users permission to configure user-defined projects. +* xref:../../observability/monitoring/enabling-monitoring-for-user-defined-projects.adoc#granting-users-permission-to-configure-monitoring-for-user-defined-projects_enabling-monitoring-for-user-defined-projects[Grant non-administrator users permission to configure user-defined projects] by assigning the `user-workload-monitoring-config-edit` role. * xref:../../observability/monitoring/enabling-alert-routing-for-user-defined-projects.adoc#enabling-alert-routing-for-user-defined-projects[Enable alert routing for user-defined projects] so that developers and other users can configure custom alerts and alert routing for their projects. * If needed, configure alert routing for user-defined projects to xref:../../observability/monitoring/enabling-alert-routing-for-user-defined-projects.adoc#enabling-a-separate-alertmanager-instance-for-user-defined-alert-routing_enabling-alert-routing-for-user-defined-projects[use an optional Alertmanager instance dedicated for use only by user-defined projects]. 
* xref:../../observability/monitoring/managing-alerts.adoc#configuring-different-alert-receivers-for-default-platform-alerts-and-user-defined-alerts_managing-alerts[Configure alert receivers] for user-defined projects. -* xref:../../observability/monitoring/managing-alerts.adoc#configuring-notifications-for-user-defined-alerts_managing-alerts[Configure notifications for user-defined alerts]. +* xref:../../observability/monitoring/managing-alerts.adoc#configuring-alert-routing-user-defined-alerts-secret_managing-alerts[Configure notifications for user-defined alerts]. After monitoring for user-defined projects is enabled and configured, developers and other non-administrator users can then perform the following activities to set up and use monitoring for their own projects: * xref:../../observability/monitoring/managing-metrics.adoc#setting-up-metrics-collection-for-user-defined-projects_managing-metrics[Deploy and monitor services]. * xref:../../observability/monitoring/managing-alerts.adoc#creating-alerting-rules-for-user-defined-projects_managing-alerts[Create and manage alerting rules]. * xref:../../observability/monitoring/managing-alerts.adoc#managing-alerts[Receive and manage alerts] for their projects. -* If granted the `user-workload-monitoring-config-edit` role, xref:../../observability/monitoring/managing-alerts.adoc#creating-alert-routing-for-user-defined-projects_managing-alerts[configure alert routing]. +* If granted the `user-workload-monitoring-config-edit` role, xref:../../observability/monitoring/managing-alerts.adoc#configuring-alert-routing-for-user-defined-projects_managing-alerts[configure alert routing]. * Use the {product-title} web console to xref:../../observability/monitoring/reviewing-monitoring-dashboards.adoc#reviewing-monitoring-dashboards-developer_reviewing-monitoring-dashboards[view dashboards]. 
-* xref:../../observability/monitoring/managing-metrics.adoc#querying-metrics-for-user-defined-projects-as-a-developer_managing-metrics[Query the collected metrics] by creating PromQL queries or using predefined queries. +* xref:../../observability/monitoring/managing-metrics.adoc#querying-metrics-for-user-defined-projects-with-mon-dashboard_managing-metrics[Query the collected metrics] by creating PromQL queries or using predefined queries. diff --git a/observability/monitoring/config-map-reference-for-the-cluster-monitoring-operator.adoc b/observability/monitoring/config-map-reference-for-the-cluster-monitoring-operator.adoc index 8bb5ab53fd..54baec8e1e 100644 --- a/observability/monitoring/config-map-reference-for-the-cluster-monitoring-operator.adoc +++ b/observability/monitoring/config-map-reference-for-the-cluster-monitoring-operator.adoc @@ -32,7 +32,14 @@ The configuration file is always defined under the `config.yaml` key in the conf ==== * Not all configuration parameters for the monitoring stack are exposed. Only the parameters and fields listed in this reference are supported for configuration. -For more information about supported configurations, see xref:../monitoring/configuring-the-monitoring-stack.adoc#maintenance-and-support_configuring-the-monitoring-stack[Maintenance and support for monitoring]. +For more information about supported configurations, see +ifndef::openshift-dedicated,openshift-rosa[] +xref:../../observability/monitoring/getting-started/maintenance-and-support-for-monitoring.adoc#maintenance-and-support-for-monitoring[Maintenance and support for monitoring]. +endif::openshift-dedicated,openshift-rosa[] +ifdef::openshift-dedicated,openshift-rosa[] +xref:../../observability/monitoring/configuring-the-monitoring-stack.adoc#maintenance-and-support_configuring-the-monitoring-stack[Maintenance and support for monitoring]. +endif::openshift-dedicated,openshift-rosa[] + * Configuring cluster monitoring is optional.
* If a configuration does not exist or is empty, default values are used. * If the configuration has invalid YAML data, or if it contains unsupported or duplicated fields that bypassed early validation, the Cluster Monitoring Operator stops reconciling the resources and reports the `Degraded=True` status in the status conditions of the Operator. diff --git a/observability/monitoring/configuring-core-platform-monitoring/_attributes b/observability/monitoring/configuring-core-platform-monitoring/_attributes new file mode 120000 index 0000000000..20cc1dcb77 --- /dev/null +++ b/observability/monitoring/configuring-core-platform-monitoring/_attributes @@ -0,0 +1 @@ +../../_attributes/ \ No newline at end of file diff --git a/observability/monitoring/configuring-core-platform-monitoring/configuring-alerts-and-notifications.adoc b/observability/monitoring/configuring-core-platform-monitoring/configuring-alerts-and-notifications.adoc new file mode 100644 index 0000000000..048b092baa --- /dev/null +++ b/observability/monitoring/configuring-core-platform-monitoring/configuring-alerts-and-notifications.adoc @@ -0,0 +1,59 @@ +:_mod-docs-content-type: ASSEMBLY +include::_attributes/common-attributes.adoc[] +[id="configuring-alerts-and-notifications"] += Configuring alerts and notifications for core platform monitoring +:context: configuring-alerts-and-notifications + +toc::[] + +You can configure a local or external Alertmanager instance to route alerts from Prometheus to endpoint receivers. You can also attach custom labels to all time series and alerts to add useful metadata information. 
+ +//Configuring external Alertmanager instances +include::modules/monitoring-configuring-external-alertmanagers.adoc[leveloffset=1,tags=**;CPM;!UWM] + +// Disabling the local Alertmanager +include::modules/monitoring-disabling-the-local-alertmanager.adoc[leveloffset=+2] + +[role="_additional-resources"] +.Additional resources + +* link:https://prometheus.io/docs/alerting/latest/alertmanager/[Alertmanager] (Prometheus documentation) +* xref:../../../observability/monitoring/managing-alerts/managing-alerts-as-an-administrator.adoc#managing-alerts-as-an-administrator[Managing alerts as an Administrator] + +//Configuring secrets for Alertmanager +include::modules/monitoring-configuring-secrets-for-alertmanager.adoc[leveloffset=1] + +include::modules/monitoring-adding-a-secret-to-the-alertmanager-configuration.adoc[leveloffset=2,tags=**;CPM;!UWM] + +//Attaching additional labels to your time series and alerts +include::modules/monitoring-attaching-additional-labels-to-your-time-series-and-alerts.adoc[leveloffset=+1,tags=**;CPM;!UWM] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/configuring-core-platform-monitoring/preparing-to-configure-the-monitoring-stack.adoc#preparing-to-configure-the-monitoring-stack[Preparing to configure core platform monitoring stack] + +[id="configuring-alert-notifications_{context}"] +== Configuring alert notifications + +In {product-title} {product-version}, you can view firing alerts in the Alerting UI. You can configure Alertmanager to send notifications about default platform alerts by configuring alert receivers. + +[IMPORTANT] +==== +Alertmanager does not send notifications by default. It is strongly recommended to configure Alertmanager to send notifications by configuring alert receivers through the web console or through the `alertmanager-main` secret.
+==== + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#sending-notifications-to-external-systems_key-concepts[Sending notifications to external systems] +* link:https://www.pagerduty.com/[PagerDuty] (PagerDuty official site) +* link:https://www.pagerduty.com/docs/guides/prometheus-integration-guide/[Prometheus Integration Guide] (PagerDuty official site) +* xref:../../../observability/monitoring/getting-started/maintenance-and-support-for-monitoring.adoc#support-version-matrix-for-monitoring-components_maintenance-and-support-for-monitoring[Support version matrix for monitoring components] +* xref:../../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#enabling-alert-routing-for-user-defined-projects_preparing-to-configure-the-monitoring-stack-uwm[Enabling alert routing for user-defined projects] + +include::modules/monitoring-configuring-alert-routing-default-platform-alerts.adoc[leveloffset=+2] + +include::modules/monitoring-configuring-alert-routing-console.adoc[leveloffset=+2] + +include::modules/monitoring-configuring-different-alert-receivers-for-default-platform-alerts-and-user-defined-alerts.adoc[leveloffset=+2] diff --git a/observability/monitoring/configuring-core-platform-monitoring/configuring-metrics.adoc b/observability/monitoring/configuring-core-platform-monitoring/configuring-metrics.adoc new file mode 100644 index 0000000000..7d46aa6de3 --- /dev/null +++ b/observability/monitoring/configuring-core-platform-monitoring/configuring-metrics.adoc @@ -0,0 +1,43 @@ +:_mod-docs-content-type: ASSEMBLY +include::_attributes/common-attributes.adoc[] +[id="configuring-metrics"] += Configuring metrics for core platform monitoring +:context: configuring-metrics + +toc::[] + +Configure the collection of metrics to monitor how cluster components and your own workloads are performing. 
+ +You can send ingested metrics to remote systems for long-term storage and add cluster ID labels to the metrics to identify the data coming from different clusters. + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#understanding-metrics_key-concepts[Understanding metrics] + +// Configuring remote write storage +include::modules/monitoring-configuring-remote-write-storage.adoc[leveloffset=+1,tags=**;CPM;!UWM] + +include::modules/monitoring-supported-remote-write-authentication-settings.adoc[leveloffset=+2] + +include::modules/monitoring-example-remote-write-authentication-settings.adoc[leveloffset=+2,tags=**;CPM;!UWM] + +include::modules/monitoring-example-remote-write-queue-configuration.adoc[leveloffset=+2,tags=**;CPM;!UWM] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../rest_api/monitoring_apis/prometheus-monitoring-coreos-com-v1.adoc#spec-remotewrite-2[Prometheus REST API reference for remote write] +* link:https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage[Setting up remote write compatible endpoints] (Prometheus documentation) +* link:https://prometheus.io/docs/practices/remote_write/#remote-write-tuning[Tuning remote write settings] (Prometheus documentation) +* xref:../../../nodes/pods/nodes-pods-secrets.adoc#nodes-pods-secrets-about_nodes-pods-secrets[Understanding secrets] + +//Creating cluster ID labels for metrics for core platform monitoring +include::modules/monitoring-creating-cluster-id-labels-for-metrics.adoc[leveloffset=+1,tags=**;CPM;!UWM] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#adding-cluster-id-labels-to-metrics_key-concepts[Adding cluster ID labels to metrics] +* xref:../../../support/gathering-cluster-data.adoc#support-get-cluster-id_gathering-cluster-data[Obtaining your cluster ID] + diff --git 
a/observability/monitoring/configuring-core-platform-monitoring/configuring-performance-and-scalability.adoc b/observability/monitoring/configuring-core-platform-monitoring/configuring-performance-and-scalability.adoc new file mode 100644 index 0000000000..d4a40001bc --- /dev/null +++ b/observability/monitoring/configuring-core-platform-monitoring/configuring-performance-and-scalability.adoc @@ -0,0 +1,93 @@ +:_mod-docs-content-type: ASSEMBLY +include::_attributes/common-attributes.adoc[] +[id="configuring-performance-and-scalability"] += Configuring performance and scalability for core platform monitoring +:context: configuring-performance-and-scalability + +toc::[] + +You can configure the monitoring stack to optimize the performance and scale of your clusters. The following documentation provides information about how to distribute the monitoring components and control the impact of the monitoring stack on CPU and memory resources. + +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#about-performance-and-scalability_key-concepts[About performance and scalability] + +[id="controlling-placement-and-distribution-of-monitoing-components_{context}"] +== Controlling the placement and distribution of monitoring components + +You can move the monitoring stack components to specific nodes: + +* Use the `nodeSelector` constraint with labeled nodes to move any of the monitoring stack components to specific nodes. +* Assign tolerations to enable moving components to tainted nodes. + +By doing so, you control the placement and distribution of the monitoring components across a cluster. + +By controlling placement and distribution of monitoring components, you can optimize system resource use, improve performance, and separate workloads based on specific requirements or policies. 
+ +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#using-node-selectors-to-move-monitoring-components_key-concepts[Using node selectors to move monitoring components] + +include::modules/monitoring-moving-monitoring-components-to-different-nodes.adoc[leveloffset=+2,tags=**;CPM;!UWM] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/configuring-core-platform-monitoring/preparing-to-configure-the-monitoring-stack.adoc#preparing-to-configure-the-monitoring-stack[Preparing to configure core platform monitoring stack] +* xref:../../../nodes/nodes/nodes-nodes-working.adoc#nodes-nodes-working-updating_nodes-nodes-working[Understanding how to update labels on nodes] +* xref:../../../nodes/scheduling/nodes-scheduler-node-selectors.adoc#nodes-scheduler-node-selectors[Placing pods on specific nodes using node selectors] +* link:https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector[nodeSelector] (Kubernetes documentation) + +include::modules/monitoring-assigning-tolerations-to-monitoring-components.adoc[leveloffset=+2,tags=**;CPM;!UWM] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/configuring-core-platform-monitoring/preparing-to-configure-the-monitoring-stack.adoc#preparing-to-configure-the-monitoring-stack[Preparing to configure core platform monitoring stack] +* xref:../../../nodes/scheduling/nodes-scheduler-taints-tolerations.adoc#nodes-scheduler-taints-tolerations[Controlling pod placement using node taints] +* link:https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/[Taints and Tolerations] (Kubernetes documentation) + +// Setting the body size limit for metrics scraping +include::modules/monitoring-setting-the-body-size-limit-for-metrics-scraping.adoc[leveloffset=+1] + +[role="_additional-resources"] +.Additional resources + +* 
link:https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config[scrape_config configuration] (Prometheus documentation) + +[id="managing-cpu-and-memory-resources-for-monitoring-components_{context}"] +== Managing CPU and memory resources for monitoring components + +You can ensure that the containers that run monitoring components have enough CPU and memory resources by specifying values for resource limits and requests for those components. + +You can configure these limits and requests for core platform monitoring components in the `openshift-monitoring` namespace. + +include::modules/monitoring-specifying-limits-and-requests-for-monitoring-components.adoc[leveloffset=+2,tags=**;CPM;!UWM] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#about-specifying-limits-and-requests-for-monitoring-components_key-concepts[About specifying limits and requests] +* link:https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#requests-and-limits[Kubernetes requests and limits documentation] (Kubernetes documentation) + +// Choosing a metrics collection profile +include::modules/monitoring-choosing-a-metrics-collection-profile.adoc[leveloffset=+1] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#configuring-metrics-collection-profiles_key-concepts[About metrics collection profiles] +* xref:../../../observability/monitoring/accessing-metrics/accessing-metrics-as-an-administrator.adoc#viewing-a-list-of-available-metrics_accessing-metrics-as-an-administrator[Viewing a list of available metrics] +* xref:../../../nodes/clusters/nodes-cluster-enabling-features.adoc#nodes-cluster-enabling[Enabling features using feature gates] + +//Configuring pod topology spread constraints for core platform monitoring 
+include::modules/monitoring-configuring-pod-topology-spread-constraints.adoc[leveloffset=1,tags=**;CPM;!UWM] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#using-pod-topology-spread-constraints-for-monitoring_key-concepts[About pod topology spread constraints for monitoring] +* xref:../../../nodes/scheduling/nodes-scheduler-pod-topology-spread-constraints.adoc#nodes-scheduler-pod-topology-spread-constraints-about[Controlling pod placement by using pod topology spread constraints] +* link:https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/[Pod Topology Spread Constraints] (Kubernetes documentation) + + + diff --git a/observability/monitoring/configuring-core-platform-monitoring/images b/observability/monitoring/configuring-core-platform-monitoring/images new file mode 120000 index 0000000000..847b03ed05 --- /dev/null +++ b/observability/monitoring/configuring-core-platform-monitoring/images @@ -0,0 +1 @@ +../../images/ \ No newline at end of file diff --git a/observability/monitoring/configuring-core-platform-monitoring/modules b/observability/monitoring/configuring-core-platform-monitoring/modules new file mode 120000 index 0000000000..36719b9de7 --- /dev/null +++ b/observability/monitoring/configuring-core-platform-monitoring/modules @@ -0,0 +1 @@ +../../modules/ \ No newline at end of file diff --git a/observability/monitoring/configuring-core-platform-monitoring/preparing-to-configure-the-monitoring-stack.adoc b/observability/monitoring/configuring-core-platform-monitoring/preparing-to-configure-the-monitoring-stack.adoc new file mode 100644 index 0000000000..f88fe56da0 --- /dev/null +++ b/observability/monitoring/configuring-core-platform-monitoring/preparing-to-configure-the-monitoring-stack.adoc @@ -0,0 +1,39 @@ +:_mod-docs-content-type: ASSEMBLY +include::_attributes/common-attributes.adoc[] + 
+[id="preparing-to-configure-the-monitoring-stack"] += Preparing to configure core platform monitoring stack +:context: preparing-to-configure-the-monitoring-stack + +toc::[] + +The {product-title} installation program provides only a small number of configuration options before installation. Configuring most {product-title} framework components, including the cluster monitoring stack, happens after the installation. + +This section explains which monitoring components can be configured and how to prepare for configuring the monitoring stack. + +[IMPORTANT] +==== +* Not all configuration parameters for the monitoring stack are exposed. +Only the parameters and fields listed in the xref:../../../observability/monitoring/config-map-reference-for-the-cluster-monitoring-operator.adoc#cluster-monitoring-operator-configuration-reference[Config map reference for the {cmo-full}] are supported for configuration. + +* The monitoring stack imposes additional resource requirements. Consult the computing resources recommendations in xref:../../../scalability_and_performance/recommended-performance-scale-practices/recommended-infrastructure-practices.adoc#scaling-cluster-monitoring-operator_recommended-infrastructure-practices[Scaling the {cmo-full}] and verify that you have sufficient resources.
+==== + +// Configurable monitoring components +include::modules/monitoring-configurable-monitoring-components.adoc[leveloffset=+1,tags=**;CPM;!UWM] + +// Creating a cluster monitoring config map +include::modules/monitoring-creating-cluster-monitoring-configmap.adoc[leveloffset=+1] + +// Granting users permissions for core platform monitoring +include::modules/monitoring-granting-users-permissions-for-core-platform-monitoring.adoc[leveloffset=+1] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/accessing-metrics/accessing-monitoring-apis-by-using-the-cli.adoc#resources-reference-for-the-cluster-monitoring-operator_accessing-monitoring-apis-by-using-the-cli[Resources reference for the {cmo-full}] +* xref:../../../observability/monitoring/accessing-metrics/accessing-monitoring-apis-by-using-the-cli.adoc#cmo-services-resources_accessing-monitoring-apis-by-using-the-cli[CMO services resources] + +include::modules/monitoring-granting-user-permissions-using-the-web-console.adoc[leveloffset=+2] +include::modules/monitoring-granting-user-permissions-using-the-cli.adoc[leveloffset=+2] + diff --git a/observability/monitoring/configuring-core-platform-monitoring/snippets b/observability/monitoring/configuring-core-platform-monitoring/snippets new file mode 120000 index 0000000000..5a3f5add14 --- /dev/null +++ b/observability/monitoring/configuring-core-platform-monitoring/snippets @@ -0,0 +1 @@ +../../snippets/ \ No newline at end of file diff --git a/observability/monitoring/configuring-core-platform-monitoring/storing-and-recording-data.adoc b/observability/monitoring/configuring-core-platform-monitoring/storing-and-recording-data.adoc new file mode 100644 index 0000000000..cb70873c1d --- /dev/null +++ b/observability/monitoring/configuring-core-platform-monitoring/storing-and-recording-data.adoc @@ -0,0 +1,60 @@ +:_mod-docs-content-type: ASSEMBLY +include::_attributes/common-attributes.adoc[] 
+[id="storing-and-recording-data"] += Storing and recording data for core platform monitoring +:context: storing-and-recording-data + +toc::[] + +Store and record your metrics and alerting data, configure logs to specify which activities are recorded, control how long Prometheus retains stored data, and set the maximum amount of disk space for the data. These actions help you protect your data and use them for troubleshooting. + +// Configuring persistent storage +include::modules/monitoring-configuring-persistent-storage.adoc[leveloffset=+1] + +include::modules/monitoring-configuring-a-persistent-volume-claim.adoc[leveloffset=+2,tags=**;CPM;!UWM] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../storage/understanding-persistent-storage.adoc#understanding-persistent-storage[Understanding persistent storage] +* link:https://kubernetes.io/docs/concepts/storage/persistent-volumes/#persistentvolumeclaims[PersistentVolumeClaims] (Kubernetes documentation) + +include::modules/monitoring-resizing-a-persistent-volume.adoc[leveloffset=+2,tags=**;CPM;!UWM] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../scalability_and_performance/recommended-performance-scale-practices/recommended-infrastructure-practices.adoc#prometheus-database-storage-requirements_recommended-infrastructure-practices[Prometheus database storage requirements] +* xref:../../../storage/expanding-persistent-volumes.adoc#expanding-pvc-filesystem_expanding-persistent-volumes[Expanding persistent volume claims (PVCs) with a file system] + +// Modifying the retention time and size for Prometheus metrics data + +include::modules/monitoring-modifying-retention-time-and-size-for-prometheus-metrics-data.adoc[leveloffset=+1,tags=**;CPM;!UWM] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#retention-time-and-size-for-prometheus-metrics-data_key-concepts[Retention time and 
size for Prometheus metrics] +* xref:../../../observability/monitoring/configuring-core-platform-monitoring/preparing-to-configure-the-monitoring-stack.adoc#preparing-to-configure-the-monitoring-stack[Preparing to configure core platform monitoring stack] +* xref:../../../scalability_and_performance/recommended-performance-scale-practices/recommended-infrastructure-practices.adoc#prometheus-database-storage-requirements_cluster-monitoring-operator[Prometheus database storage requirements] +* xref:../../../scalability_and_performance/optimization/optimizing-storage.adoc#optimizing-storage[Recommended configurable storage technology] +* xref:../../../storage/understanding-persistent-storage.adoc#understanding-persistent-storage[Understanding persistent storage] +* xref:../../../scalability_and_performance/optimization/optimizing-storage.adoc#optimizing-storage[Optimizing storage] + +// Configuring audit logs for Metrics Server +include::modules/monitoring-configuring-audit-logs-for-metrics-server.adoc[leveloffset=+1] + +// Setting log levels for monitoring components +include::modules/monitoring-setting-log-levels-for-monitoring-components.adoc[leveloffset=+1,tags=**;CPM;!UWM] + +// Enabling the query log file for Prometheus +include::modules/monitoring-setting-query-log-file-for-prometheus.adoc[leveloffset=+1,tags=**;CPM;!UWM] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/configuring-core-platform-monitoring/preparing-to-configure-the-monitoring-stack.adoc#preparing-to-configure-the-monitoring-stack[Preparing to configure core platform monitoring stack] + +// Enabling query logging for Thanos Querier +include::modules/monitoring-enabling-query-logging-for-thanos-querier.adoc[leveloffset=+1] + diff --git a/observability/monitoring/configuring-the-monitoring-stack.adoc b/observability/monitoring/configuring-the-monitoring-stack.adoc index 392e099ab3..f3a72ed877 100644 --- 
a/observability/monitoring/configuring-the-monitoring-stack.adoc +++ b/observability/monitoring/configuring-the-monitoring-stack.adoc @@ -2,7 +2,7 @@ [id="configuring-the-monitoring-stack"] = Configuring the monitoring stack include::_attributes/common-attributes.adoc[] -:context: configuring-the-monitoring-stack +:context: configuring-the-monitoring-stack toc::[] @@ -82,8 +82,8 @@ include::modules/monitoring-granting-users-permissions-for-core-platform-monitor .Additional resources * xref:../../observability/monitoring/enabling-monitoring-for-user-defined-projects.adoc#granting-user-permissions-using-the-web-console_enabling-monitoring-for-user-defined-projects[Granting user permissions by using the web console] * xref:../../observability/monitoring/enabling-monitoring-for-user-defined-projects.adoc#granting-user-permissions-using-the-cli_enabling-monitoring-for-user-defined-projects[Granting user permissions by using the CLI] -* xref:../../observability/monitoring/accessing-third-party-monitoring-apis.adoc#resources-reference-for-the-cluster-monitoring-operator_accessing-monitoring-apis-by-using-the-cli[Resources reference for the {cmo-full}] -* xref:../../observability/monitoring/accessing-third-party-monitoring-apis.adoc#cmo-services-resources[CMO services resources] +* xref:../../observability/monitoring/accessing-third-party-monitoring-apis.adoc#resources-reference-for-the-cluster-monitoring-operator_accessing-third-party-monitoring-apis[Resources reference for the {cmo-full}] +* xref:../../observability/monitoring/accessing-third-party-monitoring-apis.adoc#cmo-services-resources_accessing-third-party-monitoring-apis[CMO services resources] endif::openshift-dedicated,openshift-rosa[] @@ -103,7 +103,8 @@ ifndef::openshift-dedicated,openshift-rosa[] endif::openshift-dedicated,openshift-rosa[] // Configurable monitoring components -include::modules/monitoring-configurable-monitoring-components.adoc[leveloffset=+1] +// The following module should only include 
monitoring for user-defined projects (UWM tags) +include::modules/monitoring-configurable-monitoring-components.adoc[leveloffset=+1,tags=**;!CPM;UWM] // Moving monitoring components to different nodes include::modules/monitoring-using-node-selectors-to-move-monitoring-components.adoc[leveloffset=+1] @@ -119,8 +120,8 @@ endif::openshift-dedicated,openshift-rosa[] * xref:../../observability/monitoring/configuring-the-monitoring-stack.adoc#using-pod-topology-spread-constraints-for-monitoring_configuring-the-monitoring-stack[Using pod topology spread constraints for monitoring] * link:https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector[Kubernetes documentation about node selectors] -include::modules/monitoring-moving-monitoring-components-to-different-nodes.adoc[leveloffset=+2] - +// The module should only include monitoring for user-defined projects +include::modules/monitoring-moving-monitoring-components-to-different-nodes.adoc[leveloffset=+2,tags=**;!CPM;UWM] [role="_additional-resources"] .Additional resources @@ -136,7 +137,8 @@ endif::openshift-dedicated,openshift-rosa[] * See the link:https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector[Kubernetes documentation] for details on the `nodeSelector` constraint // Assigning tolerations to monitoring components -include::modules/monitoring-assigning-tolerations-to-monitoring-components.adoc[leveloffset=+1] +// The following module should only include monitoring for user-defined projects (UWM tags) +include::modules/monitoring-assigning-tolerations-to-monitoring-components.adoc[leveloffset=+1,tags=**;!CPM;UWM] [role="_additional-resources"] .Additional resources @@ -168,14 +170,25 @@ You can ensure that the containers that run monitoring components have enough CP You can configure these limits and requests for core platform monitoring components in the `openshift-monitoring` namespace and for the components that monitor user-defined projects in the 
`openshift-user-workload-monitoring` namespace. include::modules/monitoring-about-specifying-limits-and-requests-for-monitoring-components.adoc[leveloffset=+2] -include::modules/monitoring-specifying-limits-and-requests-for-monitoring-components.adoc[leveloffset=+2] + +// The following module should only include monitoring for user-defined projects (UWM tags) +include::modules/monitoring-specifying-limits-and-requests-for-monitoring-components.adoc[leveloffset=+2,tags=**;!CPM;UWM] // Configuring persistent storage include::modules/monitoring-configuring-persistent-storage.adoc[leveloffset=+1] -include::modules/monitoring-configuring-a-persistent-volume-claim.adoc[leveloffset=+2] + +// Configuring a persistent volume claim +// The following module should only include monitoring for user-defined projects (UWM tags) +include::modules/monitoring-configuring-a-persistent-volume-claim.adoc[leveloffset=+2,tags=**;!CPM;UWM] + +[role="_additional-resources"] +.Additional resources +* link:https://kubernetes.io/docs/concepts/storage/persistent-volumes/#persistentvolumeclaims[PersistentVolumeClaims] (Kubernetes documentation about how to specify `volumeClaimTemplate`) ifndef::openshift-dedicated,openshift-rosa[] -include::modules/monitoring-resizing-a-persistent-volume.adoc[leveloffset=+2] +// Resizing a persistent volume +// The following module should only include monitoring for user-defined projects (UWM tags) +include::modules/monitoring-resizing-a-persistent-volume.adoc[leveloffset=+2,tags=**;!CPM;UWM] [role="_additional-resources"] .Additional resources @@ -183,7 +196,14 @@ include::modules/monitoring-resizing-a-persistent-volume.adoc[leveloffset=+2] * xref:../../storage/expanding-persistent-volumes.adoc#expanding-pvc-filesystem_expanding-persistent-volumes[Expanding persistent volume claims (PVCs) with a file system] endif::openshift-dedicated,openshift-rosa[] -include::modules/monitoring-modifying-retention-time-and-size-for-prometheus-metrics-data.adoc[leveloffset=+2]
+// The retention time and size for Prometheus metrics data +// This section will be moved in a future PR. Therefore, some of the repetition in the introduction for the following procedure modules does not matter for the time being +include::modules/monitoring-retention-time-and-size-for-prometheus-metrics-data.adoc[leveloffset=+2] + +// Modifying the retention time and size for Prometheus metrics data +// The following module should only include monitoring for user-defined projects (UWM tags) +include::modules/monitoring-modifying-retention-time-and-size-for-prometheus-metrics-data.adoc[leveloffset=+2,tags=**;!CPM;UWM] + include::modules/monitoring-modifying-the-retention-time-for-thanos-ruler-metrics-data.adoc[leveloffset=+2] [role="_additional-resources"] @@ -201,10 +221,20 @@ ifdef::openshift-dedicated,openshift-rosa[] endif::openshift-dedicated,openshift-rosa[] // Configuring remote write storage for Prometheus -include::modules/monitoring-configuring-remote-write-storage.adoc[leveloffset=+1] + +// Configuring remote write storage +// The following module should only include monitoring for user-defined projects (UWM tags) +include::modules/monitoring-configuring-remote-write-storage.adoc[leveloffset=+1,tags=**;!CPM;UWM] + include::modules/monitoring-supported-remote-write-authentication-settings.adoc[leveloffset=+2] -include::modules/monitoring-example-remote-write-authentication-settings.adoc[leveloffset=+2] -include::modules/monitoring-example-remote-write-queue-configuration.adoc[leveloffset=+2] + +// Example remote write authentication settings +// The following module should only include monitoring for user-defined projects (UWM tags) +include::modules/monitoring-example-remote-write-authentication-settings.adoc[leveloffset=+2,tags=**;!CPM;UWM] + +// Example remote write queue configuration +// The following module should only include monitoring for user-defined projects (UWM tags)
+include::modules/monitoring-example-remote-write-queue-configuration.adoc[leveloffset=+2,tags=**;!CPM;UWM] [role="_additional-resources"] .Additional resources @@ -220,7 +250,10 @@ endif::openshift-dedicated,openshift-rosa[] // Configuring labels for outgoing metrics include::modules/monitoring-adding-cluster-id-labels-to-metrics.adoc[leveloffset=+1] -include::modules/monitoring-creating-cluster-id-labels-for-metrics.adoc[leveloffset=+2] + +// Creating cluster ID labels for metrics +// The following module should only include monitoring for user-defined projects (UWM tags) +include::modules/monitoring-creating-cluster-id-labels-for-metrics.adoc[leveloffset=+2,tags=**;!CPM;UWM] [role="_additional-resources"] .Additional resources @@ -248,8 +281,8 @@ include::modules/monitoring-choosing-a-metrics-collection-profile.adoc[leveloffs * See xref:../../nodes/clusters/nodes-cluster-enabling-features.adoc[Enabling features using feature gates] for steps to enable Technology Preview features. endif::openshift-dedicated,openshift-rosa[] -// Managing scrape and evaluation intervals and enforced limits for user-defined projects -include::modules/monitoring-limiting-scrape-samples-in-user-defined-projects.adoc[leveloffset=+1] +// Controlling the impact of unbound metrics attributes in user-defined projects +include::modules/monitoring-controlling-the-impact-of-unbound-attributes-in-user-defined-projects.adoc[leveloffset=+1] include::modules/monitoring-setting-scrape-and-evaluation-intervals-limits-for-user-defined-projects.adoc[leveloffset=+2] ifndef::openshift-dedicated,openshift-rosa[] include::modules/monitoring-creating-scrape-sample-alerts.adoc[leveloffset=+2] @@ -262,15 +295,20 @@ include::modules/monitoring-creating-scrape-sample-alerts.adoc[leveloffset=+2] * See xref:../../observability/monitoring/troubleshooting-monitoring-issues.adoc#determining-why-prometheus-is-consuming-disk-space_troubleshooting-monitoring-issues[Determining why Prometheus is consuming a lot of 
disk space] for steps to query which metrics have the highest number of scrape samples. endif::openshift-dedicated,openshift-rosa[] -//Configuring external alertmanagers -include::modules/monitoring-configuring-external-alertmanagers.adoc[leveloffset=1] +//Configuring external Alertmanager instances +// The following module should only include monitoring for user-defined projects (UWM tags) +include::modules/monitoring-configuring-external-alertmanagers.adoc[leveloffset=1,tags=**;!CPM;UWM] //Configuring secrets for Alertmanager include::modules/monitoring-configuring-secrets-for-alertmanager.adoc[leveloffset=1] -include::modules/monitoring-adding-a-secret-to-the-alertmanager-configuration.adoc[leveloffset=2] + +// Adding a secret to the Alertmanager configuration +// The following module should only include monitoring for user-defined projects (UWM tags) +include::modules/monitoring-adding-a-secret-to-the-alertmanager-configuration.adoc[leveloffset=2,tags=**;!CPM;UWM] //Attaching additional labels to your time series and alerts -include::modules/monitoring-attaching-additional-labels-to-your-time-series-and-alerts.adoc[leveloffset=+1] +// The following module should only include monitoring for user-defined projects (UWM tags) +include::modules/monitoring-attaching-additional-labels-to-your-time-series-and-alerts.adoc[leveloffset=+1,tags=**;!CPM;UWM] ifndef::openshift-dedicated,openshift-rosa[] [role="_additional-resources"] @@ -293,13 +331,16 @@ endif::openshift-dedicated,openshift-rosa[] * link:https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/[Kubernetes Pod Topology Spread Constraints documentation] // Configuring pod topology spread constraints -include::modules/monitoring-configuring-pod-topology-spread-constraints.adoc[leveloffset=2] +// The following module should only include monitoring for user-defined projects (UWM tags) 
+include::modules/monitoring-configuring-pod-topology-spread-constraints.adoc[leveloffset=2,tags=**;!CPM;UWM] // Setting log levels for monitoring components -include::modules/monitoring-setting-log-levels-for-monitoring-components.adoc[leveloffset=+1] +// The following module should only include monitoring for user-defined projects (UWM tags) +include::modules/monitoring-setting-log-levels-for-monitoring-components.adoc[leveloffset=+1,tags=**;!CPM;UWM] -// Setting query log for Prometheus -include::modules/monitoring-setting-query-log-file-for-prometheus.adoc[leveloffset=+1] +// Enabling the query log file for Prometheus +// The following module should only include monitoring for user-defined projects (UWM tags) +include::modules/monitoring-setting-query-log-file-for-prometheus.adoc[leveloffset=+1,tags=**;!CPM;UWM] ifndef::openshift-dedicated,openshift-rosa[] [role="_additional-resources"] @@ -315,14 +356,7 @@ include::modules/monitoring-enabling-query-logging-for-thanos-querier.adoc[level [role="_additional-resources"] .Additional resources -* See xref:../../observability/monitoring/configuring-the-monitoring-stack.adoc#preparing-to-configure-the-monitoring-stack[Preparing to configure the monitoring stack] for steps to create monitoring config maps. -endif::openshift-dedicated,openshift-rosa[] - -[role="_additional-resources"] -.Additional resources - -ifndef::openshift-dedicated,openshift-rosa[] -* See xref:../../observability/monitoring/configuring-the-monitoring-stack.adoc#preparing-to-configure-the-monitoring-stack[Preparing to configure the monitoring stack] for steps to create monitoring config maps. 
+* xref:../../observability/monitoring/configuring-the-monitoring-stack.adoc#preparing-to-configure-the-monitoring-stack[Preparing to configure the monitoring stack] endif::openshift-dedicated,openshift-rosa[] // Disabling the local Alertmanager diff --git a/observability/monitoring/configuring-user-workload-monitoring/_attributes b/observability/monitoring/configuring-user-workload-monitoring/_attributes new file mode 120000 index 0000000000..20cc1dcb77 --- /dev/null +++ b/observability/monitoring/configuring-user-workload-monitoring/_attributes @@ -0,0 +1 @@ +../../_attributes/ \ No newline at end of file diff --git a/observability/monitoring/configuring-user-workload-monitoring/configuring-alerts-and-notifications-uwm.adoc b/observability/monitoring/configuring-user-workload-monitoring/configuring-alerts-and-notifications-uwm.adoc new file mode 100644 index 0000000000..0d00011ae1 --- /dev/null +++ b/observability/monitoring/configuring-user-workload-monitoring/configuring-alerts-and-notifications-uwm.adoc @@ -0,0 +1,60 @@ +:_mod-docs-content-type: ASSEMBLY +include::_attributes/common-attributes.adoc[] +[id="configuring-alerts-and-notifications-uwm"] += Configuring alerts and notifications for user workload monitoring +:context: configuring-alerts-and-notifications-uwm + +toc::[] + +You can configure a local or external Alertmanager instance to route alerts from Prometheus to endpoint receivers. You can also attach custom labels to all time series and alerts to add useful metadata information. 
+ +//Configuring external Alertmanager instances +include::modules/monitoring-configuring-external-alertmanagers.adoc[leveloffset=1,tags=**;!CPM;UWM] + +//Configuring secrets for Alertmanager +include::modules/monitoring-configuring-secrets-for-alertmanager.adoc[leveloffset=1] + +include::modules/monitoring-adding-a-secret-to-the-alertmanager-configuration.adoc[leveloffset=2,tags=**;!CPM;UWM] + +//Attaching additional labels to your time series and alerts +include::modules/monitoring-attaching-additional-labels-to-your-time-series-and-alerts.adoc[leveloffset=+1,tags=**;!CPM;UWM] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#enabling-monitoring-for-user-defined-projects-uwm_preparing-to-configure-the-monitoring-stack-uwm[Enabling monitoring for user-defined projects] + +[id="configuring-alert-notifications_{context}"] +== Configuring alert notifications + +In {product-title}, an administrator can enable alert routing for user-defined projects with one of the following methods: + +* Use the default platform Alertmanager instance. +* Use a separate Alertmanager instance only for user-defined projects. + +Developers and other users with the `alert-routing-edit` cluster role can configure custom alert notifications for their user-defined projects by configuring alert receivers. + +[NOTE] +==== +Review the following limitations of alert routing for user-defined projects: + +* User-defined alert routing is scoped to the namespace in which the resource is defined. For example, a routing configuration in namespace `ns1` only applies to `PrometheusRules` resources in the same namespace. + +* When a namespace is excluded from user-defined monitoring, `AlertmanagerConfig` resources in the namespace cease to be part of the Alertmanager configuration. 
+==== + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#understanding-alert-routing-for-user-defined-projects_key-concepts[Understanding alert routing for user-defined projects] +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#sending-notifications-to-external-systems_key-concepts[Sending notifications to external systems] +* link:https://www.pagerduty.com/[PagerDuty] (PagerDuty official site) +* link:https://www.pagerduty.com/docs/guides/prometheus-integration-guide/[Prometheus Integration Guide] (PagerDuty official site) +* xref:../../../observability/monitoring/getting-started/maintenance-and-support-for-monitoring.adoc#support-version-matrix-for-monitoring-components_maintenance-and-support-for-monitoring[Support version matrix for monitoring components] +* xref:../../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#enabling-alert-routing-for-user-defined-projects_preparing-to-configure-the-monitoring-stack-uwm[Enabling alert routing for user-defined projects] + +include::modules/monitoring-configuring-alert-routing-for-user-defined-projects.adoc[leveloffset=+2] + +include::modules/monitoring-configuring-alert-routing-user-defined-alerts-secret.adoc[leveloffset=+2] + +include::modules/monitoring-configuring-different-alert-receivers-for-default-platform-alerts-and-user-defined-alerts.adoc[leveloffset=+2] diff --git a/observability/monitoring/configuring-user-workload-monitoring/configuring-metrics-uwm.adoc b/observability/monitoring/configuring-user-workload-monitoring/configuring-metrics-uwm.adoc new file mode 100644 index 0000000000..9037e8e5ad --- /dev/null +++ b/observability/monitoring/configuring-user-workload-monitoring/configuring-metrics-uwm.adoc @@ -0,0 +1,60 @@ +:_mod-docs-content-type: ASSEMBLY +include::_attributes/common-attributes.adoc[] 
+[id="configuring-metrics-uwm"] += Configuring metrics for user workload monitoring +:context: configuring-metrics-uwm + +toc::[] + +Configure the collection of metrics to monitor how cluster components and your own workloads are performing. + +You can send ingested metrics to remote systems for long-term storage and add cluster ID labels to the metrics to identify the data coming from different clusters. + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#understanding-metrics_key-concepts[Understanding metrics] + +// Configuring remote write storage +include::modules/monitoring-configuring-remote-write-storage.adoc[leveloffset=+1,tags=**;!CPM;UWM] + +include::modules/monitoring-supported-remote-write-authentication-settings.adoc[leveloffset=+2] + +include::modules/monitoring-example-remote-write-authentication-settings.adoc[leveloffset=+2,tags=**;!CPM;UWM] + +include::modules/monitoring-example-remote-write-queue-configuration.adoc[leveloffset=+2,tags=**;!CPM;UWM] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../rest_api/monitoring_apis/prometheus-monitoring-coreos-com-v1.adoc#spec-remotewrite-2[Prometheus REST API reference for remote write] +* link:https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage[Setting up remote write compatible endpoints] (Prometheus documentation) +* link:https://prometheus.io/docs/practices/remote_write/#remote-write-tuning[Tuning remote write settings] (Prometheus documentation) +* xref:../../../nodes/pods/nodes-pods-secrets.adoc#nodes-pods-secrets-about_nodes-pods-secrets[Understanding secrets] + +// Creating cluster ID labels for metrics for monitoring of user-defined projects +include::modules/monitoring-creating-cluster-id-labels-for-metrics.adoc[leveloffset=+1,tags=**;!CPM;UWM] + +[role="_additional-resources"] +.Additional resources + +* 
xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#adding-cluster-id-labels-to-metrics_key-concepts[Adding cluster ID labels to metrics] +* xref:../../../support/gathering-cluster-data.adoc#support-get-cluster-id_gathering-cluster-data[Obtaining your cluster ID] + +// Setting up metrics collection for user-defined projects + +include::modules/monitoring-setting-up-metrics-collection-for-user-defined-projects.adoc[leveloffset=+1] + +include::modules/monitoring-deploying-a-sample-service.adoc[leveloffset=+2] + +include::modules/monitoring-specifying-how-a-service-is-monitored.adoc[leveloffset=+2] + +include::modules/monitoring-example-service-endpoint-authentication-settings.adoc[leveloffset=+2] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#enabling-monitoring-for-user-defined-projects-uwm_preparing-to-configure-the-monitoring-stack-uwm[Enabling monitoring for user-defined projects] +* link:https://access.redhat.com/articles/6675491[Scrape Prometheus metrics using TLS in ServiceMonitor configuration] (Red{nbsp}Hat Customer Portal article) +* xref:../../../rest_api/monitoring_apis/podmonitor-monitoring-coreos-com-v1.adoc#podmonitor-monitoring-coreos-com-v1[PodMonitor API] +* xref:../../../rest_api/monitoring_apis/servicemonitor-monitoring-coreos-com-v1.adoc#servicemonitor-monitoring-coreos-com-v1[ServiceMonitor API] diff --git a/observability/monitoring/configuring-user-workload-monitoring/configuring-performance-and-scalability-uwm.adoc b/observability/monitoring/configuring-user-workload-monitoring/configuring-performance-and-scalability-uwm.adoc new file mode 100644 index 0000000000..d0c88d5e1c --- /dev/null +++ b/observability/monitoring/configuring-user-workload-monitoring/configuring-performance-and-scalability-uwm.adoc @@ -0,0 +1,98 @@ +:_mod-docs-content-type: ASSEMBLY 
+include::_attributes/common-attributes.adoc[] +[id="configuring-performance-and-scalability-uwm"] += Configuring performance and scalability for user workload monitoring +:context: configuring-performance-and-scalability-uwm + +toc::[] + +You can configure the monitoring stack to optimize the performance and scale of your clusters. The following documentation provides information about how to distribute the monitoring components and control the impact of the monitoring stack on CPU and memory resources. + +[id="controlling-placement-and-distribution-of-monitoing-components_{context}"] +== Controlling the placement and distribution of monitoring components + +You can move the monitoring stack components to specific nodes: + +* Use the `nodeSelector` constraint with labeled nodes to move any of the monitoring stack components to specific nodes. +* Assign tolerations to enable moving components to tainted nodes. + +By doing so, you control the placement and distribution of the monitoring components across a cluster. + +By controlling placement and distribution of monitoring components, you can optimize system resource use, improve performance, and separate workloads based on specific requirements or policies. 
+ +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#using-node-selectors-to-move-monitoring-components_key-concepts[Using node selectors to move monitoring components] + +include::modules/monitoring-moving-monitoring-components-to-different-nodes.adoc[leveloffset=+2,tags=**;!CPM;UWM] + +[role="_additional-resources"] +.Additional resources +* xref:../../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#enabling-monitoring-for-user-defined-projects-uwm_preparing-to-configure-the-monitoring-stack-uwm[Enabling monitoring for user-defined projects] +* xref:../../../nodes/nodes/nodes-nodes-working.adoc#nodes-nodes-working-updating_nodes-nodes-working[Understanding how to update labels on nodes] +* xref:../../../nodes/scheduling/nodes-scheduler-node-selectors.adoc#nodes-scheduler-node-selectors[Placing pods on specific nodes using node selectors] +* link:https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector[nodeSelector] (Kubernetes documentation) + +include::modules/monitoring-assigning-tolerations-to-monitoring-components.adoc[leveloffset=+2,tags=**;!CPM;UWM] + +[role="_additional-resources"] +.Additional resources +* xref:../../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#enabling-monitoring-for-user-defined-projects-uwm_preparing-to-configure-the-monitoring-stack-uwm[Enabling monitoring for user-defined projects] +* xref:../../../nodes/scheduling/nodes-scheduler-taints-tolerations.adoc#nodes-scheduler-taints-tolerations[Controlling pod placement using node taints] +* link:https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/[Taints and Tolerations] (Kubernetes documentation) + +[id="managing-cpu-and-memory-resources-for-monitoring-components_{context}"] +== Managing CPU and memory resources for 
monitoring components + +You can ensure that the containers that run monitoring components have enough CPU and memory resources by specifying values for resource limits and requests for those components. + +You can configure these limits and requests for monitoring components that monitor user-defined projects in the `openshift-user-workload-monitoring` namespace. + +include::modules/monitoring-specifying-limits-and-requests-for-monitoring-components.adoc[leveloffset=+2,tags=**;!CPM;UWM] + +[role="_additional-resources"] +.Additional resources +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#about-specifying-limits-and-requests-for-monitoring-components_key-concepts[About specifying limits and requests for monitoring components] +* link:https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#requests-and-limits[Requests and limits] (Kubernetes documentation) + +[id="controlling-the-impact-of-unbound-attributes-in-user-defined-projects_{context}"] +== Controlling the impact of unbound metrics attributes in user-defined projects + +ifndef::openshift-dedicated,openshift-rosa[] +Cluster administrators +endif::openshift-dedicated,openshift-rosa[] +ifdef::openshift-dedicated,openshift-rosa[] +A `dedicated-admin` +endif::openshift-dedicated,openshift-rosa[] +can use the following measures to control the impact of unbound metrics attributes in user-defined projects: + +* Limit the number of samples that can be accepted per target scrape in user-defined projects +* Limit the number of scraped labels, the length of label names, and the length of label values +* Configure the intervals between consecutive scrapes and between Prometheus rule evaluations +* Create alerts that fire when a scrape sample threshold is reached or when the target cannot be scraped + +[NOTE] +==== +Limiting scrape samples can help prevent the issues caused by adding many unbound attributes to labels. 
Developers can also prevent the underlying cause by limiting the number of unbound attributes that they define for metrics. Using attributes that are bound to a limited set of possible values reduces the number of potential key-value pair combinations. +==== + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#controlling-the-impact-of-unbound-attributes-in-user-defined-projects_key-concepts[Controlling the impact of unbound metrics attributes in user-defined projects] +* xref:../../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#enabling-monitoring-for-user-defined-projects-uwm_preparing-to-configure-the-monitoring-stack-uwm[Enabling monitoring for user-defined projects] +* xref:../../../observability/monitoring/troubleshooting-monitoring-issues.adoc#determining-why-prometheus-is-consuming-disk-space_troubleshooting-monitoring-issues[Determining why Prometheus is consuming a lot of disk space] + +include::modules/monitoring-setting-scrape-and-evaluation-intervals-limits-for-user-defined-projects.adoc[leveloffset=+2] +include::modules/monitoring-creating-scrape-sample-alerts.adoc[leveloffset=+2] + +//Configuring pod topology spread constraints for monitoring of user-defined projects +include::modules/monitoring-configuring-pod-topology-spread-constraints.adoc[leveloffset=+1,tags=**;!CPM;UWM] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#using-pod-topology-spread-constraints-for-monitoring_key-concepts[About pod topology spread constraints for monitoring] +* xref:../../../nodes/scheduling/nodes-scheduler-pod-topology-spread-constraints.adoc#nodes-scheduler-pod-topology-spread-constraints-about[Controlling pod placement by using pod topology spread constraints] +* 
link:https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/[Pod Topology Spread Constraints] (Kubernetes documentation) \ No newline at end of file diff --git a/observability/monitoring/configuring-user-workload-monitoring/images b/observability/monitoring/configuring-user-workload-monitoring/images new file mode 120000 index 0000000000..847b03ed05 --- /dev/null +++ b/observability/monitoring/configuring-user-workload-monitoring/images @@ -0,0 +1 @@ +../../images/ \ No newline at end of file diff --git a/observability/monitoring/configuring-user-workload-monitoring/modules b/observability/monitoring/configuring-user-workload-monitoring/modules new file mode 120000 index 0000000000..36719b9de7 --- /dev/null +++ b/observability/monitoring/configuring-user-workload-monitoring/modules @@ -0,0 +1 @@ +../../modules/ \ No newline at end of file diff --git a/observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc b/observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc new file mode 100644 index 0000000000..52ec4036ad --- /dev/null +++ b/observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc @@ -0,0 +1,76 @@ +:_mod-docs-content-type: ASSEMBLY +include::_attributes/common-attributes.adoc[] +[id="preparing-to-configure-the-monitoring-stack-uwm"] += Preparing to configure the user workload monitoring stack +:context: preparing-to-configure-the-monitoring-stack-uwm + +toc::[] + +This section explains which user-defined monitoring components can be configured, how to enable user workload monitoring, and how to prepare for configuring the user workload monitoring stack. + +[IMPORTANT] +==== +* Not all configuration parameters for the monitoring stack are exposed. 
+Only the parameters and fields listed in the xref:../../../observability/monitoring/config-map-reference-for-the-cluster-monitoring-operator.adoc#cluster-monitoring-operator-configuration-reference[Config map reference for the {cmo-full}] are supported for configuration. + +* The monitoring stack imposes additional resource requirements. Consult the computing resources recommendations in xref:../../../scalability_and_performance/recommended-performance-scale-practices/recommended-infrastructure-practices.adoc#scaling-cluster-monitoring-operator_recommended-infrastructure-practices[Scaling the {cmo-full}] and verify that you have sufficient resources. +==== + +// Configurable monitoring components +include::modules/monitoring-configurable-monitoring-components.adoc[leveloffset=+1,tags=**;!CPM;UWM] + +// Enabling monitoring for user-defined projects +[id="enabling-monitoring-for-user-defined-projects-uwm_{context}"] +== Enabling monitoring for user-defined projects + +In {product-title}, you can enable monitoring for user-defined projects in addition to the default platform monitoring. You can monitor your own projects in {product-title} without the need for an additional monitoring solution. Using this feature centralizes monitoring for core platform components and user-defined projects. 
+ +include::snippets/monitoring-custom-prometheus-note.adoc[] + +include::modules/monitoring-enabling-monitoring-for-user-defined-projects.adoc[leveloffset=+2] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/getting-started/user-workload-monitoring-first-steps.adoc#user-workload-monitoring-first-steps[User workload monitoring first steps] + +include::modules/monitoring-granting-users-permission-to-configure-monitoring-for-user-defined-projects.adoc[leveloffset=+2] + +// Enabling alert routing for user-defined projects +include::modules/monitoring-enabling-alert-routing-for-user-defined-projects.adoc[leveloffset=+1] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#understanding-alert-routing-for-user-defined-projects_key-concepts[Understanding alert routing for user-defined projects] + +// Enabling the platform Alertmanager instance for user-defined alert routing +ifndef::openshift-dedicated,openshift-rosa[] +include::modules/monitoring-enabling-the-platform-alertmanager-instance-for-user-defined-alert-routing.adoc[leveloffset=+2] +endif::openshift-dedicated,openshift-rosa[] + +include::modules/monitoring-enabling-a-separate-alertmanager-instance-for-user-defined-alert-routing.adoc[leveloffset=+2] +include::modules/monitoring-granting-users-permission-to-configure-alert-routing-for-user-defined-projects.adoc[leveloffset=+2] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/configuring-user-workload-monitoring/configuring-alerts-and-notifications-uwm.adoc#configuring-alert-notifications_configuring-alerts-and-notifications-uwm[Configuring alert notifications] + +// Granting users permissions for monitoring for user-defined projects +include::modules/monitoring-granting-users-permission-to-monitor-user-defined-projects.adoc[leveloffset=+1] + +[role="_additional-resources"] 
+.Additional resources +* xref:../../../observability/monitoring/accessing-metrics/accessing-monitoring-apis-by-using-the-cli.adoc#cmo-services-resources_accessing-monitoring-apis-by-using-the-cli[CMO services resources] +* xref:../../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#granting-users-permission-to-configure-monitoring-for-user-defined-projects_preparing-to-configure-the-monitoring-stack-uwm[Granting users permission to configure monitoring for user-defined projects] +* xref:../../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#granting-users-permission-to-configure-alert-routing-for-user-defined-projects_preparing-to-configure-the-monitoring-stack-uwm[Granting users permission to configure alert routing for user-defined projects] + +include::modules/monitoring-granting-user-permissions-using-the-web-console.adoc[leveloffset=+2] +include::modules/monitoring-granting-user-permissions-using-the-cli.adoc[leveloffset=+2] + +// Excluding a user-defined project from monitoring +include::modules/monitoring-excluding-a-user-defined-project-from-monitoring.adoc[leveloffset=+1] + +// Disabling monitoring for user-defined projects +include::modules/monitoring-disabling-monitoring-for-user-defined-projects.adoc[leveloffset=+1] \ No newline at end of file diff --git a/observability/monitoring/configuring-user-workload-monitoring/snippets b/observability/monitoring/configuring-user-workload-monitoring/snippets new file mode 120000 index 0000000000..5a3f5add14 --- /dev/null +++ b/observability/monitoring/configuring-user-workload-monitoring/snippets @@ -0,0 +1 @@ +../../snippets/ \ No newline at end of file diff --git a/observability/monitoring/configuring-user-workload-monitoring/storing-and-recording-data-uwm.adoc b/observability/monitoring/configuring-user-workload-monitoring/storing-and-recording-data-uwm.adoc new file mode 
100644 index 0000000000..ee27889c56 --- /dev/null +++ b/observability/monitoring/configuring-user-workload-monitoring/storing-and-recording-data-uwm.adoc @@ -0,0 +1,58 @@ +:_mod-docs-content-type: ASSEMBLY +include::_attributes/common-attributes.adoc[] +[id="storing-and-recording-data-uwm"] += Storing and recording data for user workload monitoring +:context: storing-and-recording-data-uwm + +toc::[] + +Store and record your metrics and alerting data, configure logs to specify which activities are recorded, control how long Prometheus retains stored data, and set the maximum amount of disk space for the data. These actions help you protect your data and use them for troubleshooting. + +// Configuring persistent storage +include::modules/monitoring-configuring-persistent-storage.adoc[leveloffset=+1] + +include::modules/monitoring-configuring-a-persistent-volume-claim.adoc[leveloffset=+2,tags=**;!CPM;UWM] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../storage/understanding-persistent-storage.adoc#understanding-persistent-storage[Understanding persistent storage] +* link:https://kubernetes.io/docs/concepts/storage/persistent-volumes/#persistentvolumeclaims[PersistentVolumeClaims] (Kubernetes documentation) + +include::modules/monitoring-resizing-a-persistent-volume.adoc[leveloffset=+2,tags=**;!CPM;UWM] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../scalability_and_performance/recommended-performance-scale-practices/recommended-infrastructure-practices.adoc#prometheus-database-storage-requirements_recommended-infrastructure-practices[Prometheus database storage requirements] +* xref:../../../storage/expanding-persistent-volumes.adoc#expanding-pvc-filesystem_expanding-persistent-volumes[Expanding persistent volume claims (PVCs) with a file system] + +// Modifying the retention time and size + 
+include::modules/monitoring-modifying-retention-time-and-size-for-prometheus-metrics-data.adoc[leveloffset=+1,tags=**;!CPM;UWM] + +include::modules/monitoring-modifying-the-retention-time-for-thanos-ruler-metrics-data.adoc[leveloffset=+2] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#retention-time-and-size-for-prometheus-metrics-data_key-concepts[Retention time and size for Prometheus metrics] +* xref:../../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#enabling-monitoring-for-user-defined-projects-uwm_preparing-to-configure-the-monitoring-stack-uwm[Enabling monitoring for user-defined projects] +* xref:../../../scalability_and_performance/recommended-performance-scale-practices/recommended-infrastructure-practices.adoc#prometheus-database-storage-requirements_cluster-monitoring-operator[Prometheus database storage requirements] +* xref:../../../scalability_and_performance/optimization/optimizing-storage.adoc#optimizing-storage[Recommended configurable storage technology] +* xref:../../../storage/understanding-persistent-storage.adoc#understanding-persistent-storage[Understanding persistent storage] +* xref:../../../scalability_and_performance/optimization/optimizing-storage.adoc#optimizing-storage[Optimizing storage] + +// Setting log levels for monitoring components +include::modules/monitoring-setting-log-levels-for-monitoring-components.adoc[leveloffset=+1,tags=**;!CPM;UWM] + +// Enabling the query log file for Prometheus +include::modules/monitoring-setting-query-log-file-for-prometheus.adoc[leveloffset=+1,tags=**;!CPM;UWM] + +[role="_additional-resources"] +.Additional resources + +* 
xref:../../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#enabling-monitoring-for-user-defined-projects-uwm_preparing-to-configure-the-monitoring-stack-uwm[Enabling monitoring for user-defined projects] + + + diff --git a/observability/monitoring/enabling-alert-routing-for-user-defined-projects.adoc b/observability/monitoring/enabling-alert-routing-for-user-defined-projects.adoc index c03b06ffaf..83e607e645 100644 --- a/observability/monitoring/enabling-alert-routing-for-user-defined-projects.adoc +++ b/observability/monitoring/enabling-alert-routing-for-user-defined-projects.adoc @@ -6,14 +6,8 @@ include::_attributes/common-attributes.adoc[] toc::[] -[role="_abstract"] -ifndef::openshift-dedicated,openshift-rosa[] -In {product-title} {product-version}, a cluster administrator can enable alert routing for user-defined projects. -endif::openshift-dedicated,openshift-rosa[] -ifdef::openshift-dedicated,openshift-rosa[] -In {product-title}, a `dedicated-admin` can enable alert routing for user-defined projects. -endif::openshift-dedicated,openshift-rosa[] -This process consists of two general steps: +In {product-title}, an administrator can enable alert routing for user-defined projects. +This process consists of the following steps: ifndef::openshift-dedicated,openshift-rosa[] * Enable alert routing for user-defined projects to use the default platform Alertmanager instance or, optionally, a separate Alertmanager instance only for user-defined projects. 
@@ -45,4 +39,4 @@ include::modules/monitoring-granting-users-permission-to-configure-alert-routing ifndef::openshift-dedicated,openshift-rosa[] * xref:../../observability/monitoring/enabling-monitoring-for-user-defined-projects.adoc#enabling-monitoring-for-user-defined-projects[Enabling monitoring for user defined projects] endif::openshift-dedicated,openshift-rosa[] -* xref:../../observability/monitoring/managing-alerts.adoc#creating-alert-routing-for-user-defined-projects_managing-alerts[Creating alert routing for user-defined projects] +* xref:../../observability/monitoring/managing-alerts.adoc#configuring-alert-routing-for-user-defined-projects_managing-alerts[Configuring alert routing for user-defined projects] diff --git a/observability/monitoring/enabling-monitoring-for-user-defined-projects.adoc b/observability/monitoring/enabling-monitoring-for-user-defined-projects.adoc index 64788eeaea..8101c759ea 100644 --- a/observability/monitoring/enabling-monitoring-for-user-defined-projects.adoc +++ b/observability/monitoring/enabling-monitoring-for-user-defined-projects.adoc @@ -1,11 +1,15 @@ :_mod-docs-content-type: ASSEMBLY [id="enabling-monitoring-for-user-defined-projects"] -= Enabling monitoring for user-defined projects += Enabling the user workload monitoring include::_attributes/common-attributes.adoc[] :context: enabling-monitoring-for-user-defined-projects toc::[] +// Preparing the following short assembly introduction into a module, because this assembly will be deleted and divided into just modules +// Introduction enabling monitoring for user-defined projects +//include::modules/monitoring-intro-enabling-monitoring-for-user-defined-projects.adoc[leveloffset=+1] + In {product-title}, you can enable monitoring for user-defined projects in addition to the default platform monitoring. You can monitor your own projects in {product-title} without the need for an additional monitoring solution. 
Using this feature centralizes monitoring for core platform components and user-defined projects. include::snippets/monitoring-custom-prometheus-note.adoc[] @@ -25,7 +29,7 @@ include::modules/monitoring-granting-users-permission-to-monitor-user-defined-pr [role="_additional-resources"] .Additional resources -* xref:../../observability/monitoring/accessing-third-party-monitoring-apis.adoc#cmo-services-resources[CMO services resources] +* xref:../../observability/monitoring/accessing-third-party-monitoring-apis.adoc#cmo-services-resources_accessing-third-party-monitoring-apis[CMO services resources] include::modules/monitoring-granting-user-permissions-using-the-web-console.adoc[leveloffset=+2] include::modules/monitoring-granting-user-permissions-using-the-cli.adoc[leveloffset=+2] @@ -33,14 +37,6 @@ include::modules/monitoring-granting-user-permissions-using-the-cli.adoc[levelof // Granting users permission to configure monitoring for user-defined projects include::modules/monitoring-granting-users-permission-to-configure-monitoring-for-user-defined-projects.adoc[leveloffset=+1] -// Accessing metrics from outside the cluster for custom applications -include::modules/accessing-metrics-outside-cluster.adoc[leveloffset=+1] - -[role="_additional-resources"] -.Additional resources - -* xref:../../observability/monitoring/enabling-monitoring-for-user-defined-projects.adoc#enabling-monitoring-for-user-defined-projects_enabling-monitoring-for-user-defined-projects[Enabling monitoring for user-defined projects] - // Excluding a user-defined project from monitoring include::modules/monitoring-excluding-a-user-defined-project-from-monitoring.adoc[leveloffset=+1] diff --git a/observability/monitoring/getting-started/_attributes b/observability/monitoring/getting-started/_attributes new file mode 120000 index 0000000000..20cc1dcb77 --- /dev/null +++ b/observability/monitoring/getting-started/_attributes @@ -0,0 +1 @@ +../../_attributes/ \ No newline at end of file diff --git 
a/observability/monitoring/getting-started/core-platform-monitoring-first-steps.adoc b/observability/monitoring/getting-started/core-platform-monitoring-first-steps.adoc new file mode 100644 index 0000000000..2cb5d6128a --- /dev/null +++ b/observability/monitoring/getting-started/core-platform-monitoring-first-steps.adoc @@ -0,0 +1,58 @@ +:_mod-docs-content-type: ASSEMBLY +include::_attributes/common-attributes.adoc[] +[id="core-platform-monitoring-first-steps"] += Core platform monitoring first steps +:context: core-platform-monitoring-first-steps + +toc::[] + +After {product-title} is installed, core platform monitoring components immediately begin collecting metrics, which you can query and view. +The default in-cluster monitoring stack includes the core platform Prometheus instance that collects metrics from your cluster and the core Alertmanager instance that routes alerts, among other components. +Depending on who will use the monitoring stack and for what purposes, as a cluster administrator, you can further configure these monitoring components to suit the needs of different users in various scenarios. + +[id="configuring-core-platform-monitoring-postinstallation-steps_{context}"] +== Configuring core platform monitoring: Postinstallation steps + +After {product-title} is installed, cluster administrators typically configure core platform monitoring to suit their needs. +These activities include setting up storage and configuring options for Prometheus, Alertmanager, and other monitoring components. + +[NOTE] +==== +By default, in a newly installed {product-title} system, users can query and view collected metrics. +You need only configure an alert receiver if you want users to receive alert notifications. +Any other configuration options listed here are optional. 
+==== + +* xref:../../../observability/monitoring/configuring-core-platform-monitoring/preparing-to-configure-the-monitoring-stack.adoc#creating-cluster-monitoring-configmap_preparing-to-configure-the-monitoring-stack[Create the `cluster-monitoring-config` `ConfigMap` object] if it does not exist. +* xref:../../../observability/monitoring/configuring-core-platform-monitoring/configuring-alerts-and-notifications.adoc#configuring-alert-notifications_configuring-alerts-and-notifications[Configure notifications for default platform alerts] so that Alertmanager can send alerts to an external notification system such as email, Slack, or PagerDuty. +* For shorter term data retention, xref:../../../observability/monitoring/configuring-core-platform-monitoring/storing-and-recording-data.adoc#configuring-persistent-storage_storing-and-recording-data[configure persistent storage] for Prometheus and Alertmanager to store metrics and alert data. +Specify the metrics data retention parameters for Prometheus and Thanos Ruler. ++ +[IMPORTANT] +==== +* In multi-node clusters, you must configure persistent storage for Prometheus, Alertmanager, and Thanos Ruler to ensure high availability. + +* By default, in a newly installed {product-title} system, the monitoring `ClusterOperator` resource reports a `PrometheusDataPersistenceNotConfigured` status message to remind you that storage is not configured. +==== ++ +* For longer term data retention, xref:../../../observability/monitoring/configuring-core-platform-monitoring/configuring-metrics.adoc#configuring-remote-write-storage_configuring-metrics[configure the remote write feature] to enable Prometheus to send ingested metrics to remote systems for storage. ++ +[IMPORTANT] +==== +Be sure to xref:../../../observability/monitoring/configuring-core-platform-monitoring/configuring-metrics.adoc#creating-cluster-id-labels-for-metrics_configuring-metrics[add cluster ID labels to metrics] for use with your remote write storage configuration. 
+==== ++ +* xref:../../../observability/monitoring/configuring-core-platform-monitoring/preparing-to-configure-the-monitoring-stack.adoc#granting-users-permissions-for-core-platform-monitoring_preparing-to-configure-the-monitoring-stack[Grant monitoring cluster roles] to any non-administrator users that need to access certain monitoring features. +* xref:../../../observability/monitoring/configuring-core-platform-monitoring/configuring-performance-and-scalability.adoc#assigning-tolerations-to-monitoring-components_configuring-performance-and-scalability[Assign tolerations] to monitoring stack components so that administrators can move them to tainted nodes. +* xref:../../../observability/monitoring/configuring-core-platform-monitoring/configuring-performance-and-scalability.adoc#setting-the-body-size-limit-for-metrics-scraping_configuring-performance-and-scalability[Set the body size limit] for metrics collection to help avoid situations in which Prometheus consumes excessive amounts of memory when scraped targets return a response that contains a large amount of data. +* xref:../../../observability/monitoring/managing-alerts/managing-alerts-as-an-administrator.adoc#managing-alerting-rules-for-core-platform-monitoring_managing-alerts-as-an-administrator[Modify or create alerting rules] for your cluster. +These rules specify the conditions that trigger alerts, such as high CPU or memory usage, network latency, and so forth. +* xref:../../../observability/monitoring/configuring-core-platform-monitoring/configuring-performance-and-scalability.adoc#managing-cpu-and-memory-resources-for-monitoring-components_configuring-performance-and-scalability[Specify resource limits and requests for monitoring components] to ensure that the containers that run monitoring components have enough CPU and memory resources. 
+ +With the monitoring stack configured to suit your needs, Prometheus collects metrics from the specified services and stores these metrics according to your settings. +You can go to the *Observe* pages in the {product-title} web console to view and query collected metrics, manage alerts, identify performance bottlenecks, and scale resources as needed: + +* xref:../../../observability/monitoring/accessing-metrics/accessing-metrics-as-an-administrator.adoc#reviewing-monitoring-dashboards-admin_accessing-metrics-as-an-administrator[View dashboards] to visualize collected metrics, troubleshoot alerts, and monitor other information about your cluster. +* xref:../../../observability/monitoring/accessing-metrics/accessing-metrics-as-an-administrator.adoc#querying-metrics-for-all-projects-with-mon-dashboard_accessing-metrics-as-an-administrator[Query collected metrics] by creating PromQL queries or using predefined queries. + + diff --git a/observability/monitoring/getting-started/developer-and-non-administrator-steps.adoc b/observability/monitoring/getting-started/developer-and-non-administrator-steps.adoc new file mode 100644 index 0000000000..43aa985744 --- /dev/null +++ b/observability/monitoring/getting-started/developer-and-non-administrator-steps.adoc @@ -0,0 +1,16 @@ +:_mod-docs-content-type: ASSEMBLY +include::_attributes/common-attributes.adoc[] +[id="developer-and-non-administrator-steps"] += Developer and non-administrator steps +:context: developer-and-non-administrator-steps + +toc::[] + +After monitoring for user-defined projects is enabled and configured, developers and other non-administrator users can then perform the following activities to set up and use monitoring for their own projects: + +* xref:../../../observability/monitoring/configuring-user-workload-monitoring/configuring-metrics-uwm.adoc#setting-up-metrics-collection-for-user-defined-projects_configuring-metrics-uwm[Deploy and monitor services]. 
+* xref:../../../observability/monitoring/managing-alerts/managing-alerts-as-a-developer.adoc#managing-alerting-rules-for-user-defined-projects-uwm_managing-alerts-as-a-developer[Create and manage alerting rules]. +* xref:../../../observability/monitoring/managing-alerts/managing-alerts-as-a-developer.adoc#managing-alerts-as-a-developer[Receive and manage alerts] for your projects. +* If granted the `alert-routing-edit` cluster role, xref:../../../observability/monitoring/configuring-user-workload-monitoring/configuring-alerts-and-notifications-uwm.adoc#configuring-alert-routing-for-user-defined-projects_configuring-alerts-and-notifications-uwm[configure alert routing]. +* xref:../../../observability/monitoring/accessing-metrics/accessing-metrics-as-a-developer.adoc#reviewing-monitoring-dashboards-developer_accessing-metrics-as-a-developer[View dashboards] by using the {product-title} web console. +* xref:../../../observability/monitoring/accessing-metrics/accessing-metrics-as-a-developer.adoc#querying-metrics-for-user-defined-projects-with-mon-dashboard_accessing-metrics-as-a-developer[Query the collected metrics] by creating PromQL queries or using predefined queries. 
diff --git a/observability/monitoring/getting-started/images b/observability/monitoring/getting-started/images new file mode 120000 index 0000000000..847b03ed05 --- /dev/null +++ b/observability/monitoring/getting-started/images @@ -0,0 +1 @@ +../../images/ \ No newline at end of file diff --git a/observability/monitoring/getting-started/maintenance-and-support-for-monitoring.adoc b/observability/monitoring/getting-started/maintenance-and-support-for-monitoring.adoc new file mode 100644 index 0000000000..157e969aa0 --- /dev/null +++ b/observability/monitoring/getting-started/maintenance-and-support-for-monitoring.adoc @@ -0,0 +1,28 @@ +:_mod-docs-content-type: ASSEMBLY +include::_attributes/common-attributes.adoc[] +[id="maintenance-and-support-for-monitoring"] += Maintenance and support for monitoring +:context: maintenance-and-support-for-monitoring + +toc::[] + +Not all configuration options for the monitoring stack are exposed. The only supported way of configuring {product-title} monitoring is by configuring the {cmo-first} using the options described in the xref:../../../observability/monitoring/config-map-reference-for-the-cluster-monitoring-operator.adoc#cluster-monitoring-operator-configuration-reference[Config map reference for the {cmo-full}]. *Do not use other configurations, as they are unsupported.* + +Configuration paradigms might change across Prometheus releases, and such cases can only be handled gracefully if all configuration possibilities are controlled. If you use configurations other than those described in the xref:../../../observability/monitoring/config-map-reference-for-the-cluster-monitoring-operator.adoc#cluster-monitoring-operator-configuration-reference[Config map reference for the {cmo-full}], your changes will disappear because the {cmo-short} automatically reconciles any differences and resets any unsupported changes back to the originally defined state by default and by design. 
+ +ifdef::openshift-dedicated,openshift-rosa[] +[IMPORTANT] +==== +Installing another Prometheus instance is not supported by the Red Hat Site Reliability Engineers (SRE). +==== +endif::openshift-dedicated,openshift-rosa[] + +include::modules/monitoring-support-considerations.adoc[leveloffset=+1] +ifndef::openshift-dedicated,openshift-rosa[] +include::modules/monitoring-support-policy-for-monitoring-operators.adoc[leveloffset=+1] +endif::openshift-dedicated,openshift-rosa[] + +include::modules/monitoring-support-version-matrix-for-monitoring-components.adoc[leveloffset=+1] + + + diff --git a/observability/monitoring/getting-started/modules b/observability/monitoring/getting-started/modules new file mode 120000 index 0000000000..36719b9de7 --- /dev/null +++ b/observability/monitoring/getting-started/modules @@ -0,0 +1 @@ +../../modules/ \ No newline at end of file diff --git a/observability/monitoring/getting-started/snippets b/observability/monitoring/getting-started/snippets new file mode 120000 index 0000000000..5a3f5add14 --- /dev/null +++ b/observability/monitoring/getting-started/snippets @@ -0,0 +1 @@ +../../snippets/ \ No newline at end of file diff --git a/observability/monitoring/getting-started/user-workload-monitoring-first-steps.adoc b/observability/monitoring/getting-started/user-workload-monitoring-first-steps.adoc new file mode 100644 index 0000000000..7b193b5e75 --- /dev/null +++ b/observability/monitoring/getting-started/user-workload-monitoring-first-steps.adoc @@ -0,0 +1,20 @@ +:_mod-docs-content-type: ASSEMBLY +include::_attributes/common-attributes.adoc[] +[id="user-workload-monitoring-first-steps"] += User workload monitoring first steps +:context: user-workload-monitoring-first-steps + +toc::[] + +As a cluster administrator, you can optionally enable monitoring for user-defined projects in addition to core platform monitoring. +Non-administrator users such as developers can then monitor their own projects outside of core platform monitoring. 
+ +Cluster administrators typically complete the following activities to configure user-defined projects so that users can view collected metrics, query these metrics, and receive alerts for their own projects: + +* xref:../../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#enabling-monitoring-for-user-defined-projects-uwm_preparing-to-configure-the-monitoring-stack-uwm[Enable user workload monitoring]. +* xref:../../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#granting-users-permission-to-monitor-user-defined-projects_preparing-to-configure-the-monitoring-stack-uwm[Grant non-administrator users permissions to monitor user-defined projects] by assigning the `monitoring-rules-view`, `monitoring-rules-edit`, or `monitoring-edit` cluster roles. +* xref:../../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#granting-users-permission-to-configure-alert-routing-for-user-defined-projects_preparing-to-configure-the-monitoring-stack-uwm[Assign the `user-workload-monitoring-config-edit` role] to grant non-administrator users permission to configure monitoring for user-defined projects. +* xref:../../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#enabling-alert-routing-for-user-defined-projects_preparing-to-configure-the-monitoring-stack-uwm[Enable alert routing for user-defined projects] so that developers and other users can configure custom alerts and alert routing for their projects.
+* If needed, configure alert routing for user-defined projects to xref:../../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#enabling-a-separate-alertmanager-instance-for-user-defined-alert-routing_preparing-to-configure-the-monitoring-stack-uwm[use an optional Alertmanager instance dedicated for use only by user-defined projects]. +* xref:../../../observability/monitoring/configuring-user-workload-monitoring/configuring-alerts-and-notifications-uwm.adoc#configuring-alert-notifications_configuring-alerts-and-notifications-uwm[Configure notifications for user-defined alerts]. +* If you use the platform Alertmanager instance for user-defined alert routing, xref:../../../observability/monitoring/configuring-user-workload-monitoring/configuring-alerts-and-notifications-uwm.adoc#configuring-different-alert-receivers-for-default-platform-alerts-and-user-defined-alerts_configuring-alerts-and-notifications-uwm[configure different alert receivers] for default platform alerts and user-defined alerts. 
diff --git a/observability/monitoring/managing-alerts.adoc b/observability/monitoring/managing-alerts.adoc index ceb8bdcbba..77e2f4f6d0 100644 --- a/observability/monitoring/managing-alerts.adoc +++ b/observability/monitoring/managing-alerts.adoc @@ -18,13 +18,15 @@ The alerts, silences, and alerting rules that are available in the Alerting UI r ==== // Accessing the Alerting UI in the Administrator and Developer perspectives -include::modules/monitoring-accessing-the-alerting-ui.adoc[leveloffset=+1] +include::modules/monitoring-accessing-the-alerting-ui.adoc[leveloffset=+1,tags=**;ADM;!DEV] +include::modules/monitoring-accessing-the-alerting-ui.adoc[leveloffset=+1,tags=**;DEV;!ADM] // Searching and filtering alerts, silences, and alerting rules include::modules/monitoring-searching-alerts-silences-and-alerting-rules.adoc[leveloffset=+1] // Getting information about alerts, silences and alerting rules -include::modules/monitoring-getting-information-about-alerts-silences-and-alerting-rules.adoc[leveloffset=+1] +include::modules/monitoring-getting-information-about-alerts-silences-and-alerting-rules.adoc[leveloffset=+1,tags=**;ADM;!DEV] +include::modules/monitoring-getting-information-about-alerts-silences-and-alerting-rules.adoc[leveloffset=+1,tags=**;DEV;!ADM] [role="_additional-resources"] .Additional resources @@ -36,9 +38,15 @@ include::modules/monitoring-managing-silences.adoc[leveloffset=+1] .Additional resources * xref:../../observability/monitoring/configuring-the-monitoring-stack.adoc#configuring-persistent-storage_configuring-the-monitoring-stack[Configuring persistent storage] -include::modules/monitoring-silencing-alerts.adoc[leveloffset=+2] -include::modules/monitoring-editing-silences.adoc[leveloffset=+2] -include::modules/monitoring-expiring-silences.adoc[leveloffset=+2] +include::modules/monitoring-silencing-alerts.adoc[leveloffset=+2,tags=**;ADM;!DEV] +include::modules/monitoring-silencing-alerts.adoc[leveloffset=+2,tags=**;DEV;!ADM] + +
+include::modules/monitoring-editing-silences.adoc[leveloffset=+2,tags=**;ADM;!DEV] +include::modules/monitoring-editing-silences.adoc[leveloffset=+2,tags=**;DEV;!ADM] + +include::modules/monitoring-expiring-silences.adoc[leveloffset=+2,tags=**;ADM;!DEV] +include::modules/monitoring-expiring-silences.adoc[leveloffset=+2,tags=**;DEV;!ADM] // Managing core platform alerting rules ifndef::openshift-dedicated,openshift-rosa[] @@ -56,7 +64,7 @@ include::modules/monitoring-modifying-core-platform-alerting-rules.adoc[leveloff * See the link:https://prometheus.io/docs/practices/alerting/[Prometheus alerting documentation] for further guidelines on optimizing alerts. endif::openshift-dedicated,openshift-rosa[] -// Creating alerting rules for user-defined projects +// Creating alerting rules for user workload monitoring include::modules/monitoring-about-creating-alerting-rules-for-user-defined-projects.adoc[leveloffset=+1] include::modules/monitoring-optimizing-alerting-for-user-defined-projects.adoc[leveloffset=+2] include::modules/monitoring-creating-alerting-rules-for-user-defined-projects.adoc[leveloffset=+2] @@ -67,7 +75,7 @@ include::modules/monitoring-creating-cross-project-alerting-rules-for-user-defin * link:https://prometheus.io/docs/practices/alerting/[Prometheus alerting documentation] * xref:../../observability/monitoring/monitoring-overview.adoc#monitoring-overview[Monitoring overview] -// Managing alerting rules for user-defined projects +// Managing alerting rules for user workload monitoring include::modules/monitoring-managing-alerting-rules-for-user-defined-projects.adoc[leveloffset=+1] include::modules/monitoring-accessing-alerting-rules-for-your-project.adoc[leveloffset=+2] include::modules/monitoring-listing-alerting-rules-for-all-projects-in-a-single-view.adoc[leveloffset=+2] @@ -83,12 +91,12 @@ include::modules/monitoring-disabling-cross-project-alerting-rules-for-user-defi 
include::modules/monitoring-sending-notifications-to-external-systems.adoc[leveloffset=+1] // Configuring alert receivers ifndef::openshift-dedicated,openshift-rosa[] -include::modules/monitoring-configuring-alert-receivers.adoc[leveloffset=+2] +include::modules/monitoring-configuring-alert-routing-console.adoc[leveloffset=+2] endif::openshift-dedicated,openshift-rosa[] // Configuring different alert receivers for default platform alerts and user-defined alerts include::modules/monitoring-configuring-different-alert-receivers-for-default-platform-alerts-and-user-defined-alerts.adoc[leveloffset=+2] // Creating alert routing for user-defined projects -include::modules/monitoring-creating-alert-routing-for-user-defined-projects.adoc[leveloffset=+2] +include::modules/monitoring-configuring-alert-routing-for-user-defined-projects.adoc[leveloffset=+2] [id="configuring-alertmanager-to-send-notifications"] == Configuring Alertmanager to send notifications @@ -106,14 +114,14 @@ All features of a supported version of upstream Alertmanager are also supported // Configuring notifications for default platform alerts ifndef::openshift-dedicated,openshift-rosa[] -include::modules/monitoring-configuring-notifications-for-default-platform-alerts.adoc[leveloffset=+2] +include::modules/monitoring-configuring-alert-routing-default-platform-alerts.adoc[leveloffset=+2] endif::openshift-dedicated,openshift-rosa[] // Configuring notifications for user-defined alerts -include::modules/monitoring-configuring-notifications-for-user-defined-alerts.adoc[leveloffset=+2] +include::modules/monitoring-configuring-alert-routing-user-defined-alerts-secret.adoc[leveloffset=+2] [role="_additional-resources"] -[id="additional-resources_configuring-alertmanager-to-send-notifications"] +[id="additional-resources_{context}"] == Additional resources * link:https://www.pagerduty.com/[PagerDuty official site] diff --git a/observability/monitoring/managing-alerts/_attributes 
b/observability/monitoring/managing-alerts/_attributes new file mode 120000 index 0000000000..20cc1dcb77 --- /dev/null +++ b/observability/monitoring/managing-alerts/_attributes @@ -0,0 +1 @@ +../../_attributes/ \ No newline at end of file diff --git a/observability/monitoring/managing-alerts/images b/observability/monitoring/managing-alerts/images new file mode 120000 index 0000000000..847b03ed05 --- /dev/null +++ b/observability/monitoring/managing-alerts/images @@ -0,0 +1 @@ +../../images/ \ No newline at end of file diff --git a/observability/monitoring/managing-alerts/managing-alerts-as-a-developer.adoc b/observability/monitoring/managing-alerts/managing-alerts-as-a-developer.adoc new file mode 100644 index 0000000000..0d72847a15 --- /dev/null +++ b/observability/monitoring/managing-alerts/managing-alerts-as-a-developer.adoc @@ -0,0 +1,79 @@ +:_mod-docs-content-type: ASSEMBLY +include::_attributes/common-attributes.adoc[] +[id="managing-alerts-as-a-developer"] += Managing alerts as a Developer +:context: managing-alerts-as-a-developer + +toc::[] + +In {product-title}, the Alerting UI enables you to manage alerts, silences, and alerting rules. + +[NOTE] +==== +The alerts, silences, and alerting rules that are available in the Alerting UI relate to the projects that you have access to. 
+==== + +// Accessing the Alerting UI from the Developer perspective +include::modules/monitoring-accessing-the-alerting-ui.adoc[leveloffset=+1,tags=**;DEV;!ADM] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#searching-alerts-silences-and-alerting-rules_key-concepts[Searching and filtering alerts, silences, and alerting rules] + +// Getting information about alerts, silences, and alerting rules from the Developer perspective +include::modules/monitoring-getting-information-about-alerts-silences-and-alerting-rules.adoc[leveloffset=+1,tags=**;DEV;!ADM] + +[role="_additional-resources"] +.Additional resources +* link:https://github.com/openshift/runbooks/tree/master/alerts/cluster-monitoring-operator[{cmo-full} runbooks] ({cmo-full} GitHub repository) + +[id="managing-silences_{context}"] +== Managing silences + +You can create a silence for an alert in the {product-title} web console in the *Developer* perspective. +After you create silences, you can view, edit, and expire them. You also do not receive notifications about a silenced alert when the alert fires. + +[NOTE] +==== +When you create silences, they are replicated across Alertmanager pods. However, if you do not configure persistent storage for Alertmanager, silences might be lost. This can happen, for example, if all Alertmanager pods restart at the same time.
+==== + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#managing-silences_key-concepts[Managing silences] +* xref:../../../observability/monitoring/configuring-core-platform-monitoring/storing-and-recording-data.adoc#configuring-persistent-storage_storing-and-recording-data[Configuring persistent storage] + +include::modules/monitoring-silencing-alerts.adoc[leveloffset=+2,tags=**;DEV;!ADM] +include::modules/monitoring-editing-silences.adoc[leveloffset=+2,tags=**;DEV;!ADM] +include::modules/monitoring-expiring-silences.adoc[leveloffset=+2,tags=**;DEV;!ADM] + + +[id="managing-alerting-rules-for-user-defined-projects-uwm_{context}"] +== Managing alerting rules for user-defined projects + +In {product-title}, you can create, view, edit, and remove alerting rules for user-defined projects. Those alerting rules will trigger alerts based on the values of the chosen metrics. + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#about-creating-alerting-rules-for-user-defined-projects_key-concepts[Creating alerting rules for user-defined projects] +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#managing-alerting-rules-for-user-defined-projects_key-concepts[Managing alerting rules for user-defined projects] +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#optimizing-alerting-for-user-defined-projects_key-concepts[Optimizing alerting for user-defined projects] + +include::modules/monitoring-creating-alerting-rules-for-user-defined-projects.adoc[leveloffset=+2] +include::modules/monitoring-creating-cross-project-alerting-rules-for-user-defined-projects.adoc[leveloffset=+2] + +[role="_additional-resources"] +.Additional resources +* 
xref:../../../observability/monitoring/about-ocp-monitoring/monitoring-stack-architecture.adoc#monitoring-stack-architecture[Monitoring stack architecture] +* link:https://prometheus.io/docs/practices/alerting/[Alerting] (Prometheus documentation) + +include::modules/monitoring-accessing-alerting-rules-for-your-project.adoc[leveloffset=+2] +include::modules/monitoring-removing-alerting-rules-for-user-defined-projects.adoc[leveloffset=+2] + +[role="_additional-resources"] +.Additional resources + +* link:https://prometheus.io/docs/alerting/alertmanager/[Alertmanager] (Prometheus documentation) diff --git a/observability/monitoring/managing-alerts/managing-alerts-as-an-administrator.adoc b/observability/monitoring/managing-alerts/managing-alerts-as-an-administrator.adoc new file mode 100644 index 0000000000..9dab1f7712 --- /dev/null +++ b/observability/monitoring/managing-alerts/managing-alerts-as-an-administrator.adoc @@ -0,0 +1,113 @@ +:_mod-docs-content-type: ASSEMBLY +include::_attributes/common-attributes.adoc[] +[id="managing-alerts-as-an-administrator"] += Managing alerts as an Administrator +:context: managing-alerts-as-an-administrator + +toc::[] + +In {product-title}, the Alerting UI enables you to manage alerts, silences, and alerting rules. + +[NOTE] +==== +The alerts, silences, and alerting rules that are available in the Alerting UI relate to the projects that you have access to. For example, if you are logged in as a user with the `cluster-admin` role, you can access all alerts, silences, and alerting rules. 
+==== + +// Accessing the Alerting UI from the Administrator perspective +include::modules/monitoring-accessing-the-alerting-ui.adoc[leveloffset=+1,tags=**;ADM;!DEV] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#searching-alerts-silences-and-alerting-rules_key-concepts[Searching and filtering alerts, silences, and alerting rules] + +// Getting information about alerts, silences, and alerting rules from the Administrator perspective +include::modules/monitoring-getting-information-about-alerts-silences-and-alerting-rules.adoc[leveloffset=+1,tags=**;ADM;!DEV] + +[role="_additional-resources"] +.Additional resources +* link:https://github.com/openshift/runbooks/tree/master/alerts/cluster-monitoring-operator[{cmo-full} runbooks] ({cmo-full} GitHub repository) + +[id="managing-silences_{context}"] +== Managing silences + +You can create a silence for an alert in the {product-title} web console in the *Administrator* perspective. +After you create silences, you can view, edit, and expire them. You also do not receive notifications about a silenced alert when the alert fires. + +[NOTE] +==== +When you create silences, they are replicated across Alertmanager pods. However, if you do not configure persistent storage for Alertmanager, silences might be lost. This can happen, for example, if all Alertmanager pods restart at the same time.
+==== + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#managing-silences_key-concepts[Managing silences] +* xref:../../../observability/monitoring/configuring-core-platform-monitoring/storing-and-recording-data.adoc#configuring-persistent-storage_storing-and-recording-data[Configuring persistent storage] + +include::modules/monitoring-silencing-alerts.adoc[leveloffset=+2,tags=**;ADM;!DEV] +include::modules/monitoring-editing-silences.adoc[leveloffset=+2,tags=**;ADM;!DEV] +include::modules/monitoring-expiring-silences.adoc[leveloffset=+2,tags=**;ADM;!DEV] + + +[id="managing-alerting-rules-for-core-platform-monitoring_{context}"] +== Managing alerting rules for core platform monitoring + +The {product-title} monitoring includes a large set of default alerting rules for platform metrics. +As a cluster administrator, you can customize this set of rules in two ways: + +* Modify the settings for existing platform alerting rules by adjusting thresholds or by adding and modifying labels. +For example, you can change the `severity` label for an alert from `warning` to `critical` to help you route and triage issues flagged by an alert. + +* Define and add new custom alerting rules by constructing a query expression based on core platform metrics in the `openshift-monitoring` project. 
+ +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#managing-core-platform-alerting-rules_key-concepts[Managing alerting rules for core platform monitoring] +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#tips-for-optimizing-alerting-rules-for-core-platform-monitoring_key-concepts[Tips for optimizing alerting rules for core platform monitoring] + +include::modules/monitoring-creating-new-alerting-rules.adoc[leveloffset=+2] +include::modules/monitoring-modifying-core-platform-alerting-rules.adoc[leveloffset=+2] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/about-ocp-monitoring/monitoring-stack-architecture.adoc#monitoring-stack-architecture[Monitoring stack architecture] +* link:https://prometheus.io/docs/alerting/alertmanager/[Alertmanager] (Prometheus documentation) +* link:https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config[relabel_config configuration] (Prometheus documentation) +* link:https://prometheus.io/docs/practices/alerting/[Alerting] (Prometheus documentation) + +[id="managing-alerting-rules-for-user-defined-projects_{context}"] +== Managing alerting rules for user-defined projects + +In {product-title}, you can create, view, edit, and remove alerting rules for user-defined projects. Those alerting rules will trigger alerts based on the values of the chosen metrics. 
+ +[role="_additional-resources"] +.Additional resources +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#about-creating-alerting-rules-for-user-defined-projects_key-concepts[Creating alerting rules for user-defined projects] +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#managing-alerting-rules-for-user-defined-projects_key-concepts[Managing alerting rules for user-defined projects] +* xref:../../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#optimizing-alerting-for-user-defined-projects_key-concepts[Optimizing alerting for user-defined projects] + +include::modules/monitoring-creating-alerting-rules-for-user-defined-projects.adoc[leveloffset=+2] +include::modules/monitoring-creating-cross-project-alerting-rules-for-user-defined-projects.adoc[leveloffset=+2] + +[role="_additional-resources"] +.Additional resources + +* xref:../../../observability/monitoring/about-ocp-monitoring/monitoring-stack-architecture.adoc#monitoring-stack-architecture[Monitoring stack architecture] +* link:https://prometheus.io/docs/practices/alerting/[Alerting] (Prometheus documentation) + +include::modules/monitoring-listing-alerting-rules-for-all-projects-in-a-single-view.adoc[leveloffset=+2] +include::modules/monitoring-removing-alerting-rules-for-user-defined-projects.adoc[leveloffset=+2] +include::modules/monitoring-disabling-cross-project-alerting-rules-for-user-defined-projects.adoc[leveloffset=+2] + +[role="_additional-resources"] +.Additional resources + +* link:https://prometheus.io/docs/alerting/alertmanager/[Alertmanager] (Prometheus documentation) + + + + + diff --git a/observability/monitoring/managing-alerts/modules b/observability/monitoring/managing-alerts/modules new file mode 120000 index 0000000000..36719b9de7 --- /dev/null +++ b/observability/monitoring/managing-alerts/modules @@ -0,0 +1 @@ +../../modules/ \ No newline at end of file diff --git 
a/observability/monitoring/managing-alerts/snippets b/observability/monitoring/managing-alerts/snippets new file mode 120000 index 0000000000..5a3f5add14 --- /dev/null +++ b/observability/monitoring/managing-alerts/snippets @@ -0,0 +1 @@ +../../snippets/ \ No newline at end of file diff --git a/observability/monitoring/managing-metrics.adoc b/observability/monitoring/managing-metrics.adoc index d258deedc0..13b32d9cdc 100644 --- a/observability/monitoring/managing-metrics.adoc +++ b/observability/monitoring/managing-metrics.adoc @@ -40,26 +40,23 @@ ifndef::openshift-dedicated,openshift-rosa[] include::modules/monitoring-viewing-a-list-of-available-metrics.adoc[leveloffset=+1] endif::openshift-dedicated,openshift-rosa[] -// Querying metrics -include::modules/monitoring-about-querying-metrics.adoc[leveloffset=+1] - // include::modules/monitoring-contents-of-the-metrics-ui.adoc[leveloffset=+2] -// Querying metrics for all projects as an administrator -include::modules/monitoring-querying-metrics-for-all-projects-as-an-administrator.adoc[leveloffset=+2] +// Querying metrics for all projects with the {product-title} web console [adm] +include::modules/monitoring-querying-metrics-for-all-projects-with-mon-dashboard.adoc[leveloffset=+1] [role="_additional-resources"] .Additional resources -* For more information about creating PromQL queries, see the link:https://prometheus.io/docs/prometheus/latest/querying/basics/[Prometheus query documentation]. 
+* link:https://prometheus.io/docs/prometheus/latest/querying/basics/[Prometheus query documentation] -// Querying metrics for user-defined projects as a developer -include::modules/monitoring-querying-metrics-for-user-defined-projects-as-a-developer.adoc[leveloffset=+2] +// Querying metrics for user-defined projects with the {product-title} web console [dev] +include::modules/monitoring-querying-metrics-for-user-defined-projects-with-mon-dashboard.adoc[leveloffset=+1] [role="_additional-resources"] .Additional resources -* For more information about creating PromQL queries, see the link:https://prometheus.io/docs/prometheus/latest/querying/basics/[Prometheus query documentation]. +* link:https://prometheus.io/docs/prometheus/latest/querying/basics/[Prometheus query documentation] // Getting detailed information about metrics targets include::modules/monitoring-getting-detailed-information-about-a-target.adoc[leveloffset=+1] diff --git a/observability/monitoring/monitoring-overview.adoc b/observability/monitoring/monitoring-overview.adoc index c386abed77..f8d632d752 100644 --- a/observability/monitoring/monitoring-overview.adoc +++ b/observability/monitoring/monitoring-overview.adoc @@ -25,6 +25,14 @@ endif::openshift-dedicated,openshift-rosa[] ifdef::openshift-dedicated,openshift-rosa[] In {product-title}, you can monitor your own projects in isolation from Red Hat Site Reliability Engineering (SRE) platform metrics. You can monitor your own projects without the need for an additional monitoring solution. + +The {product-title} +endif::openshift-dedicated,openshift-rosa[] +ifdef::openshift-rosa[] +(ROSA) +endif::openshift-rosa[] +ifdef::openshift-dedicated,openshift-rosa[] +monitoring stack is based on the link:https://prometheus.io/[Prometheus] open source project and its wider ecosystem. 
endif::openshift-dedicated,openshift-rosa[] // Understanding the monitoring stack @@ -39,7 +47,7 @@ include::modules/monitoring-default-monitoring-targets.adoc[leveloffset=+2] include::modules/monitoring-components-for-monitoring-user-defined-projects.adoc[leveloffset=+2] include::modules/monitoring-targets-for-user-defined-projects.adoc[leveloffset=+2] -include::modules/monitoring-understanding-monitoring-stack-in-ha-clusters.adoc[leveloffset=+2] +include::modules/monitoring-monitoring-stack-in-ha-clusters.adoc[leveloffset=+2] [role="_additional-resources"] .Additional resources * xref:../../operators/operator_sdk/osdk-ha-sno.adoc#osdk-ha-sno[High-availability or single-node cluster detection and support] @@ -50,7 +58,7 @@ include::modules/monitoring-common-terms.adoc[leveloffset=+1] ifndef::openshift-dedicated,openshift-rosa[] [role="_additional-resources"] -[id="additional-resources_monitoring-overview"] +[id="additional-resources_{context}"] == Additional resources * xref:../../support/remote_health_monitoring/about-remote-health-monitoring.adoc#about-remote-health-monitoring[About remote health monitoring] diff --git a/observability/monitoring/reviewing-monitoring-dashboards.adoc b/observability/monitoring/reviewing-monitoring-dashboards.adoc index 646873aa02..6a1e1ef0e6 100644 --- a/observability/monitoring/reviewing-monitoring-dashboards.adoc +++ b/observability/monitoring/reviewing-monitoring-dashboards.adoc @@ -6,30 +6,10 @@ include::_attributes/common-attributes.adoc[] toc::[] -ifndef::openshift-dedicated,openshift-rosa[] -{product-title} {product-version} provides a comprehensive set of monitoring dashboards that help you understand the state of cluster components and user-defined workloads. -endif::openshift-dedicated,openshift-rosa[] -ifdef::openshift-dedicated,openshift-rosa[] -{product-title} provides monitoring dashboards that help you understand the state of user-defined projects. 
-endif::openshift-dedicated,openshift-rosa[] +{product-title} provides a set of monitoring dashboards that help you understand the state of cluster components and user-defined workloads. -Use the *Administrator* perspective to access dashboards for the core {product-title} components, including the following items: - -* API performance -* etcd -* Kubernetes compute resources -* Kubernetes network resources -* Prometheus -* USE method dashboards relating to cluster and node performance -* Node performance metrics - -.Example dashboard in the Administrator perspective -image::monitoring-dashboard-administrator.png[] - -In the *Developer* perspective, you can access only the Kubernetes compute resources dashboards: - -.Example dashboard in the Developer perspective -image::observe-dashboard-developer.png[] +// About monitoring dashboards +include::modules/monitoring-about-monitoring-dashboards.adoc[leveloffset=+1] // Reviewing monitoring dashboards as a cluster administrator include::modules/monitoring-reviewing-monitoring-dashboards-admin.adoc[leveloffset=+1] @@ -40,8 +20,7 @@ include::modules/monitoring-reviewing-monitoring-dashboards-developer.adoc[level ifndef::openshift-dedicated,openshift-rosa[] // This additional resource might be valid for ROSA/OSD when the Building applications content is ported. 
[role="_additional-resources"] -[id="additional-resources-reviewing-monitoring-dashboards"] -.Additional resources - +[id="additional-resources_{context}"] +== Additional resources * xref:../../applications/odc-monitoring-project-and-application-metrics-using-developer-perspective.adoc#monitoring-project-and-application-metrics-using-developer-perspective[Monitoring project and application metrics using the Developer perspective] endif::openshift-dedicated,openshift-rosa[] diff --git a/observability/monitoring/troubleshooting-monitoring-issues.adoc b/observability/monitoring/troubleshooting-monitoring-issues.adoc index 096ec7d33a..e42bf072f3 100644 --- a/observability/monitoring/troubleshooting-monitoring-issues.adoc +++ b/observability/monitoring/troubleshooting-monitoring-issues.adoc @@ -20,9 +20,9 @@ include::modules/monitoring-investigating-why-user-defined-metrics-are-unavailab [role="_additional-resources"] .Additional resources -* xref:../../observability/monitoring/configuring-the-monitoring-stack.adoc#creating-user-defined-workload-monitoring-configmap_configuring-the-monitoring-stack[Creating a user-defined workload monitoring config map] -* See xref:../../observability/monitoring/managing-metrics.adoc#specifying-how-a-service-is-monitored_managing-metrics[Specifying how a service is monitored] for details on how to create a `ServiceMonitor` or `PodMonitor` resource -* See xref:../../observability/monitoring/managing-metrics.adoc#getting-detailed-information-about-a-target_managing-metrics[Getting detailed information about metrics targets] +* xref:../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#enabling-monitoring-for-user-defined-projects-uwm_preparing-to-configure-the-monitoring-stack-uwm[Enabling monitoring for user-defined projects] +* 
xref:../../observability/monitoring/configuring-user-workload-monitoring/configuring-metrics-uwm.adoc#specifying-how-a-service-is-monitored_configuring-metrics-uwm[Specifying how a service is monitored] +* xref:../../observability/monitoring/accessing-metrics/accessing-metrics-as-an-administrator.adoc#getting-detailed-information-about-a-target_accessing-metrics-as-an-administrator[Getting detailed information about a metrics target] endif::openshift-dedicated,openshift-rosa[] // Investigating why user-defined project metrics are unavailable (OSD/ROSA) @@ -35,9 +35,15 @@ include::modules/monitoring-determining-why-prometheus-is-consuming-disk-space.a [role="_additional-resources"] .Additional resources +ifndef::openshift-dedicated,openshift-rosa[] +* xref:../../observability/monitoring/accessing-metrics/accessing-monitoring-apis-by-using-the-cli.adoc#accessing-monitoring-apis-by-using-the-cli[Accessing monitoring APIs by using the CLI] +* xref:../../observability/monitoring/configuring-user-workload-monitoring/configuring-performance-and-scalability-uwm.adoc#setting-scrape-and-evaluation-intervals-limits-for-user-defined-projects_configuring-performance-and-scalability-uwm[Setting scrape intervals, evaluation intervals, and enforced limits for user-defined projects] +endif::openshift-dedicated,openshift-rosa[] -* xref:../../observability/monitoring/accessing-third-party-monitoring-apis.adoc#about-accessing-monitoring-web-service-apis_accessing-monitoring-apis-by-using-the-cli[Accessing monitoring APIs by using the CLI] -* xref:../../observability/monitoring/configuring-the-monitoring-stack.adoc#setting-scrape-and-evaluation-intervals-limits-for-user-defined-projects_configuring-the-monitoring-stack[Setting scrape intervals, evaluation intervals, and scrape limits for user-defined projects] +ifdef::openshift-dedicated,openshift-rosa[] +* 
xref:../../observability/monitoring/accessing-third-party-monitoring-apis.adoc#about-accessing-monitoring-web-service-apis_accessing-third-party-monitoring-apis[Accessing monitoring APIs by using the CLI] +* xref:../../observability/monitoring/configuring-the-monitoring-stack.adoc#setting-scrape-and-evaluation-intervals-limits-for-user-defined-projects_configuring-the-monitoring-stack[Setting scrape intervals, evaluation intervals, and enforced limits for user-defined projects] +endif::openshift-dedicated,openshift-rosa[] * xref:../../support/getting-support.adoc#support-submitting-a-case_getting-support[Submitting a support case] // Resolving the KubePersistentVolumeFillingUp alert firing for Prometheus diff --git a/observability/network_observability/metrics-alerts-dashboards.adoc b/observability/network_observability/metrics-alerts-dashboards.adoc index 3c8a13b374..ca2bec841c 100644 --- a/observability/network_observability/metrics-alerts-dashboards.adoc +++ b/observability/network_observability/metrics-alerts-dashboards.adoc @@ -25,5 +25,5 @@ include::modules/network-observability-tcp-flag-syn-flood.adoc[leveloffset=+1] [role="_additional-resources"] .Additional resources * xref:../../observability/network_observability/observing-network-traffic.adoc#network-observability-filtering-ebpf-rule_nw-observe-network-traffic[Filtering eBPF flow data using a global rule] -* xref:../../observability/monitoring/managing-alerts.adoc#creating-alerting-rules-for-user-defined-projects_managing-alerts[Creating alerting rules for user-defined projects]. +* xref:../../observability/monitoring/managing-alerts/managing-alerts-as-a-developer.adoc#creating-alerting-rules-for-user-defined-projects_managing-alerts-as-a-developer[Creating alerting rules for user-defined projects]. 
* xref:../../support/troubleshooting/investigating-monitoring-issues.adoc#determining-why-prometheus-is-consuming-disk-space_investigating-monitoring-issues[Troubleshooting high cardinality metrics- Determining why Prometheus is consuming a lot of disk space] diff --git a/observability/network_observability/network-observability-operator-monitoring.adoc b/observability/network_observability/network-observability-operator-monitoring.adoc index 9c9219106a..1dafd36fe8 100644 --- a/observability/network_observability/network-observability-operator-monitoring.adoc +++ b/observability/network_observability/network-observability-operator-monitoring.adoc @@ -17,4 +17,4 @@ include::modules/network-observability-ebpf-agent-alert.adoc[leveloffset=+1] [role="_additional-resources"] .Additional resources -* For more information about creating alerts that you can see on the dashboard, see xref:../../observability/monitoring/managing-alerts.adoc#creating-alerting-rules-for-user-defined-projects_managing-alerts[Creating alerting rules for user-defined projects]. \ No newline at end of file +* For more information about creating alerts that you can see on the dashboard, see xref:../../observability/monitoring/managing-alerts/managing-alerts-as-a-developer.adoc#creating-alerting-rules-for-user-defined-projects_managing-alerts-as-a-developer[Creating alerting rules for user-defined projects]. 
\ No newline at end of file diff --git a/observability/otel/otel-configuring-metrics-for-monitoring-stack.adoc b/observability/otel/otel-configuring-metrics-for-monitoring-stack.adoc index 7c36bb042a..a92af420e5 100644 --- a/observability/otel/otel-configuring-metrics-for-monitoring-stack.adoc +++ b/observability/otel/otel-configuring-metrics-for-monitoring-stack.adoc @@ -18,14 +18,4 @@ include::modules/otel-config-receive-metrics-monitoring-stack.adoc[leveloffset=+ [id="additional-resources_otel-configuring-metrics-for-monitoring-stack"] == Additional resources -// * xref:../monitoring/accessing-third-party-monitoring-apis.adoc#monitoring-querying-metrics-by-using-the-federation-endpoint-for-prometheus[Querying metrics by using the federation endpoint for Prometheus] - -//* xref:../monitoring/accessing-third-party-monitoring-apis.adoc#monitoring-querying-metrics-by-using-the-federation-endpoint-for-prometheus_accessing-third-party-monitoring-apis[Querying metrics by using the federation endpoint for Prometheus] - -//* xref:../monitoring/accessing-third-party-monitoring-apis.adoc#monitoring-querying-metrics-by-using-the-federation-endpoint-for-prometheus[Querying metrics by using the federation endpoint for Prometheus] - -//* xref:../monitoring/accessing-third-party-monitoring-apis.adoc#monitoring-querying-metrics-by-using-the-federation-endpoint-for-prometheus_accessing-monitoring-apis-by-using-the-cli[Querying metrics by using the federation endpoint for Prometheus] - -//* xref:../monitoring/accessing-third-party-monitoring-apis.adoc#accessing-third-party-monitoring-apis_monitoring-querying-metrics-by-using-the-federation-endpoint-for-prometheus[Querying metrics by using the federation endpoint for Prometheus] - -* xref:../monitoring/accessing-third-party-monitoring-apis.adoc#monitoring-querying-metrics-by-using-the-federation-endpoint-for-prometheus_accessing-monitoring-apis-by-using-the-cli[Querying metrics by using the federation endpoint for Prometheus] +* 
xref:../../observability/monitoring/accessing-metrics/accessing-monitoring-apis-by-using-the-cli.adoc#monitoring-querying-metrics-by-using-the-federation-endpoint-for-prometheus_accessing-monitoring-apis-by-using-the-cli[Querying metrics by using the federation endpoint for Prometheus] diff --git a/observability/otel/otel-configuring-otelcol-metrics.adoc b/observability/otel/otel-configuring-otelcol-metrics.adoc index c0fe04a530..3e82820b07 100644 --- a/observability/otel/otel-configuring-otelcol-metrics.adoc +++ b/observability/otel/otel-configuring-otelcol-metrics.adoc @@ -50,4 +50,4 @@ You can use the *Administrator* view of the web console to verify successful con . Check that the *ServiceMonitors* or *PodMonitors* in the `opentelemetry-collector-` format have the *Up* status. .Additional resources -* xref:../../observability/monitoring/enabling-monitoring-for-user-defined-projects.adoc#enabling-monitoring-for-user-defined-projects[Enabling monitoring for user-defined projects] \ No newline at end of file +* xref:../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#enabling-monitoring-for-user-defined-projects-uwm_preparing-to-configure-the-monitoring-stack-uwm[Enabling monitoring for user-defined projects] \ No newline at end of file diff --git a/observability/overview/index.adoc b/observability/overview/index.adoc index 120618a14c..2dd7d0c145 100644 --- a/observability/overview/index.adoc +++ b/observability/overview/index.adoc @@ -36,7 +36,13 @@ Monitor the in-cluster health and performance of your applications running on {p Monitoring stack components are deployed by default in every {product-title} installation and are managed by the {cmo-first}. These components include Prometheus, Alertmanager, Thanos Querier, and others. 
The {cmo-short} also deploys the Telemeter Client, which sends a subset of data from platform Prometheus instances to Red Hat to facilitate Remote Health Monitoring for clusters. +ifndef::openshift-dedicated,openshift-rosa[] +For more information, see xref:../../observability/monitoring/about-ocp-monitoring/about-ocp-monitoring.adoc#about-ocp-monitoring[About {product-title} monitoring] and xref:../../support/remote_health_monitoring/about-remote-health-monitoring.adoc#about-remote-health-monitoring[About remote health monitoring]. +endif::openshift-dedicated,openshift-rosa[] + +ifdef::openshift-dedicated,openshift-rosa[] For more information, see xref:../../observability/monitoring/monitoring-overview.adoc#monitoring-overview[Monitoring overview] and xref:../../support/remote_health_monitoring/about-remote-health-monitoring.adoc#about-remote-health-monitoring[About remote health monitoring]. +endif::openshift-dedicated,openshift-rosa[] [id="cluster-logging-index_{context}"] == Logging diff --git a/observability/power_monitoring/visualizing-power-monitoring-metrics.adoc b/observability/power_monitoring/visualizing-power-monitoring-metrics.adoc index d8c1a9d09f..fa9d9a1002 100644 --- a/observability/power_monitoring/visualizing-power-monitoring-metrics.adoc +++ b/observability/power_monitoring/visualizing-power-monitoring-metrics.adoc @@ -19,4 +19,4 @@ include::modules/power-monitoring-metrics-overview.adoc[leveloffset=+1] [role="_additional-resources"] [id="additional-resources_visualizing-power-monitoring-metrics"] == Additional resources -* xref:../../observability/monitoring/enabling-monitoring-for-user-defined-projects.adoc#enabling-monitoring-for-user-defined-projects_enabling-monitoring-for-user-defined-projects[Enabling monitoring for user-defined projects] \ No newline at end of file +* 
xref:../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#enabling-monitoring-for-user-defined-projects-uwm_preparing-to-configure-the-monitoring-stack-uwm[Enabling monitoring for user-defined projects] \ No newline at end of file diff --git a/post_installation_configuration/cluster-tasks.adoc b/post_installation_configuration/cluster-tasks.adoc index 1f98e29828..8703f8bd95 100644 --- a/post_installation_configuration/cluster-tasks.adoc +++ b/post_installation_configuration/cluster-tasks.adoc @@ -119,7 +119,7 @@ documentation for details on how and when you can create additional resource ins |`alertmanager.monitoring.coreos.com` |`main` |`openshift-monitoring` -|Controls the xref:../observability/monitoring/managing-alerts.adoc#managing-alerts[Alertmanager] deployment parameters. +|Controls the xref:../observability/monitoring/configuring-core-platform-monitoring/configuring-alerts-and-notifications.adoc#configuring-alerts-and-notifications[Alertmanager] deployment parameters. 
|`ingresscontroller.operator.openshift.io` |`default` diff --git a/post_installation_configuration/configuring-alert-notifications.adoc b/post_installation_configuration/configuring-alert-notifications.adoc index 8fde499c73..d253eeaf1a 100644 --- a/post_installation_configuration/configuring-alert-notifications.adoc +++ b/post_installation_configuration/configuring-alert-notifications.adoc @@ -14,5 +14,5 @@ include::modules/monitoring-sending-notifications-to-external-systems.adoc[level [role="_additional-resources"] == Additional resources -* xref:../observability/monitoring/monitoring-overview.adoc#monitoring-overview[Monitoring overview] -* xref:../observability/monitoring/managing-alerts.adoc#configuring-alert-receivers_managing-alerts[Configuring alert receivers] +* xref:../observability/monitoring/about-ocp-monitoring/about-ocp-monitoring.adoc#about-ocp-monitoring[About {product-title} monitoring] +* xref:../observability/monitoring/configuring-core-platform-monitoring/configuring-alerts-and-notifications.adoc#configuring-alert-notifications_configuring-alerts-and-notifications[Configuring alert notifications] diff --git a/rosa_architecture/index.adoc b/rosa_architecture/index.adoc index f7baa70381..758c68c79c 100644 --- a/rosa_architecture/index.adoc +++ b/rosa_architecture/index.adoc @@ -281,9 +281,9 @@ Use the Cluster Version Operator (CVO) to upgrade your {product-title} cluster. - **xref:../observability/network_observability/network-observability-overview.adoc#network-observability-overview[Network Observability]**: Observe network traffic for {product-title} clusters by using eBPF technology to create and enrich network flows.
You can xref:../observability/network_observability/metrics-alerts-dashboards.adoc#metrics-alerts-dashboards_metrics-alerts-dashboards[view dashboards, customize alerts], and xref:../observability/network_observability/observing-network-traffic.adoc#network-observability-trafficflow_nw-observe-network-traffic[analyze network flow] information for further insight and troubleshooting. -- **xref:../observability/monitoring/monitoring-overview.adoc#monitoring-overview[In-cluster monitoring]**: -Learn to xref:../observability/monitoring/configuring-the-monitoring-stack.adoc#configuring-the-monitoring-stack[configure the monitoring stack]. -After configuring monitoring, use the web console to access xref:../observability/monitoring/reviewing-monitoring-dashboards.adoc#reviewing-monitoring-dashboards[monitoring dashboards]. In addition to infrastructure metrics, you can also scrape and view metrics for your own services. +- **xref:../observability/monitoring/about-ocp-monitoring/about-ocp-monitoring.adoc#about-ocp-monitoring[In-cluster monitoring]**: +Learn to xref:../observability/monitoring/getting-started/core-platform-monitoring-first-steps.adoc#core-platform-monitoring-first-steps[configure the monitoring stack]. +After configuring monitoring, use the web console to access xref:../observability/monitoring/accessing-metrics/accessing-metrics-as-an-administrator.adoc#reviewing-monitoring-dashboards-admin_accessing-metrics-as-an-administrator[monitoring dashboards]. In addition to infrastructure metrics, you can also scrape and view metrics for your own services. - **xref:../support/remote_health_monitoring/about-remote-health-monitoring.adoc#about-remote-health-monitoring_about-remote-health-monitoring[Remote health monitoring]**: {product-title} collects anonymized aggregated information about your cluster. By using Telemetry and the Insights Operator, this data is received by Red Hat and used to improve {product-title}. 
You can view the xref:../support/remote_health_monitoring/showing-data-collected-by-remote-health-monitoring.adoc#showing-data-collected-by-remote-health-monitoring_showing-data-collected-by-remote-health-monitoring[data collected by remote health monitoring]. diff --git a/rosa_architecture/learn_more_about_openshift.adoc b/rosa_architecture/learn_more_about_openshift.adoc index 4c1c18f62b..c85124de58 100644 --- a/rosa_architecture/learn_more_about_openshift.adoc +++ b/rosa_architecture/learn_more_about_openshift.adoc @@ -52,7 +52,7 @@ Use the following sections to find content to help you learn about and use {prod | link:https://learn.openshift.com/?extIdCarryOver=true&sc_cid=701f2000001Css5AAC[OpenShift Interactive Learning Portal] | xref:../networking/understanding-networking.adoc#understanding-networking[Networking] -| xref:../observability/monitoring/monitoring-overview.adoc#monitoring-overview[Monitoring overview] +| xref:../observability/monitoring/about-ocp-monitoring/about-ocp-monitoring.adoc#about-ocp-monitoring[About {product-title} monitoring] | link:https://access.redhat.com/support/policy/updates/openshift#ocp4_phases[{product-title} Life Cycle] | @@ -96,7 +96,7 @@ Use the following sections to find content to help you learn about and use {prod | | -| xref:../observability/monitoring/monitoring-overview.adoc#monitoring-overview[Monitoring] +| xref:../observability/monitoring/about-ocp-monitoring/about-ocp-monitoring.adoc#about-ocp-monitoring[Monitoring] | | diff --git a/scalability_and_performance/telco_ref_design_specs/core/telco-core-ref-design-components.adoc b/scalability_and_performance/telco_ref_design_specs/core/telco-core-ref-design-components.adoc index 8108423725..3703208538 100644 --- a/scalability_and_performance/telco_ref_design_specs/core/telco-core-ref-design-components.adoc +++ b/scalability_and_performance/telco_ref_design_specs/core/telco-core-ref-design-components.adoc @@ -131,7 +131,7 @@ 
include::modules/telco-core-monitoring.adoc[leveloffset=+1] [role="_additional-resources"] .Additional resources -* xref:../../../observability/monitoring/monitoring-overview.adoc#about-openshift-monitoring[About {product-version} monitoring] +* xref:../../../observability/monitoring/about-ocp-monitoring/about-ocp-monitoring.adoc#about-ocp-monitoring[About {product-title} monitoring] include::modules/telco-core-scheduling.adoc[leveloffset=+1] diff --git a/security/cert_manager_operator/cert-manager-monitoring.adoc b/security/cert_manager_operator/cert-manager-monitoring.adoc index 753cbb66ba..f5779982f0 100644 --- a/security/cert_manager_operator/cert-manager-monitoring.adoc +++ b/security/cert_manager_operator/cert-manager-monitoring.adoc @@ -14,7 +14,7 @@ include::modules/cert-manager-enable-metrics.adoc[leveloffset=+1] [role="_additional-resources"] .Additional resources -* xref:../../observability/monitoring/managing-metrics.adoc#setting-up-metrics-collection-for-user-defined-projects_managing-metrics[Setting up metrics collection for user-defined projects] +* xref:../../observability/monitoring/configuring-user-workload-monitoring/configuring-metrics-uwm.adoc#setting-up-metrics-collection-for-user-defined-projects_configuring-metrics-uwm[Setting up metrics collection for user-defined projects] // Querying metrics for the {cert-manager-operator} include::modules/cert-manager-query-metrics.adoc[leveloffset=+1] \ No newline at end of file diff --git a/serverless/observability/admin-metrics/serverless-admin-metrics.adoc b/serverless/observability/admin-metrics/serverless-admin-metrics.adoc index d5b8bc8eb4..ccf48f34a8 100644 --- a/serverless/observability/admin-metrics/serverless-admin-metrics.adoc +++ b/serverless/observability/admin-metrics/serverless-admin-metrics.adoc @@ -20,7 +20,7 @@ endif::[] == Prerequisites ifdef::openshift-enterprise[] -* See the {product-title} documentation on 
xref:../../../observability/monitoring/managing-metrics.adoc#managing-metrics[Managing metrics] for information about enabling metrics for your cluster. +* See the {product-title} documentation on xref:../../../observability/monitoring/accessing-metrics/accessing-metrics-as-an-administrator.adoc#accessing-metrics-as-an-administrator[Accessing metrics as an administrator] for information about enabling metrics for your cluster. * You have access to an {product-title} account with cluster administrator access. endif::[] diff --git a/serverless/observability/developer-metrics/serverless-developer-metrics.adoc b/serverless/observability/developer-metrics/serverless-developer-metrics.adoc index 53998b954f..65f1657387 100644 --- a/serverless/observability/developer-metrics/serverless-developer-metrics.adoc +++ b/serverless/observability/developer-metrics/serverless-developer-metrics.adoc @@ -32,7 +32,7 @@ ifdef::openshift-enterprise[] [id="additional-resources_serverless-service-monitoring"] [role="_additional-resources"] == Additional resources -* xref:../../../observability/monitoring/monitoring-overview.adoc#monitoring-overview[Monitoring overview] -* xref:../../../observability/monitoring/managing-metrics.adoc#specifying-how-a-service-is-monitored[Enabling monitoring for user-defined projects] -* xref:../../../observability/monitoring/enabling-monitoring-for-user-defined-projects.adoc#enabling-monitoring-for-user-defined-projects[Specifying how a service is monitored] +* xref:../../../observability/monitoring/about-ocp-monitoring/about-ocp-monitoring.adoc#about-ocp-monitoring[About {product-title} monitoring] +* xref:../../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#enabling-monitoring-for-user-defined-projects-uwm_preparing-to-configure-the-monitoring-stack-uwm[Enabling monitoring for user-defined projects] +* 
xref:../../../observability/monitoring/configuring-user-workload-monitoring/configuring-metrics-uwm.adoc#specifying-how-a-service-is-monitored_configuring-metrics-uwm[Specifying how a service is monitored] endif::[] diff --git a/service_mesh/v2x/ossm-observability.adoc b/service_mesh/v2x/ossm-observability.adoc index e49790ab87..241502ca7f 100644 --- a/service_mesh/v2x/ossm-observability.adoc +++ b/service_mesh/v2x/ossm-observability.adoc @@ -44,7 +44,7 @@ ifndef::openshift-rosa,openshift-rosa-hcp,openshift-dedicated[] [id="additional-resources_user-workload-monitoring"] == Additional resources -* xref:../../observability/monitoring/enabling-monitoring-for-user-defined-projects.adoc[Enabling monitoring for user-defined projects] +* xref:../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#enabling-monitoring-for-user-defined-projects-uwm_preparing-to-configure-the-monitoring-stack-uwm[Enabling monitoring for user-defined projects] * xref:../../observability/distr_tracing/distr_tracing_tempo/distr-tracing-tempo-installing.adoc[Installing the distributed tracing platform (Tempo)] * xref:../../observability/otel/otel-installing.adoc[Installing the Red Hat build of OpenTelemetry] endif::openshift-rosa,openshift-rosa-hcp,openshift-dedicated[] diff --git a/storage/persistent_storage/persistent_storage_local/persistent-storage-local.adoc b/storage/persistent_storage/persistent_storage_local/persistent-storage-local.adoc index 5cc3cbf228..3104dd1f09 100644 --- a/storage/persistent_storage/persistent_storage_local/persistent-storage-local.adoc +++ b/storage/persistent_storage/persistent_storage_local/persistent-storage-local.adoc @@ -37,7 +37,7 @@ include::modules/persistent-storage-local-tolerations.adoc[leveloffset=+1] include::modules/persistent-storage-local-metrics.adoc[leveloffset=+1] -For more information about metrics, see 
xref:../../../observability/monitoring/managing-metrics.adoc#managing-metric[Managing metrics]. +For more information about metrics, see xref:../../../observability/monitoring/accessing-metrics/accessing-metrics-as-an-administrator.adoc#accessing-metrics-as-an-administrator[Accessing metrics as an administrator]. == Deleting the Local Storage Operator resources diff --git a/support/remote_health_monitoring/about-remote-health-monitoring.adoc b/support/remote_health_monitoring/about-remote-health-monitoring.adoc index 02c695ba95..291569bbe3 100644 --- a/support/remote_health_monitoring/about-remote-health-monitoring.adoc +++ b/support/remote_health_monitoring/about-remote-health-monitoring.adoc @@ -109,13 +109,15 @@ include::modules/understanding-telemetry-and-insights-operator-data-flow.adoc[le ifndef::openshift-rosa-hcp[] [role="_additional-resources"] .Additional resources +ifdef::openshift-rosa,openshift-dedicated[] +* See xref:../../observability/monitoring/monitoring-overview.adoc#monitoring-overview[Monitoring overview] for more information about the {product-title} monitoring stack. +endif::openshift-rosa,openshift-dedicated[] -* See xref:../../observability/monitoring/monitoring-overview.adoc#monitoring-overview_monitoring-overview[Monitoring overview] for more information about the {product-title} monitoring stack. -endif::openshift-rosa-hcp[] - -ifndef::openshift-rosa,openshift-rosa-hcp,openshift-dedicated[] +ifndef::openshift-rosa,openshift-dedicated[] +* See xref:../../observability/monitoring/about-ocp-monitoring/about-ocp-monitoring.adoc#about-ocp-monitoring[About {product-title} monitoring] for more information about the {product-title} monitoring stack.
* See xref:../../installing/install_config/configuring-firewall.adoc#configuring-firewall[Configuring your firewall] for details about configuring a firewall and enabling endpoints for Telemetry and Insights -endif::openshift-rosa,openshift-rosa-hcp,openshift-dedicated[] +endif::openshift-rosa,openshift-dedicated[] +endif::openshift-rosa-hcp[] [id="additional-details-about-how-remote-health-monitoring-data-is-used"] == Additional details about how remote health monitoring data is used diff --git a/support/troubleshooting/investigating-monitoring-issues.adoc b/support/troubleshooting/investigating-monitoring-issues.adoc index c3d2ca7393..a86277744c 100644 --- a/support/troubleshooting/investigating-monitoring-issues.adoc +++ b/support/troubleshooting/investigating-monitoring-issues.adoc @@ -22,10 +22,17 @@ include::modules/monitoring-investigating-why-user-defined-metrics-are-unavailab ifndef::openshift-rosa-hcp[] [role="_additional-resources"] .Additional resources - +ifdef::openshift-rosa,openshift-dedicated[] * xref:../../observability/monitoring/configuring-the-monitoring-stack.adoc#creating-user-defined-workload-monitoring-configmap_configuring-the-monitoring-stack[Creating a user-defined workload monitoring config map] * See xref:../../observability/monitoring/managing-metrics.adoc#specifying-how-a-service-is-monitored_managing-metrics[Specifying how a service is monitored] for details on how to create a service monitor or pod monitor * See xref:../../observability/monitoring/managing-metrics.adoc#getting-detailed-information-about-a-target_managing-metrics[Getting detailed information about a metrics target] +endif::openshift-rosa,openshift-dedicated[] + +ifndef::openshift-rosa,openshift-dedicated[] +* xref:../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#enabling-monitoring-for-user-defined-projects-uwm_preparing-to-configure-the-monitoring-stack-uwm[Enabling monitoring for user-defined 
projects] +* See xref:../../observability/monitoring/configuring-user-workload-monitoring/configuring-metrics-uwm.adoc#specifying-how-a-service-is-monitored_configuring-metrics-uwm[Specifying how a service is monitored] for details on how to create a service monitor or pod monitor +* See xref:../../observability/monitoring/accessing-metrics/accessing-metrics-as-an-administrator.adoc#getting-detailed-information-about-a-target_accessing-metrics-as-an-administrator[Getting detailed information about a metrics target] +endif::openshift-rosa,openshift-dedicated[] endif::openshift-rosa-hcp[] // Determining why Prometheus is consuming a lot of disk space @@ -35,8 +42,12 @@ include::modules/monitoring-determining-why-prometheus-is-consuming-disk-space.a ifndef::openshift-rosa-hcp[] [role="_additional-resources"] .Additional resources - +ifdef::openshift-rosa,openshift-dedicated[] * xref:../../observability/monitoring/configuring-the-monitoring-stack.adoc#setting-scrape-and-evaluation-intervals-limits-for-user-defined-projects_configuring-the-monitoring-stack[Setting scrape and evaluation intervals and enforced limits for user-defined projects] +endif::openshift-rosa,openshift-dedicated[] +ifndef::openshift-rosa,openshift-dedicated[] +* xref:../../observability/monitoring/configuring-user-workload-monitoring/configuring-performance-and-scalability-uwm.adoc#setting-scrape-and-evaluation-intervals-limits-for-user-defined-projects_configuring-performance-and-scalability-uwm[Setting scrape intervals, evaluation intervals, and enforced limits for user-defined projects] +endif::openshift-rosa,openshift-dedicated[] endif::openshift-rosa-hcp[] // Resolving the KubePersistentVolumeFillingUp alert firing for Prometheus diff --git a/virt/monitoring/virt-exposing-custom-metrics-for-vms.adoc b/virt/monitoring/virt-exposing-custom-metrics-for-vms.adoc index 32fd3aab3a..601f2e9c6d 100644 --- a/virt/monitoring/virt-exposing-custom-metrics-for-vms.adoc +++ 
b/virt/monitoring/virt-exposing-custom-metrics-for-vms.adoc @@ -22,13 +22,13 @@ include::modules/virt-accessing-node-exporter-outside-cluster.adoc[leveloffset=+ == Additional resources // Hiding in ROSA/OSD as not supported ifndef::openshift-rosa,openshift-dedicated[] -* xref:../../observability/monitoring/configuring-the-monitoring-stack.adoc#configuring-the-monitoring-stack[Configuring the monitoring stack] +* xref:../../observability/monitoring/getting-started/core-platform-monitoring-first-steps.adoc#core-platform-monitoring-first-steps[Core platform monitoring first steps] -* xref:../../observability/monitoring/enabling-monitoring-for-user-defined-projects.adoc#enabling-monitoring-for-user-defined-projects[Enabling monitoring for user-defined projects] +* xref:../../observability/monitoring/configuring-user-workload-monitoring/preparing-to-configure-the-monitoring-stack-uwm.adoc#enabling-monitoring-for-user-defined-projects-uwm_preparing-to-configure-the-monitoring-stack-uwm[Enabling monitoring for user-defined projects] -* xref:../../observability/monitoring/managing-metrics.adoc#managing-metrics[Managing metrics] +* xref:../../observability/monitoring/accessing-metrics/accessing-metrics-as-a-developer.adoc#accessing-metrics-as-a-developer[Accessing metrics as a developer] -* xref:../../observability/monitoring/reviewing-monitoring-dashboards.adoc#reviewing-monitoring-dashboards[Reviewing monitoring dashboards] +* xref:../../observability/monitoring/accessing-metrics/accessing-metrics-as-a-developer.adoc#reviewing-monitoring-dashboards-developer_accessing-metrics-as-a-developer[Reviewing monitoring dashboards as a developer] * xref:../../applications/application-health.adoc#application-health[Monitoring application health by using health checks] endif::openshift-rosa,openshift-dedicated[] diff --git a/virt/monitoring/virt-monitoring-overview.adoc b/virt/monitoring/virt-monitoring-overview.adoc index 73d927904f..0089e188da 100644 --- 
a/virt/monitoring/virt-monitoring-overview.adoc +++ b/virt/monitoring/virt-monitoring-overview.adoc @@ -33,7 +33,12 @@ xref:../../virt/monitoring/virt-monitoring-vm-health.adoc#virt-monitoring-vm-hea Configure readiness, liveness, and guest agent ping probes and a watchdog for VMs. xref:../../virt/monitoring/virt-runbooks.adoc#virt-runbooks[Runbooks]:: +ifdef::openshift-dedicated,openshift-rosa[] Diagnose and resolve issues that trigger {VirtProductName} xref:../../observability/monitoring/managing-alerts.adoc#managing-alerts[alerts] in the {product-title} web console. +endif::openshift-dedicated,openshift-rosa[] +ifndef::openshift-dedicated,openshift-rosa[] +Diagnose and resolve issues that trigger {VirtProductName} xref:../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#about-managing-alerts_key-concepts[alerts] in the {product-title} web console. +endif::openshift-dedicated,openshift-rosa[] //:FeatureName: The guest agent ping probe //include::snippets/technology-preview.adoc[] diff --git a/virt/monitoring/virt-prometheus-queries.adoc b/virt/monitoring/virt-prometheus-queries.adoc index 0c57a52527..37f42ed895 100644 --- a/virt/monitoring/virt-prometheus-queries.adoc +++ b/virt/monitoring/virt-prometheus-queries.adoc @@ -25,11 +25,9 @@ endif::openshift-rosa,openshift-dedicated[] * For guest memory swapping queries to return data, memory swapping must be enabled on the virtual guests. 
-include::modules/monitoring-about-querying-metrics.adoc[leveloffset=+1] +include::modules/monitoring-querying-metrics-for-all-projects-with-mon-dashboard.adoc[leveloffset=+1] -include::modules/monitoring-querying-metrics-for-all-projects-as-an-administrator.adoc[leveloffset=+2] - -include::modules/monitoring-querying-metrics-for-user-defined-projects-as-a-developer.adoc[leveloffset=+2] +include::modules/monitoring-querying-metrics-for-user-defined-projects-with-mon-dashboard.adoc[leveloffset=+1] include::modules/virt-querying-metrics.adoc[leveloffset=+1] @@ -38,8 +36,12 @@ include::modules/virt-live-migration-metrics.adoc[leveloffset=+2] [id="additional-resources_virt-prometheus-queries"] [role="_additional-resources"] == Additional resources - +ifdef::openshift-dedicated,openshift-rosa[] * xref:../../observability/monitoring/monitoring-overview.adoc#monitoring-overview[Monitoring overview] +endif::openshift-dedicated,openshift-rosa[] +ifndef::openshift-dedicated,openshift-rosa[] +* xref:../../observability/monitoring/about-ocp-monitoring/about-ocp-monitoring.adoc#about-ocp-monitoring[About {product-title} monitoring] +endif::openshift-dedicated,openshift-rosa[] * link:https://prometheus.io/docs/prometheus/latest/querying/basics/[Querying Prometheus] diff --git a/virt/monitoring/virt-runbooks.adoc b/virt/monitoring/virt-runbooks.adoc index d4bf1bced9..b3fe529ed7 100644 --- a/virt/monitoring/virt-runbooks.adoc +++ b/virt/monitoring/virt-runbooks.adoc @@ -7,7 +7,14 @@ include::_attributes/common-attributes.adoc[] toc::[] :!virt-runbooks: -To diagnose and resolve issues that trigger {VirtProductName} xref:../../observability/monitoring/managing-alerts.adoc#managing-alerts[alerts], follow the procedures in the runbooks for the {VirtProductName} Operator. Triggered {VirtProductName} alerts can be viewed in the main *Observe* -> *Alerts* tab in the web console, and also in the *Virtualization* -> *Overview* tab. 
+To diagnose and resolve issues that trigger {VirtProductName} +ifdef::openshift-dedicated,openshift-rosa[] +xref:../../observability/monitoring/managing-alerts.adoc#managing-alerts[alerts], +endif::openshift-dedicated,openshift-rosa[] +ifndef::openshift-dedicated,openshift-rosa[] +xref:../../observability/monitoring/about-ocp-monitoring/key-concepts.adoc#about-managing-alerts_key-concepts[alerts], +endif::openshift-dedicated,openshift-rosa[] +follow the procedures in the runbooks for the {VirtProductName} Operator. Triggered {VirtProductName} alerts can be viewed in the main *Observe* -> *Alerts* tab in the web console, and also in the *Virtualization* -> *Overview* tab. Runbooks for the {VirtProductName} Operator are maintained in the link:https://github.com/openshift/runbooks/tree/master/alerts/openshift-virtualization-operator[openshift/runbooks] Git repository, and you can view them on GitHub. diff --git a/virt/release_notes/virt-4-18-release-notes.adoc b/virt/release_notes/virt-4-18-release-notes.adoc index ee6d759487..3f1066f9c7 100644 --- a/virt/release_notes/virt-4-18-release-notes.adoc +++ b/virt/release_notes/virt-4-18-release-notes.adoc @@ -109,7 +109,7 @@ Deprecated features are included in the current release and supported. However, * Support for Windows Server 2012 R2 templates is deprecated. //CNV-34681: Deprecated alerts -* The alerts `KubeVirtComponentExceedsRequestedMemory` and `KubeVirtComponentExceedsRequestedCPU` are deprecated. You can safely xref:../../observability/monitoring/managing-alerts.adoc#silencing-alerts_managing-alerts[silence] them. +* The alerts `KubeVirtComponentExceedsRequestedMemory` and `KubeVirtComponentExceedsRequestedCPU` are deprecated. You can safely xref:../../observability/monitoring/managing-alerts/managing-alerts-as-an-administrator.adoc#silencing-alerts-adm_managing-alerts-as-an-administrator[silence] them. 
[id="virt-4-18-removed"] === Removed features diff --git a/virt/support/virt-collecting-virt-data.adoc b/virt/support/virt-collecting-virt-data.adoc index 89fcaf8bd1..2aeedc693c 100644 --- a/virt/support/virt-collecting-virt-data.adoc +++ b/virt/support/virt-collecting-virt-data.adoc @@ -19,8 +19,12 @@ Prometheus is a time-series database and a rule evaluation engine for metrics. P Alertmanager:: The Alertmanager service handles alerts received from Prometheus. The Alertmanager is also responsible for sending the alerts to external notification systems. - +ifdef::openshift-dedicated,openshift-rosa[] For information about the {product-title} monitoring stack, see xref:../../observability/monitoring/monitoring-overview.adoc#about-openshift-monitoring[About {product-title} monitoring]. +endif::openshift-dedicated,openshift-rosa[] +ifndef::openshift-dedicated,openshift-rosa[] +For information about the {product-title} monitoring stack, see xref:../../observability/monitoring/about-ocp-monitoring/about-ocp-monitoring.adoc#about-ocp-monitoring[About {product-title} monitoring]. +endif::openshift-dedicated,openshift-rosa[] // This procedure is in the assembly so that we can add xrefs instead of a long list of additional resources. [id="virt-collecting-data-about-your-environment_{context}"] @@ -29,9 +33,14 @@ For information about the {product-title} monitoring stack, see xref:../../obser Collecting data about your environment minimizes the time required to analyze and determine the root cause. .Prerequisites - +ifdef::openshift-dedicated,openshift-rosa[] * xref:../../observability/monitoring/configuring-the-monitoring-stack.adoc#modifying-retention-time-for-prometheus-metrics-data_configuring-the-monitoring-stack[Set the retention time for Prometheus metrics data] to a minimum of seven days. 
* xref:../../observability/monitoring/managing-alerts.adoc#sending-notifications-to-external-systems_managing-alerts[Configure the Alertmanager to capture relevant alerts and to send alert notifications to a dedicated mailbox] so that they can be viewed and persisted outside the cluster. +endif::openshift-dedicated,openshift-rosa[] +ifndef::openshift-dedicated,openshift-rosa[] +* xref:../../observability/monitoring/configuring-core-platform-monitoring/storing-and-recording-data.adoc#modifying-retention-time-for-prometheus-metrics-data_storing-and-recording-data[Set the retention time for Prometheus metrics data] to a minimum of seven days. +* xref:../../observability/monitoring/configuring-core-platform-monitoring/configuring-alerts-and-notifications.adoc#configuring-alert-notifications_configuring-alerts-and-notifications[Configure the Alertmanager to capture relevant alerts and to send alert notifications to a dedicated mailbox] so that they can be viewed and persisted outside the cluster. +endif::openshift-dedicated,openshift-rosa[] * Record the exact number of affected nodes and virtual machines. .Procedure @@ -41,10 +50,10 @@ ifndef::openshift-rosa,openshift-dedicated[] . xref:../../support/gathering-cluster-data.adoc#support_gathering_data_gathering-cluster-data[Collect must-gather data for the cluster]. . link:https://access.redhat.com/documentation/en-us/red_hat_openshift_data_foundation/4.18/html-single/troubleshooting_openshift_data_foundation/index#downloading-log-files-and-diagnostic-information_rhodf[Collect must-gather data for {rh-storage-first}], if necessary. . xref:../../virt/support/virt-collecting-virt-data.adoc#virt-using-virt-must-gather_virt-collecting-virt-data[Collect must-gather data for {VirtProductName}]. -. xref:../../observability/monitoring/managing-metrics.adoc#querying-metrics-for-all-projects-as-an-administrator_managing-metrics[Collect Prometheus metrics for the cluster]. +. 
xref:../../observability/monitoring/accessing-metrics/accessing-metrics-as-an-administrator.adoc#querying-metrics-for-all-projects-with-mon-dashboard_accessing-metrics-as-an-administrator[Collect Prometheus metrics for the cluster]. endif::openshift-rosa,openshift-dedicated[] ifdef::openshift-rosa,openshift-dedicated[] -* xref:../../observability/monitoring/managing-metrics.adoc#querying-metrics-for-all-projects-as-an-administrator_managing-metrics[Collect Prometheus metrics for the cluster]. +* xref:../../observability/monitoring/managing-metrics.adoc#querying-metrics-for-all-projects-with-mon-dashboard_managing-metrics[Collect Prometheus metrics for the cluster]. endif::openshift-rosa,openshift-dedicated[] [id="virt-collecting-data-about-vms_{context}"] diff --git a/welcome/learn_more_about_openshift.adoc b/welcome/learn_more_about_openshift.adoc index cf68313243..efc7f87732 100644 --- a/welcome/learn_more_about_openshift.adoc +++ b/welcome/learn_more_about_openshift.adoc @@ -212,9 +212,8 @@ a|* xref:../operators/understanding/crds/crd-extending-api-with-crds.adoc#crd-cr a|* xref:../observability/network_observability/metrics-alerts-dashboards.adoc#metrics-alerts-dashboards_metrics-alerts-dashboards[Using metrics with dashboards and alerts] * xref:../observability/network_observability/observing-network-traffic.adoc#network-observability-trafficflow_nw-observe-network-traffic[Observing the network traffic from the Traffic flows view] -| xref:../observability/monitoring/monitoring-overview.adoc#monitoring-overview[Monitoring overview] -a|* xref:../observability/monitoring/monitoring-overview.adoc#monitoring-overview[In-cluster monitoring] -* xref:../support/remote_health_monitoring/about-remote-health-monitoring.adoc#about-remote-health-monitoring_about-remote-health-monitoring[Remote health monitoring] +| xref:../observability/monitoring/about-ocp-monitoring/about-ocp-monitoring.adoc#about-ocp-monitoring[About {product-title} monitoring] +a|* 
xref:../support/remote_health_monitoring/about-remote-health-monitoring.adoc#about-remote-health-monitoring_about-remote-health-monitoring[Remote health monitoring] * xref:../observability/power_monitoring/power-monitoring-overview.adoc#power-monitoring-overview[{PM-title-c} (Technology Preview)] |=== @@ -258,7 +257,7 @@ a|* xref:../observability/monitoring/monitoring-overview.adoc#monitoring-overvie | | -| xref:../observability/monitoring/monitoring-overview.adoc#monitoring-overview[Monitoring] +| xref:../observability/monitoring/about-ocp-monitoring/about-ocp-monitoring.adoc#about-ocp-monitoring[Monitoring] | |===