From a86b57d572a1782cc59ffe8a070fe3835206687f Mon Sep 17 00:00:00 2001 From: aireilly Date: Tue, 30 May 2023 17:25:24 +0100 Subject: [PATCH] Adding updates for telcodocs-1321 @Missxiaoguo review comments Implementing Ian's comments Ian's latest comments 11Aug + metadata fixes Latest comments from Ian Updates for David J @tliu2021 changes - remove spec.clusters.nodes.biosConfigRef doc David's NUMA zone suggestion removing 'cpuset: 0-1,20-21' from the example SiteConfig --- .vale/styles/Vocab/OpenShiftDocs/accept.txt | 5 +- modules/ztp-deploying-a-site.adoc | 16 ++- modules/ztp-du-cluster-config-reference.adoc | 92 ------------- modules/ztp-du-firmware-config-reference.adoc | 4 +- ...tp-enabling-workload-partitioning-sno.adoc | 21 ++- ...ating-install-and-config-crs-manually.adoc | 21 +-- modules/ztp-installation-crs.adoc | 2 +- .../ztp-precaching-booting-from-live-os.adoc | 4 +- modules/ztp-precaching-getting-tool.adoc | 4 +- modules/ztp-precaching-troubleshooting.adoc | 6 +- ...ztp-preparing-the-hub-cluster-for-ztp.adoc | 8 ++ ...ztp-recommended-cluster-kernel-config.adoc | 10 +- modules/ztp-recommended-cluster-mc-crs.adoc | 47 +++++-- ...sno-du-accelerating-container-startup.adoc | 6 +- ...du-configuring-crun-container-runtime.adoc | 16 ++- ...guring-logging-locally-and-forwarding.adoc | 16 ++- modules/ztp-sno-du-configuring-lvms.adoc | 16 ++- ...sno-du-configuring-performance-addons.adoc | 10 +- modules/ztp-sno-du-configuring-ptp.adoc | 6 +- modules/ztp-sno-du-configuring-sriov.adoc | 79 ++++++++++- ...-configuring-the-container-mountspace.adoc | 9 +- .../ztp-sno-du-configuring-the-operators.adoc | 35 ++++- modules/ztp-sno-du-disabling-crio-wipe.adoc | 23 ++++ ...-sno-du-disabling-network-diagnostics.adoc | 6 +- modules/ztp-sno-du-enabling-kdump.adoc | 14 +- modules/ztp-sno-du-enabling-sctp.adoc | 6 +- ...sno-du-enabling-workload-partitioning.adoc | 47 +++---- ...esource-usage-with-cluster-monitoring.adoc | 6 +- ...-sno-du-removing-the-console-operator.adoc | 12 +- ...ors-needed-for-platform-configuration.adoc | 35 ++++- ...p-sno-du-tuning-the-performance-patch.adoc | 20 ++- .../ztp-sno-siteconfig-config-reference.adoc | 89 +++++++++++++ .../enabling-workload-partitioning.adoc | 27 ++-- .../ztp-deploying-far-edge-sites.adoc | 2 + ...ference-cluster-configuration-for-vdu.adoc | 4 +- ...ormance-profile-workload-partitioning.adoc | 39 ++++++ .../ztp-02-master-workload-partitioning.adoc | 31 ----- ...-accelerated-container-startup-master.adoc | 88 ------------ snippets/ztp-cluster-logging.adoc | 48 ------- snippets/ztp-disable-console.adoc | 18 --- snippets/ztp-disable-ntp.adoc | 36 ----- snippets/ztp-operator-groups-namespace.adoc | 70 ---------- snippets/ztp-operator-subs.adoc | 55 -------- snippets/ztp-performance-profile.adoc | 43 ------ snippets/ztp-ptp-config.adoc | 123 ----------------- snippets/ztp-sriov-du-config.adoc | 78 ----------- ...ner-mount-ns-and-kubelet-conf-master.yaml} | 8 +- ...=> ztp_03-sctp-machine-config-master.yaml} | 17 +-- ...-accelerated-container-startup-master.yaml | 85 ++++++++++++ snippets/ztp_05-kdump-config-master.yaml | 32 +++++ ...e-master.adoc => ztp_06-kdump-master.yaml} | 7 +- snippets/ztp_99-crio-disable-wipe-master.yaml | 18 +++ snippets/ztp_99-crio-disable-wipe-worker.yaml | 18 +++ snippets/ztp_ClusterLogForwarder.yaml | 24 ++++ snippets/ztp_ClusterLogNS.yaml | 7 + snippets/ztp_ClusterLogOperGroup.yaml | 9 ++ snippets/ztp_ClusterLogSubscription.yaml | 13 ++ snippets/ztp_ClusterLogging.yaml | 15 +++ ...cs.adoc => ztp_DisableSnoNetworkDiag.yaml} | 5 - 
snippets/ztp_PerformanceProfile.yaml | 30 +++++ snippets/ztp_PtpConfigSlave.yaml | 122 +++++++++++++++++ snippets/ztp_PtpSubscription.yaml | 14 ++ snippets/ztp_PtpSubscriptionNS.yaml | 9 ++ snippets/ztp_PtpSubscriptionOperGroup.yaml | 8 ++ ...doc => ztp_ReduceMonitoringFootprint.yaml} | 7 +- snippets/ztp_SriovNetwork.yaml | 17 +++ snippets/ztp_SriovNetworkNodePolicy.yaml | 17 +++ snippets/ztp_SriovOperatorConfig.yaml | 10 ++ snippets/ztp_SriovSubscription.yaml | 13 ++ snippets/ztp_SriovSubscriptionNS.yaml | 6 + snippets/ztp_SriovSubscriptionOperGroup.yaml | 8 ++ ...e-lvms.adoc => ztp_StorageLVMCluster.yaml} | 8 +- snippets/ztp_StorageNS.yaml | 6 + snippets/ztp_StorageOperGroup.yaml | 8 ++ snippets/ztp_StorageSubscription.yaml | 13 ++ ...ch.adoc => ztp_TunedPerformancePatch.yaml} | 14 +- ...aster.adoc => ztp_enable-crun-master.yaml} | 5 - ...orker.adoc => ztp_enable-crun-worker.yaml} | 5 - snippets/ztp_example-sno.yaml | 126 ++++++++++++++++++ 79 files changed, 1198 insertions(+), 859 deletions(-) delete mode 100644 modules/ztp-du-cluster-config-reference.adoc create mode 100644 modules/ztp-sno-du-disabling-crio-wipe.adoc create mode 100644 modules/ztp-sno-siteconfig-config-reference.adoc create mode 100644 snippets/performance-profile-workload-partitioning.adoc delete mode 100644 snippets/ztp-02-master-workload-partitioning.adoc delete mode 100644 snippets/ztp-04-accelerated-container-startup-master.adoc delete mode 100644 snippets/ztp-cluster-logging.adoc delete mode 100644 snippets/ztp-disable-console.adoc delete mode 100644 snippets/ztp-disable-ntp.adoc delete mode 100644 snippets/ztp-operator-groups-namespace.adoc delete mode 100644 snippets/ztp-operator-subs.adoc delete mode 100644 snippets/ztp-performance-profile.adoc delete mode 100644 snippets/ztp-ptp-config.adoc delete mode 100644 snippets/ztp-sriov-du-config.adoc rename snippets/{ztp-container-mount-namespace-and-kubelet-conf-master.adoc => ztp_01-container-mount-ns-and-kubelet-conf-master.yaml} (97%) rename snippets/{ztp-load-sctp-module.adoc => ztp_03-sctp-machine-config-master.yaml} (61%) create mode 100644 snippets/ztp_04-accelerated-container-startup-master.yaml create mode 100644 snippets/ztp_05-kdump-config-master.yaml rename snippets/{ztp-06-kdump-enable-master.adoc => ztp_06-kdump-master.yaml} (80%) create mode 100644 snippets/ztp_99-crio-disable-wipe-master.yaml create mode 100644 snippets/ztp_99-crio-disable-wipe-worker.yaml create mode 100644 snippets/ztp_ClusterLogForwarder.yaml create mode 100644 snippets/ztp_ClusterLogNS.yaml create mode 100644 snippets/ztp_ClusterLogOperGroup.yaml create mode 100644 snippets/ztp_ClusterLogSubscription.yaml create mode 100644 snippets/ztp_ClusterLogging.yaml rename snippets/{ztp-network-diagnostics.adoc => ztp_DisableSnoNetworkDiag.yaml} (55%) create mode 100644 snippets/ztp_PerformanceProfile.yaml create mode 100644 snippets/ztp_PtpConfigSlave.yaml create mode 100644 snippets/ztp_PtpSubscription.yaml create mode 100644 snippets/ztp_PtpSubscriptionNS.yaml create mode 100644 snippets/ztp_PtpSubscriptionOperGroup.yaml rename snippets/{ztp-cluster-monitoring.adoc => ztp_ReduceMonitoringFootprint.yaml} (72%) create mode 100644 snippets/ztp_SriovNetwork.yaml create mode 100644 snippets/ztp_SriovNetworkNodePolicy.yaml create mode 100644 snippets/ztp_SriovOperatorConfig.yaml create mode 100644 snippets/ztp_SriovSubscription.yaml create mode 100644 snippets/ztp_SriovSubscriptionNS.yaml create mode 100644 snippets/ztp_SriovSubscriptionOperGroup.yaml rename snippets/{ztp-storage-lvms.adoc => 
ztp_StorageLVMCluster.yaml} (58%) create mode 100644 snippets/ztp_StorageNS.yaml create mode 100644 snippets/ztp_StorageOperGroup.yaml create mode 100644 snippets/ztp_StorageSubscription.yaml rename snippets/{ztp-performance-patch.adoc => ztp_TunedPerformancePatch.yaml} (70%) rename snippets/{ztp-07-ztp-sno-du-configuring-crun-container-runtime-master.adoc => ztp_enable-crun-master.yaml} (71%) rename snippets/{ztp-08-ztp-sno-du-configuring-crun-container-runtime-worker.adoc => ztp_enable-crun-worker.yaml} (72%) create mode 100644 snippets/ztp_example-sno.yaml diff --git a/.vale/styles/Vocab/OpenShiftDocs/accept.txt b/.vale/styles/Vocab/OpenShiftDocs/accept.txt index d79ca1ba70..3cbc8a7d5c 100644 --- a/.vale/styles/Vocab/OpenShiftDocs/accept.txt +++ b/.vale/styles/Vocab/OpenShiftDocs/accept.txt @@ -8,4 +8,7 @@ custom resource custom resources MetalLB Operator -Operators \ No newline at end of file +Operators +[Ff]ronthaul +[Mm]idhaul +[Rr]ealtime diff --git a/modules/ztp-deploying-a-site.adoc b/modules/ztp-deploying-a-site.adoc index 307464ef25..19757ba327 100644 --- a/modules/ztp-deploying-a-site.adoc +++ b/modules/ztp-deploying-a-site.adoc @@ -51,8 +51,7 @@ $ oc create namespace $CLUSTERNS + [NOTE] ==== -The secrets are referenced from the `SiteConfig` custom resource (CR) by name. The namespace -must match the `SiteConfig` namespace. +The secrets are referenced from the `SiteConfig` custom resource (CR) by name. The namespace must match the `SiteConfig` namespace. ==== . Create a `SiteConfig` CR for your cluster in your local clone of the Git repository: @@ -66,7 +65,16 @@ The folder includes example files for single node, three-node, and standard clus .. Change the cluster and host details in the example file to match the type of cluster you want. For example: + -include::snippets/ztp-example-siteconfig.adoc[] +.Example {sno} SiteConfig CR +[source,yaml] +---- +include::snippets/ztp_example-sno.yaml[] +---- ++ +[NOTE] +==== +For more information about BMC addressing, see the "Additional resources" section. +==== .. You can inspect the default set of extra-manifest `MachineConfig` CRs in `out/argocd/extra-manifest`. It is automatically applied to the cluster when it is installed. @@ -77,7 +85,7 @@ include::snippets/ztp-example-siteconfig.adoc[] ==== For optimal cluster performance, enable crun for master and worker nodes in {sno}, {sno} with additional worker nodes, {3no}, and standard clusters. -Enable crun in a `ContainerRuntimeConfig` CR as an additional day-0 install-time manifest to avoid the cluster having to reboot. +Enable crun in a `ContainerRuntimeConfig` CR as an additional Day 0 install-time manifest to avoid the cluster having to reboot. The `enable-crun-master.yaml` and `enable-crun-worker.yaml` CR files are in the `out/source-crs/optional-extra-manifest/` folder that you can extract from the `ztp-site-generate` container. For more information, see "Customizing extra installation manifests in the {ztp} pipeline". 
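For context, a minimal sketch of the kind of `ContainerRuntimeConfig` CR that the `enable-crun-master.yaml` file provides might look like the following. This is an illustration only, with an assumed metadata name and pool selector; the files extracted from the `ztp-site-generate` container are the authoritative versions.

[source,yaml]
----
# Illustrative sketch only. Use the enable-crun-master.yaml and
# enable-crun-worker.yaml files from the ztp-site-generate container as the
# authoritative manifests.
apiVersion: machineconfiguration.openshift.io/v1
kind: ContainerRuntimeConfig
metadata:
  name: enable-crun-master # assumed name for illustration
spec:
  machineConfigPoolSelector:
    matchLabels:
      pools.operator.machineconfiguration.openshift.io/master: ""
  containerRuntimeConfig:
    defaultRuntime: crun # switches the default OCI runtime from runc to crun
----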
diff --git a/modules/ztp-du-cluster-config-reference.adoc b/modules/ztp-du-cluster-config-reference.adoc deleted file mode 100644 index c42fa55d48..0000000000 --- a/modules/ztp-du-cluster-config-reference.adoc +++ /dev/null @@ -1,92 +0,0 @@ -// Module included in the following assemblies: -// -// * scalability_and_performance/ztp_far_edge/ztp-vdu-validating-cluster-tuning.adoc - -:_module-type: REFERENCE -[id="ztp-du-cluster-config-reference_{context}"] -= Recommended cluster configurations to run vDU applications - -Clusters running virtualized distributed unit (vDU) applications require a highly tuned and optimized configuration. The following information describes the various elements that you require to support vDU workloads in {product-title} {product-version} clusters. - -== Recommended cluster MachineConfig CRs - -The following `MachineConfig` CRs configure the cluster host: - -.Recommended MachineConfig CRs -[cols=2*, width="80%", options="header"] -|==== -|CR filename -|Description - -|`02-workload-partitioning.yaml` -|Configures workload partitioning for the cluster. Apply this `MachineConfig` CR when you install the cluster. - -|`MachineConfigSctp.yaml` -|Loads the SCTP kernel module. This `MachineConfig` CR is optional and can be omitted if you do not require this kernel module. - -|`MachineConfigContainerMountNS.yaml` -|Configures the container mount namespace and kubelet conf. - -|`MachineConfigAcceleratedStartup.yaml` -|Configures accelerated startup for the cluster. - -|`06-kdump-master.yaml`, `06-kdump-worker.yaml` -|Configures `kdump` for the cluster. -|==== - -== Recommended cluster Operators - -The following Operators are required for clusters running vDU applications and are a part of the baseline reference configuration: - -* Node Tuning Operator (NTO). NTO packages functionality that was previously delivered with the Performance Addon Operator, which is now a part of NTO. - -* PTP Operator - -* SR-IOV Network Operator - -* Red Hat OpenShift Logging Operator - -* Local Storage Operator - -== Recommended cluster kernel configuration - -Always use the latest supported realtime kernel version in your cluster. You should also ensure that the following configurations are applied in the cluster: - -. Ensure the following `additionalKernelArgs` are set in the cluster performance profile: -+ -[source,yaml] ----- -spec: - additionalKernelArgs: - - "idle=poll" - - "rcupdate.rcu_normal_after_boot=0" - - "efi=runtime" ----- - -. Ensure that the `performance-patch` profile in the `Tuned` CR configures the correct CPU isolation set that matches the `isolated` CPU set in the related `PerformanceProfile` CR, for example: -+ -[source,yaml] ----- -spec: - profile: - - name: performance-patch - # The 'include' line must match the associated PerformanceProfile name - # And the cmdline_crash CPU set must match the 'isolated' set in the associated PerformanceProfile - data: | - [main] - summary=Configuration changes profile inherited from performance created tuned - include=openshift-node-performance-openshift-node-performance-profile - [bootloader] - cmdline_crash=nohz_full=2-51,54-103 <1> - [sysctl] - kernel.timer_migration=1 - [scheduler] - group.ice-ptp=0:f:10:*:ice-ptp.* - [service] - service.stalld=start,enable - service.chronyd=stop,disable ----- -<1> Listed CPUs depend on the host hardware configuration, specifically the number of available CPUs in the system and the CPU topology. 
- - - diff --git a/modules/ztp-du-firmware-config-reference.adoc b/modules/ztp-du-firmware-config-reference.adoc index 12515f7ea7..c116580b0a 100644 --- a/modules/ztp-du-firmware-config-reference.adoc +++ b/modules/ztp-du-firmware-config-reference.adoc @@ -2,7 +2,7 @@ // // * scalability_and_performance/ztp_far_edge/ztp-vdu-validating-cluster-tuning.adoc -:_module-type: REFERENCE +:_content-type: REFERENCE [id="ztp-du-firmware-config-reference_{context}"] = Recommended firmware configuration for vDU cluster hosts @@ -93,4 +93,4 @@ Enable global SR-IOV and VT-d settings in the firmware for the host. These setti [NOTE] ==== Enable both `C-states` and OS-controlled `P-States` to allow per pod power management. -==== \ No newline at end of file +==== diff --git a/modules/ztp-enabling-workload-partitioning-sno.adoc b/modules/ztp-enabling-workload-partitioning-sno.adoc index e3dfcbd26f..8e60a8477c 100644 --- a/modules/ztp-enabling-workload-partitioning-sno.adoc +++ b/modules/ztp-enabling-workload-partitioning-sno.adoc @@ -8,18 +8,27 @@ Workload partitioning configures {product-title} services, cluster management workloads, and infrastructure pods to run on a reserved number of host CPUs. -To configure workload partitioning with {ztp-first}, you specify cluster management CPU resources with the `cpuset` field of the `SiteConfig` custom resource (CR) and the `reserved` field of the group `PolicyGenTemplate` CR. -The {ztp} pipeline uses these values to populate the required fields in the workload partitioning `MachineConfig` CR (`cpuset`) and the `PerformanceProfile` CR (`reserved`) that configure the {sno} cluster. +To configure workload partitioning with {ztp-first}, you configure a `cpuPartitioningMode` field in the `SiteConfig` custom resource (CR) that you use to install the cluster and you apply a `PerformanceProfile` CR that configures the `isolated` and `reserved` CPUs on the host. + +Configuring the `SiteConfig` CR enables workload partitioning at cluster installation time and applying the `PerformanceProfile` CR configures the specific allocation of CPUs to reserved and isolated sets. +Both of these steps happen at different points during cluster provisioning. [NOTE] ==== -For maximum performance, ensure that the `reserved` and `isolated` CPU sets do not share CPU cores across NUMA zones. +Configuring workload partitioning by using the `cpuPartitioningMode` field in the `SiteConfig` CR is a Tech Preview feature in {product-title} 4.13. + +Alternatively, you can specify cluster management CPU resources with the `cpuset` field of the `SiteConfig` custom resource (CR) and the `reserved` field of the group `PolicyGenTemplate` CR. +The {ztp} pipeline uses these values to populate the required fields in the workload partitioning `MachineConfig` CR (`cpuset`) and the `PerformanceProfile` CR (`reserved`) that configure the {sno} cluster. +This method is a General Availability feature in {product-title} 4.14. ==== -* The workload partitioning `MachineConfig` CR pins the {product-title} infrastructure pods to a defined `cpuset` configuration. -* The `PerformanceProfile` CR pins the systemd services to the reserved CPUs. +The workload partitioning configuration pins the {product-title} infrastructure pods to the `reserved` CPU set. +Platform services such as systemd, CRI-O, and kubelet run on the `reserved` CPU set. +The `isolated` CPU sets are exclusively allocated to your container workloads. 
+Isolating CPUs ensures that the workload has guaranteed access to the specified CPUs without contention from other applications running on the same node. +All CPUs that are not isolated should be reserved. [IMPORTANT] ==== -The value for the `reserved` field specified in the `PerformanceProfile` CR must match the `cpuset` field in the workload partitioning `MachineConfig` CR. +Ensure that `reserved` and `isolated` CPU sets do not overlap with each other. ==== diff --git a/modules/ztp-generating-install-and-config-crs-manually.adoc b/modules/ztp-generating-install-and-config-crs-manually.adoc index 2dab66f0d4..848371f524 100644 --- a/modules/ztp-generating-install-and-config-crs-manually.adoc +++ b/modules/ztp-generating-install-and-config-crs-manually.adoc @@ -60,13 +60,16 @@ $ mkdir -p ./site-install . Modify the example `SiteConfig` CR for the cluster type that you want to install. Copy `example-sno.yaml` to `site-1-sno.yaml` and modify the CR to match the details of the site and bare-metal host that you want to install, for example: + -include::snippets/ztp-example-siteconfig.adoc[] +[source,yaml] +---- +include::snippets/ztp_example-sno.yaml[] +---- -. Generate the day-0 installation CRs by processing the modified `SiteConfig` CR `site-1-sno.yaml` by running the following command: +. Generate the Day 0 installation CRs by processing the modified `SiteConfig` CR `site-1-sno.yaml` by running the following command: + [source,terminal,subs="attributes+"] ---- -$ podman run -it --rm -v `pwd`/out/argocd/example/siteconfig:/resources:Z -v `pwd`/site-install:/output:Z,U registry.redhat.io/openshift4/ztp-site-generate-rhel8:v{product-version}.1 generator install site-1-sno.yaml /output +$ podman run -it --rm -v `pwd`/out/argocd/example/siteconfig:/resources:Z -v `pwd`/site-install:/output:Z,U registry.redhat.io/openshift4/ztp-site-generate-rhel8:v{product-version} generator install site-1-sno.yaml /output ---- + .Example output @@ -88,7 +91,7 @@ site-install └── site-1-sno_nmstateconfig_example-node1.example.com.yaml ---- -. Optional: Generate just the day-0 `MachineConfig` installation CRs for a particular cluster type by processing the reference `SiteConfig` CR with the `-E` option. For example, run the following commands: +. Optional: Generate just the Day 0 `MachineConfig` installation CRs for a particular cluster type by processing the reference `SiteConfig` CR with the `-E` option. For example, run the following commands: .. Create an output folder for the `MachineConfig` CRs: + @@ -101,7 +104,7 @@ $ mkdir -p ./site-machineconfig + [source,terminal,subs="attributes+"] ---- -$ podman run -it --rm -v `pwd`/out/argocd/example/siteconfig:/resources:Z -v `pwd`/site-machineconfig:/output:Z,U registry.redhat.io/openshift4/ztp-site-generate-rhel8:v{product-version}.1 generator install -E site-1-sno.yaml /output +$ podman run -it --rm -v `pwd`/out/argocd/example/siteconfig:/resources:Z -v `pwd`/site-machineconfig:/output:Z,U registry.redhat.io/openshift4/ztp-site-generate-rhel8:v{product-version} generator install -E site-1-sno.yaml /output ---- + .Example output @@ -114,20 +117,20 @@ site-machineconfig └── site-1-sno_machineconfig_predefined-extra-manifests-worker.yaml ---- -. Generate and export the day-2 configuration CRs using the reference `PolicyGenTemplate` CRs from the previous step. Run the following commands: +. Generate and export the Day 2 configuration CRs using the reference `PolicyGenTemplate` CRs from the previous step. Run the following commands: -.. 
Create an output folder for the day-2 CRs: +.. Create an output folder for the Day 2 CRs: + [source,terminal] ---- $ mkdir -p ./ref ---- -.. Generate and export the day-2 configuration CRs: +.. Generate and export the Day 2 configuration CRs: + [source,terminal,subs="attributes+"] ---- -$ podman run -it --rm -v `pwd`/out/argocd/example/policygentemplates:/resources:Z -v `pwd`/ref:/output:Z,U registry.redhat.io/openshift4/ztp-site-generate-rhel8:v{product-version}.1 generator config -N . /output +$ podman run -it --rm -v `pwd`/out/argocd/example/policygentemplates:/resources:Z -v `pwd`/ref:/output:Z,U registry.redhat.io/openshift4/ztp-site-generate-rhel8:v{product-version} generator config -N . /output ---- + The command generates example group and site-specific `PolicyGenTemplate` CRs for {sno}, three-node clusters, and standard clusters in the `./ref` folder. diff --git a/modules/ztp-installation-crs.adoc b/modules/ztp-installation-crs.adoc index 29c59a6ba9..cc653ea610 100644 --- a/modules/ztp-installation-crs.adoc +++ b/modules/ztp-installation-crs.adoc @@ -22,7 +22,7 @@ The following table lists the installation CRs that are automatically applied by |`BareMetalHost` |Contains the connection information for the Baseboard Management Controller (BMC) of the target bare-metal host. -|Provides access to the BMC to load and start the discovery image on the target server. {ztp-first} supports iPXE and virtual media booting by using Redfish or IPMI protocols. To use iPXE booting, you must use {rh-rhacm} 2.8 or later. +|Provides access to the BMC to load and start the discovery image on the target server by using the Redfish protocol. |`InfraEnv` |Contains information for installing {product-title} on the target bare-metal host. diff --git a/modules/ztp-precaching-booting-from-live-os.adoc b/modules/ztp-precaching-booting-from-live-os.adoc index 4d9f4c470a..c5db739da0 100644 --- a/modules/ztp-precaching-booting-from-live-os.adoc +++ b/modules/ztp-precaching-booting-from-live-os.adoc @@ -2,7 +2,7 @@ // // * scalability_and_performance/ztp_far_edge/ztp-precaching-tool.adoc -:_module-type: PROCEDURE +:_content-type: PROCEDURE [id="ztp-booting-from-live-os_{context}"] = Booting from a live operating system image @@ -61,4 +61,4 @@ $ curl --globoff -L -w "%{http_code} %{url_effective}\\n" -ku ${username_passwor $ curl --globoff -L -w "%{http_code} %{url_effective}\\n" -ku ${username_password} -H "Content-Type: application/json" -H "Accept: application/json" -d '{"Boot":{ "BootSourceOverrideEnabled": "Once", "BootSourceOverrideTarget": "Cd", "BootSourceOverrideMode": "UEFI"}}' -X PATCH https://$BMC_ADDRESS/redfish/v1/Systems/Self ---- -. Reboot and ensure that the server is booting from virtual media. \ No newline at end of file +. Reboot and ensure that the server is booting from virtual media. 
diff --git a/modules/ztp-precaching-getting-tool.adoc b/modules/ztp-precaching-getting-tool.adoc index 385230e743..f56fc2c483 100644 --- a/modules/ztp-precaching-getting-tool.adoc +++ b/modules/ztp-precaching-getting-tool.adoc @@ -2,7 +2,7 @@ // // * scalability_and_performance/ztp_far_edge/ztp-precaching-tool.adoc -:_module-type: PROCEDURE +:_content-type: PROCEDURE [id="ztp-getting-tool_{context}"] = Getting the {factory-prestaging-tool} @@ -33,4 +33,4 @@ If you are working in a disconnected environment or have a private registry, you [source,terminal] ---- factory-precaching-cli version 20221018.120852+main.feecf17 ----- \ No newline at end of file +---- diff --git a/modules/ztp-precaching-troubleshooting.adoc b/modules/ztp-precaching-troubleshooting.adoc index 857c0af5e1..3b7cd434ce 100644 --- a/modules/ztp-precaching-troubleshooting.adoc +++ b/modules/ztp-precaching-troubleshooting.adoc @@ -2,7 +2,7 @@ // // * scalability_and_performance/ztp_far_edge/ztp-precaching-tool.adoc -:_module-type: PROCEDURE +:_content-type: PROCEDURE [id="ztp-pre-staging-troubleshooting_{context}"] = Troubleshooting @@ -44,7 +44,7 @@ error: error rendering new refs: render reference "eko4.cloud.lab.eng.bos.redhat # cp /tmp/eko4-ca.crt /etc/pki/ca-trust/source/anchors/. ---- -. Update the certificates trust store: +. Update the certificates truststore: + [source,terminal] ---- @@ -59,4 +59,4 @@ error: error rendering new refs: render reference "eko4.cloud.lab.eng.bos.redhat factory-precaching-cli download -r {product-version}.0 --acm-version 2.5.4 \ --mce-version 2.0.4 -f /mnt \--img quay.io/custom/repository --du-profile -s --skip-imageset ----- \ No newline at end of file +---- diff --git a/modules/ztp-preparing-the-hub-cluster-for-ztp.adoc b/modules/ztp-preparing-the-hub-cluster-for-ztp.adoc index e389ee5139..f88002dcf7 100644 --- a/modules/ztp-preparing-the-hub-cluster-for-ztp.adoc +++ b/modules/ztp-preparing-the-hub-cluster-for-ztp.adoc @@ -48,6 +48,14 @@ $ oc patch argocd openshift-gitops \ --patch-file out/argocd/deployment/argocd-openshift-gitops-patch.json ---- +. In {rh-rhacm} 2.7 and later, the multicluster engine enables the `cluster-proxy-addon` feature by default. +To disable this feature, apply the following patch to disable and remove the relevant hub cluster and managed cluster pods that are responsible for this add-on. + +[source,terminal] +---- +$ oc patch multiclusterengines.multicluster.openshift.io multiclusterengine --type=merge --patch-file out/argocd/deployment/disable-cluster-proxy-addon.json +---- + . Apply the pipeline configuration to your hub cluster by using the following command: + [source,terminal] diff --git a/modules/ztp-recommended-cluster-kernel-config.adoc b/modules/ztp-recommended-cluster-kernel-config.adoc index ad7b17a2de..cdcfdd99b7 100644 --- a/modules/ztp-recommended-cluster-kernel-config.adoc +++ b/modules/ztp-recommended-cluster-kernel-config.adoc @@ -16,6 +16,7 @@ spec: additionalKernelArgs: - "rcupdate.rcu_normal_after_boot=0" - "efi=runtime" + - "module_blacklist=irdma" ---- . 
Ensure that the `performance-patch` profile in the `Tuned` CR configures the correct CPU isolation set that matches the `isolated` CPU set in the related `PerformanceProfile` CR, for example: @@ -25,20 +26,19 @@ spec: spec: profile: - name: performance-patch - # The 'include' line must match the associated PerformanceProfile name - # And the cmdline_crash CPU set must match the 'isolated' set in the associated PerformanceProfile + # The 'include' line must match the associated PerformanceProfile name, for example: + # include=openshift-node-performance-${PerformanceProfile.metadata.name} + # When using the standard (non-realtime) kernel, remove the kernel.timer_migration override from the [sysctl] section data: | [main] summary=Configuration changes profile inherited from performance created tuned include=openshift-node-performance-openshift-node-performance-profile - [bootloader] - cmdline_crash=nohz_full=2-51,54-103 <1> [sysctl] kernel.timer_migration=1 [scheduler] group.ice-ptp=0:f:10:*:ice-ptp.* + group.ice-gnss=0:f:10:*:ice-gnss.* [service] service.stalld=start,enable service.chronyd=stop,disable ---- -<1> Listed CPUs depend on the host hardware configuration, specifically the number of available CPUs in the system and the CPU topology. diff --git a/modules/ztp-recommended-cluster-mc-crs.adoc b/modules/ztp-recommended-cluster-mc-crs.adoc index c71cdf1c33..e8a4ef1796 100644 --- a/modules/ztp-recommended-cluster-mc-crs.adoc +++ b/modules/ztp-recommended-cluster-mc-crs.adoc @@ -2,32 +2,55 @@ // // * scalability_and_performance/ztp_far_edge/ztp-vdu-validating-cluster-tuning.adoc -:_module-type: REFERENCE +:_content-type: REFERENCE [id="ztp-recommended-cluster-mc-crs_{context}"] -= Recommended cluster MachineConfig CRs += Recommended cluster MachineConfig CRs for {sno} clusters Check that the `MachineConfig` custom resources (CRs) that you extract from the `ztp-site-generate` container are applied in the cluster. The CRs can be found in the extracted `out/source-crs/extra-manifest/` folder. The following `MachineConfig` CRs from the `ztp-site-generate` container configure the cluster host: -.Recommended MachineConfig CRs +.Recommended {ztp} MachineConfig CRs [cols=2*, options="header"] |==== -|CR filename +|MachineConfig CR |Description -|`02-workload-partitioning.yaml` -|Configures workload partitioning for the cluster. Apply this `MachineConfig` CR when you install the cluster. +a|`01-container-mount-ns-and-kubelet-conf-master.yaml` -|`03-sctp-machine-config-master.yaml`, `03-sctp-machine-config-worker.yaml` +`01-container-mount-ns-and-kubelet-conf-worker.yaml` +|Configures the container mount namespace and kubelet configuration. + +|`02-workload-partitioning.yaml` +a|Configures workload partitioning for the cluster. Apply this `MachineConfig` CR when you install the cluster. +[NOTE] +==== +If you use the `cpuPartitioningMode` field in the `SiteConfig` CR to configure workload partitioning, you do not need to use the `02-workload-partitioning.yaml` CR. +Using the `cpuPartitioningMode` field is a Technology Preview feature in {product-title} 4.13. +For more information, see "Workload partitioning in {sno} with {ztp}". +==== + +a|`03-sctp-machine-config-master.yaml` + +`03-sctp-machine-config-worker.yaml` |Loads the SCTP kernel module. These `MachineConfig` CRs are optional and can be omitted if you do not require this kernel module. 
-|`01-container-mount-ns-and-kubelet-conf-master.yaml`, `01-container-mount-ns-and-kubelet-conf-worker.yaml` -|Configures the container mount namespace and Kubelet configuration. +a|`04-accelerated-container-startup-master.yaml` -|`04-accelerated-container-startup-master.yaml`, `04-accelerated-container-startup-worker.yaml` +`04-accelerated-container-startup-worker.yaml` |Configures accelerated startup for the cluster. -|`06-kdump-master.yaml`, `06-kdump-worker.yaml` -|Configures `kdump` for the cluster. +a|`05-kdump-config-master.yaml` + +`05-kdump-config-worker.yaml` + +`06-kdump-master.yaml` + +`06-kdump-worker.yaml` +|Configures kdump crash reporting for the cluster. + +a|`99-crio-disable-wipe-master.yaml` + +`99-crio-disable-wipe-worker.yaml` +|Disables the automatic CRI-O cache wipe following cluster reboot. |==== diff --git a/modules/ztp-sno-du-accelerating-container-startup.adoc b/modules/ztp-sno-du-accelerating-container-startup.adoc index c13509ee4e..996578f63a 100644 --- a/modules/ztp-sno-du-accelerating-container-startup.adoc +++ b/modules/ztp-sno-du-accelerating-container-startup.adoc @@ -8,4 +8,8 @@ The following `MachineConfig` CR configures core OpenShift processes and containers to use all available CPU cores during system startup and shutdown. This accelerates the system recovery during initial boot and reboots. -include::snippets/ztp-04-accelerated-container-startup-master.adoc[] +.Recommended accelerated container startup configuration (04-accelerated-container-startup-master.yaml) +[source,yaml] +---- +include::snippets/ztp_04-accelerated-container-startup-master.yaml[] +---- diff --git a/modules/ztp-sno-du-configuring-crun-container-runtime.adoc b/modules/ztp-sno-du-configuring-crun-container-runtime.adoc index 37a9f3e3dd..645c00d3b3 100644 --- a/modules/ztp-sno-du-configuring-crun-container-runtime.adoc +++ b/modules/ztp-sno-du-configuring-crun-container-runtime.adoc @@ -11,10 +11,18 @@ The crun container runtime is fast and lightweight and has a low memory footprin [IMPORTANT] ==== -For optimal performance, enable crun for master and worker nodes in {sno}, {3no}, and standard clusters. -To avoid the cluster rebooting when the CR is applied, apply the change as a {ztp} additional day-0 install-time manifest. +For optimal performance, enable crun for control plane and worker nodes in {sno}, {3no}, and standard clusters. +To avoid the cluster rebooting when the CR is applied, apply the change as a {ztp} additional Day 0 install-time manifest. 
==== -include::snippets/ztp-07-ztp-sno-du-configuring-crun-container-runtime-master.adoc[] +.Recommended ContainerRuntimeConfig CR for control plane nodes (enable-crun-master.yaml) +[source,yaml] +---- +include::snippets/ztp_enable-crun-master.yaml[] +---- -include::snippets/ztp-08-ztp-sno-du-configuring-crun-container-runtime-worker.adoc[] +.Recommended ContainerRuntimeConfig CR for worker nodes (enable-crun-worker.yaml) +[source,yaml] +---- +include::snippets/ztp_enable-crun-worker.yaml[] +---- diff --git a/modules/ztp-sno-du-configuring-logging-locally-and-forwarding.adoc b/modules/ztp-sno-du-configuring-logging-locally-and-forwarding.adoc index b3b51bdb5f..290f1f70f3 100644 --- a/modules/ztp-sno-du-configuring-logging-locally-and-forwarding.adoc +++ b/modules/ztp-sno-du-configuring-logging-locally-and-forwarding.adoc @@ -6,6 +6,18 @@ [id="ztp-sno-du-configuring-logging-locally-and-forwarding_{context}"] = Cluster logging and log forwarding -{sno-caps} clusters that run DU workloads require logging and log forwarding for debugging. The following example YAML illustrates the required `ClusterLogging` and `ClusterLogForwarder` CRs. +{sno-caps} clusters that run DU workloads require logging and log forwarding for debugging. The following `ClusterLogging` and `ClusterLogForwarder` custom resources (CRs) are required. -include::snippets/ztp-cluster-logging.adoc[] +.Recommended cluster logging and log forwarding configuration +[source,yaml] +---- +include::snippets/ztp_ClusterLogging.yaml[] +---- + +.Recommended log forwarding configuration +[source,yaml] +---- +include::snippets/ztp_ClusterLogForwarder.yaml[] +---- + +Set the `spec.outputs.url` field to the URL of the Kafka server where the logs are forwarded to. diff --git a/modules/ztp-sno-du-configuring-lvms.adoc b/modules/ztp-sno-du-configuring-lvms.adoc index 569668dff9..3241212c65 100644 --- a/modules/ztp-sno-du-configuring-lvms.adoc +++ b/modules/ztp-sno-du-configuring-lvms.adoc @@ -15,4 +15,18 @@ The recommended storage solution for {sno} is the Local Storage Operator. Altern The following YAML example configures the storage of the node to be available to {product-title} applications. -include::snippets/ztp-storage-lvms.adoc[] \ No newline at end of file +.Recommended LVMCluster configuration (StorageLVMCluster.yaml) +[source,yaml] +---- +include::snippets/ztp_StorageLVMCluster.yaml[] +---- + +.LVMCluster CR options for {sno} clusters +[cols=2*, width="90%", options="header"] +|==== +|LVMCluster CR field +|Description + +|`deviceSelector.paths` +|Configure the disks used for LVM storage. If no disks are specified, the {lvms} uses all the unused disks in the specified thin pool. +|==== diff --git a/modules/ztp-sno-du-configuring-performance-addons.adoc b/modules/ztp-sno-du-configuring-performance-addons.adoc index 092e47a0dd..af4b6228ca 100644 --- a/modules/ztp-sno-du-configuring-performance-addons.adoc +++ b/modules/ztp-sno-du-configuring-performance-addons.adoc @@ -13,6 +13,12 @@ In earlier versions of {product-title}, the Performance Addon Operator was used to implement automatic tuning to achieve low latency performance for OpenShift applications. In {product-title} 4.11 and later, this functionality is part of the Node Tuning Operator. ==== -The following example `PerformanceProfile` CR illustrates the required cluster configuration. +The following example `PerformanceProfile` CR illustrates the required {sno} cluster configuration. 
-include::snippets/ztp-performance-profile.adoc[] +.Recommended performance profile configuration +[source,yaml] +---- +include::snippets/ztp_PerformanceProfile.yaml[] +---- + +include::snippets/performance-profile-workload-partitioning.adoc[] diff --git a/modules/ztp-sno-du-configuring-ptp.adoc b/modules/ztp-sno-du-configuring-ptp.adoc index 4827607aaf..34e191083b 100644 --- a/modules/ztp-sno-du-configuring-ptp.adoc +++ b/modules/ztp-sno-du-configuring-ptp.adoc @@ -8,4 +8,8 @@ {sno-caps} clusters use Precision Time Protocol (PTP) for network time synchronization. The following example `PtpConfig` CR illustrates the required PTP slave configuration. -include::snippets/ztp-ptp-config.adoc[] +.Recommended PTP configuration +[source,yaml] +---- +include::snippets/ztp_PtpConfigSlave.yaml[] +---- diff --git a/modules/ztp-sno-du-configuring-sriov.adoc b/modules/ztp-sno-du-configuring-sriov.adoc index 4b20f50910..a73b43c278 100644 --- a/modules/ztp-sno-du-configuring-sriov.adoc +++ b/modules/ztp-sno-du-configuring-sriov.adoc @@ -6,6 +6,81 @@ [id="ztp-sno-du-configuring-sriov_{context}"] = SR-IOV -Single root I/O virtualization (SR-IOV) is commonly used to enable the fronthaul and the midhaul networks. The following YAML example configures SR-IOV for a {sno} cluster. +Single root I/O virtualization (SR-IOV) is commonly used to enable fronthaul and midhaul networks. The following YAML example configures SR-IOV for a {sno} cluster. -include::snippets/ztp-sriov-du-config.adoc[] +[NOTE] +==== +The configuration of the `SriovNetwork` CR varies depending on your specific network and infrastructure requirements. +==== + +.Recommended SriovOperatorConfig configuration +[source,yaml] +---- +include::snippets/ztp_SriovOperatorConfig.yaml[] +---- + +.SriovOperatorConfig CR options for {sno} clusters +[cols=2*, width="90%", options="header"] +|==== +|SriovOperatorConfig CR field +|Description + +|`spec.enableInjector` +a|Disable `Injector` pods to reduce the number of management pods. +Start with the `Injector` pods enabled, and only disable them after verifying the user manifests. +If the injector is disabled, containers that use SR-IOV resources must explicitly assign them in the `requests` and `limits` section of the container spec. + +For example: +[source,yaml] +---- +containers: +- name: my-sriov-workload-container + resources: + limits: + openshift.io/<resource_name>: "1" + requests: + openshift.io/<resource_name>: "1" +---- + +|`spec.enableOperatorWebhook` +|Disable `OperatorWebhook` pods to reduce the number of management pods. Start with the `OperatorWebhook` pods enabled, and only disable them after verifying the user manifests. + +|==== + +.Recommended SriovNetwork configuration +[source,yaml] +---- +include::snippets/ztp_SriovNetwork.yaml[] +---- + +.SriovNetwork CR options for {sno} clusters +[cols=2*, width="90%", options="header"] +|==== +|SriovNetwork CR field +|Description + +|`spec.vlan` +|Configure `vlan` with the VLAN for the midhaul network. +|==== + +.Recommended SriovNetworkNodePolicy configuration +[source,yaml] +---- +include::snippets/ztp_SriovNetworkNodePolicy.yaml[] +---- + +.SriovNetworkNodePolicy CR options for {sno} clusters +[cols=2*, width="90%", options="header"] +|==== +|SriovNetworkNodePolicy CR field +|Description + +|`spec.deviceType` +|Configure `deviceType` as `vfio-pci` or `netdevice`. + +|`spec.nicSelector.pfNames` +|Specifies the interface connected to the fronthaul network. + +|`spec.numVfs` +|Specifies the number of VFs for the fronthaul network. 
+|==== diff --git a/modules/ztp-sno-du-configuring-the-container-mountspace.adoc b/modules/ztp-sno-du-configuring-the-container-mountspace.adoc index 3422cbe266..ca3da9cf86 100644 --- a/modules/ztp-sno-du-configuring-the-container-mountspace.adoc +++ b/modules/ztp-sno-du-configuring-the-container-mountspace.adoc @@ -6,6 +6,11 @@ [id="ztp-sno-du-configuring-the-container-mountspace_{context}"] = Reduced platform management footprint -To reduce the overall management footprint of the platform, a `MachineConfig` custom resource (CR) is required that places all Kubernetes-specific mount points in a new namespace separate from the host operating system. The following base64-encoded example `MachineConfig` CR illustrates this configuration. +To reduce the overall management footprint of the platform, a `MachineConfig` custom resource (CR) is required that places all Kubernetes-specific mount points in a new namespace separate from the host operating system. +The following base64-encoded example `MachineConfig` CR illustrates this configuration. -include::snippets/ztp-container-mount-namespace-and-kubelet-conf-master.adoc[] +.Recommended container mount namespace configuration (01-container-mount-ns-and-kubelet-conf-master.yaml) +[source,yaml] +---- +include::snippets/ztp_01-container-mount-ns-and-kubelet-conf-master.yaml[] +---- diff --git a/modules/ztp-sno-du-configuring-the-operators.adoc b/modules/ztp-sno-du-configuring-the-operators.adoc index ad66a89f97..60ec01c21b 100644 --- a/modules/ztp-sno-du-configuring-the-operators.adoc +++ b/modules/ztp-sno-du-configuring-the-operators.adoc @@ -13,6 +13,37 @@ * PTP Operator * SR-IOV Network Operator -The following YAML summarizes these CRs: +The following CRs are required: -include::snippets/ztp-operator-groups-namespace.adoc[] +.Recommended Storage Operator Namespace and OperatorGroup configuration +[source,yaml] +---- +--- +include::snippets/ztp_StorageNS.yaml[] +--- +include::snippets/ztp_StorageOperGroup.yaml[] +---- + +.Recommended Cluster Logging Operator Namespace and OperatorGroup configuration +[source,yaml] +---- +include::snippets/ztp_ClusterLogNS.yaml[] +include::snippets/ztp_ClusterLogOperGroup.yaml[] +---- + +.Recommended PTP Operator Namespace and OperatorGroup configuration +[source,yaml] +---- +include::snippets/ztp_PtpSubscriptionNS.yaml[] +--- +include::snippets/ztp_PtpSubscriptionOperGroup.yaml[] +---- + +.Recommended SR-IOV Operator Namespace and OperatorGroup configuration +[source,yaml] +---- +--- +include::snippets/ztp_SriovSubscriptionNS.yaml[] +--- +include::snippets/ztp_SriovSubscriptionOperGroup.yaml[] +---- diff --git a/modules/ztp-sno-du-disabling-crio-wipe.adoc b/modules/ztp-sno-du-disabling-crio-wipe.adoc new file mode 100644 index 0000000000..0b91ecb7de --- /dev/null +++ b/modules/ztp-sno-du-disabling-crio-wipe.adoc @@ -0,0 +1,23 @@ +// Module included in the following assemblies: +// +// * scalability_and_performance/ztp_far_edge/ztp-reference-cluster-configuration-for-vdu.adoc + +:_content-type: CONCEPT +[id="ztp-sno-du-disabling-crio-wipe_{context}"] += Disable automatic CRI-O cache wipe + +After an uncontrolled host shutdown or cluster reboot, CRI-O automatically deletes the entire CRI-O cache, causing all images to be pulled from the registry when the node reboots. +This can result in unacceptably slow recovery times or recovery failures. +To prevent this from happening in {sno} clusters that you install with {ztp}, disable the CRI-O delete cache feature during cluster installation. 
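The recommended `MachineConfig` CRs for this configuration follow. For orientation only, a minimal sketch of such a `MachineConfig` CR might look like the following, assuming that the CR writes a CRI-O drop-in that sets the `clean_shutdown_file` option to an empty value; the referenced `99-crio-disable-wipe` snippets are the authoritative versions.

[source,yaml]
----
# Illustrative sketch only; the ztp_99-crio-disable-wipe-master.yaml snippet
# referenced below is the recommended CR. The base64 payload decodes to:
#   [crio]
#   clean_shutdown_file = ""
apiVersion: machineconfiguration.openshift.io/v1
kind: MachineConfig
metadata:
  labels:
    machineconfiguration.openshift.io/role: master
  name: 99-crio-disable-wipe-master
spec:
  config:
    ignition:
      version: 3.2.0
    storage:
      files:
      - contents:
          source: data:text/plain;charset=utf-8;base64,W2NyaW9dCmNsZWFuX3NodXRkb3duX2ZpbGUgPSAiIgo=
        mode: 420
        path: /etc/crio/crio.conf.d/99-crio-disable-wipe.toml
----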
+ +.Recommended MachineConfig CR to disable CRI-O cache wipe on control plane nodes (99-crio-disable-wipe-master.yaml) +[source,yaml] +---- +include::snippets/ztp_99-crio-disable-wipe-master.yaml[] +---- + +.Recommended MachineConfig CR to disable CRI-O cache wipe on worker nodes (99-crio-disable-wipe-worker.yaml) +[source,yaml] +---- +include::snippets/ztp_99-crio-disable-wipe-worker.yaml[] +---- diff --git a/modules/ztp-sno-du-disabling-network-diagnostics.adoc b/modules/ztp-sno-du-disabling-network-diagnostics.adoc index 49846aaba2..6451e426c1 100644 --- a/modules/ztp-sno-du-disabling-network-diagnostics.adoc +++ b/modules/ztp-sno-du-disabling-network-diagnostics.adoc @@ -8,4 +8,8 @@ {sno-caps} clusters that run DU workloads require less inter-pod network connectivity checks to reduce the additional load created by these pods. The following custom resource (CR) disables these checks. -include::snippets/ztp-network-diagnostics.adoc[] +.Recommended network diagnostics configuration (DisableSnoNetworkDiag.yaml) +[source,yaml] +---- +include::snippets/ztp_DisableSnoNetworkDiag.yaml[] +---- diff --git a/modules/ztp-sno-du-enabling-kdump.adoc b/modules/ztp-sno-du-enabling-kdump.adoc index 1a7fcadaaa..f62225fb2e 100644 --- a/modules/ztp-sno-du-enabling-kdump.adoc +++ b/modules/ztp-sno-du-enabling-kdump.adoc @@ -6,6 +6,16 @@ [id="ztp-sno-du-enabling-kdump_{context}"] = Automatic kernel crash dumps with kdump -`kdump` is a Linux kernel feature that creates a kernel crash dump when the kernel crashes. `kdump` is enabled with the following `MachineConfig` CR: +`kdump` is a Linux kernel feature that creates a kernel crash dump when the kernel crashes. `kdump` is enabled with the following `MachineConfig` CRs. -include::snippets/ztp-06-kdump-enable-master.adoc[] +.Recommended MachineConfig to remove ice driver (05-kdump-config-master.yaml) +[source,yaml] +---- +include::snippets/ztp_05-kdump-config-master.yaml[] +---- + +.Recommended kdump configuration (06-kdump-master.yaml) +[source,yaml] +---- +include::snippets/ztp_06-kdump-master.yaml[] +---- diff --git a/modules/ztp-sno-du-enabling-sctp.adoc b/modules/ztp-sno-du-enabling-sctp.adoc index db1ce9e6ef..85411ee5ba 100644 --- a/modules/ztp-sno-du-enabling-sctp.adoc +++ b/modules/ztp-sno-du-enabling-sctp.adoc @@ -8,4 +8,8 @@ Stream Control Transmission Protocol (SCTP) is a key protocol used in RAN applications. This `MachineConfig` object adds the SCTP kernel module to the node to enable this protocol. -include::snippets/ztp-load-sctp-module.adoc[] +.Recommended SCTP configuration (03-sctp-machine-config-master.yaml) +[source,yaml] +---- +include::snippets/ztp_03-sctp-machine-config-master.yaml[] +---- diff --git a/modules/ztp-sno-du-enabling-workload-partitioning.adoc b/modules/ztp-sno-du-enabling-workload-partitioning.adoc index 42eaad4384..3cb3fe4ebd 100644 --- a/modules/ztp-sno-du-enabling-workload-partitioning.adoc +++ b/modules/ztp-sno-du-enabling-workload-partitioning.adoc @@ -10,42 +10,37 @@ [NOTE] ==== -Workload partitioning can only be enabled during cluster installation. You cannot disable workload partitioning post-installation. However, you can reconfigure workload partitioning by updating the `cpu` value that you define in the performance profile, and in the related `MachineConfig` custom resource (CR). +Workload partitioning can be enabled during cluster installation only. +You cannot disable workload partitioning post-installation. 
+You can however change the set of CPUs assigned to the isolated and reserved sets through the `PerformanceProfile` CR. +Changes to CPU settings cause the node to reboot. ==== -* The base64-encoded CR that enables workload partitioning contains the CPU set that the management workloads are constrained to. Encode host-specific values for `crio.conf` and `kubelet.conf` in base64. Adjust the content to match the CPU set that is specified in the cluster performance profile. It must match the number of cores in the cluster host. -+ -include::snippets/ztp-02-master-workload-partitioning.adoc[leveloffset=+1] +.Upgrading from {product-title} 4.12 to 4.13+ +[NOTE] +==== +When transitioning to using `cpuPartitioningMode` for enabling workload partitioning, remove the workload partitioning `MachineConfig` CRs from the `/extra-manifest` folder that you use to provision the cluster. +==== -* When configured in the cluster host, the contents of `/etc/crio/crio.conf.d/01-workload-partitioning` should look like this: -+ -[source,terminal] +.Recommended SiteConfig CR configuration for workload partitioning +[source,yaml] ---- -[crio.runtime.workloads.management] -activation_annotation = "target.workload.openshift.io/management" -annotation_prefix = "resources.workload.openshift.io" -resources = { "cpushares" = 0, "cpuset" = "0-1,52-53" } <1> +apiVersion: ran.openshift.io/v1 +kind: SiteConfig +metadata: + name: "" + namespace: "" +spec: + baseDomain: "example.com" + cpuPartitioningMode: AllNodes <1> ---- -<1> The `cpuset` value varies based on the installation. -If Hyper-Threading is enabled, specify both threads for each core. The `cpuset` value must match the reserved CPUs that you define in the `spec.cpu.reserved` field in the performance profile. - -* When configured in the cluster, the contents of `/etc/kubernetes/openshift-workload-pinning` should look like this: -+ -[source,terminal] ----- -{ - "management": { - "cpuset": "0-1,52-53" <1> - } -} ----- -<1> The `cpuset` must match the `cpuset` value in `/etc/crio/crio.conf.d/01-workload-partitioning`. +<1> Set the `cpuPartitioningMode` field to `AllNodes` to configure workload partitioning for all nodes in the cluster. .Verification Check that the applications and cluster system CPU pinning is correct. Run the following commands: -. Open a remote shell connection to the managed cluster: +. Open a remote shell prompt to the managed cluster: + [source,terminal] ---- diff --git a/modules/ztp-sno-du-reducing-resource-usage-with-cluster-monitoring.adoc b/modules/ztp-sno-du-reducing-resource-usage-with-cluster-monitoring.adoc index fa94713423..3369ba3148 100644 --- a/modules/ztp-sno-du-reducing-resource-usage-with-cluster-monitoring.adoc +++ b/modules/ztp-sno-du-reducing-resource-usage-with-cluster-monitoring.adoc @@ -8,4 +8,8 @@ {sno-caps} clusters that run DU workloads require reduced CPU resources consumed by the {product-title} monitoring components. The following `ConfigMap` custom resource (CR) disables Grafana and Alertmanager. 
-include::snippets/ztp-cluster-monitoring.adoc[] +.Recommended cluster monitoring configuration (ReduceMonitoringFootprint.yaml) +[source,yaml] +---- +include::snippets/ztp_ReduceMonitoringFootprint.yaml[] +---- diff --git a/modules/ztp-sno-du-removing-the-console-operator.adoc b/modules/ztp-sno-du-removing-the-console-operator.adoc index c70846576a..9b02252ccd 100644 --- a/modules/ztp-sno-du-removing-the-console-operator.adoc +++ b/modules/ztp-sno-du-removing-the-console-operator.adoc @@ -6,6 +6,14 @@ [id="ztp-sno-du-removing-the-console-operator_{context}"] = Console Operator -The console-operator installs and maintains the web console on a cluster. When the node is centrally managed the Operator is not needed and makes space for application workloads. The following `Console` custom resource (CR) example disables the console. +Use the cluster capabilities feature to prevent the Console Operator from being installed. +When the node is centrally managed it is not needed. +Removing the Operator provides additional space and capacity for application workloads. -include::snippets/ztp-disable-console.adoc[] + +To disable the Console Operator during the installation of the managed cluster, set the following in the `spec.clusters.0.installConfigOverrides` field of the `SiteConfig` custom resource (CR): + +[source,yaml] +---- +installConfigOverrides: "{\"capabilities\":{\"baselineCapabilitySet\": \"None\" }}" +---- diff --git a/modules/ztp-sno-du-subscribing-to-the-operators-needed-for-platform-configuration.adoc b/modules/ztp-sno-du-subscribing-to-the-operators-needed-for-platform-configuration.adoc index 5241e357ba..b8a0557da5 100644 --- a/modules/ztp-sno-du-subscribing-to-the-operators-needed-for-platform-configuration.adoc +++ b/modules/ztp-sno-du-subscribing-to-the-operators-needed-for-platform-configuration.adoc @@ -13,4 +13,37 @@ * PTP Operator * SR-IOV Network Operator -include::snippets/ztp-operator-subs.adoc[] +For each Operator subscription, specify the channel to get the Operator from. The recommended channel is `stable`. + +You can specify `Manual` or `Automatic` updates. +In `Automatic` mode, the Operator automatically updates to the latest versions in the channel as they become available in the registry. +In `Manual` mode, new Operator versions are installed only when they are explicitly approved. + +[NOTE] +==== +Use Manual mode for subscriptions. This allows you to control the timing of Operator updates to fit within planned/scheduled maintenance windows. +==== + +.Recommended Local Storage Operator subscription +[source,yaml] +---- +include::snippets/ztp_StorageSubscription.yaml[] +---- + +.Recommended SR-IOV Operator subscription +[source,yaml] +---- +include::snippets/ztp_SriovSubscription.yaml[] +---- + +.Recommended PTP Operator subscription +[source,yaml] +---- +include::snippets/ztp_PtpSubscription.yaml[] +---- + +.Recommended Cluster Logging Operator subscription +[source,yaml] +---- +include::snippets/ztp_ClusterLogSubscription.yaml[] +---- diff --git a/modules/ztp-sno-du-tuning-the-performance-patch.adoc b/modules/ztp-sno-du-tuning-the-performance-patch.adoc index dea8e8bb3c..b46047ac0c 100644 --- a/modules/ztp-sno-du-tuning-the-performance-patch.adoc +++ b/modules/ztp-sno-du-tuning-the-performance-patch.adoc @@ -8,4 +8,22 @@ {sno-caps} clusters that run DU workloads require additional performance tuning configurations necessary for high-performance workloads. 
The following example `Tuned` CR extends the `Tuned` profile: -include::snippets/ztp-performance-patch.adoc[] +.Recommended extended Tuned profile configuration +[source,yaml] +---- +include::snippets/ztp_TunedPerformancePatch.yaml[] +---- + +.Tuned CR options for {sno} clusters +[cols=2*, width="90%", options="header"] +|==== +|Tuned CR field +|Description + +|`spec.profile.data` +a|* The `include` line that you set in `spec.profile.data` must match the associated `PerformanceProfile` CR name. +For example, `include=openshift-node-performance-${PerformanceProfile.metadata.name}`. + +* When using the non-realtime kernel, remove the `kernel.timer_migration` override line from the `[sysctl]` section. + +|==== diff --git a/modules/ztp-sno-siteconfig-config-reference.adoc b/modules/ztp-sno-siteconfig-config-reference.adoc new file mode 100644 index 0000000000..b0f842140e --- /dev/null +++ b/modules/ztp-sno-siteconfig-config-reference.adoc @@ -0,0 +1,89 @@ +// Module included in the following assemblies: +// +// * scalability_and_performance/ztp_far_edge/ztp-deploying-far-edge-sites.adoc + +:_content-type: REFERENCE +[id="ztp-sno-siteconfig-config-reference_{context}"] += {sno-caps} SiteConfig CR installation reference + +.SiteConfig CR installation options for {sno} clusters +[cols="1,3", options="header"] +|==== +|SiteConfig CR field +|Description + +|`spec.cpuPartitioningMode` +a|Configure workload partitioning by setting the value for `cpuPartitioningMode` to `AllNodes`. +To complete the configuration, specify the `isolated` and `reserved` CPUs in the `PerformanceProfile` CR. + +[NOTE] +==== +Configuring workload partitioning by using the `cpuPartitioningMode` field in the `SiteConfig` CR is a Tech Preview feature in {product-title} 4.13. +==== + +|`metadata.name` +|Set `name` to `assisted-deployment-pull-secret` and create the `assisted-deployment-pull-secret` CR in the same namespace as the `SiteConfig` CR. + +|`clusterImageSetNameRef` +|Configure the image set available on the hub cluster. +To see the list of supported versions on your hub cluster, run `oc get clusterimagesets`. + +|`installConfigOverrides` +a|Set the `installConfigOverrides` field to enable or disable optional components prior to cluster installation. +[IMPORTANT] +==== +Use the reference configuration as specified in the example `SiteConfig` CR. +Adding additional components back into the system might require additional reserved CPU capacity. +==== + +|`spec.clusters.clusterLabels` +|Configure cluster labels to correspond to the `bindingRules` field in the `PolicyGenTemplate` CRs that you define. +For example, `policygentemplates/common-ranGen.yaml` applies to all clusters with `common: true` set, `policygentemplates/group-du-sno-ranGen.yaml` applies to all clusters with `group-du-sno: ""` set. + +|`spec.clusters.crTemplates.KlusterletAddonConfig` +|Optional. Set `KlusterletAddonConfig` to `KlusterletAddonConfigOverride.yaml` to override the default `KlusterletAddonConfig` that is created for the cluster. + +|`spec.clusters.nodes.hostName` +|For single-node deployments, define a single host. +For three-node deployments, define three hosts. +For standard deployments, define three hosts with `role: master` and two or more hosts defined with `role: worker`. + +|`spec.clusters.nodes.bmcAddress` +|BMC address that you use to access the host. Applies to all cluster types. {ztp} supports iPXE and virtual media booting by using Redfish or IPMI protocols. To use iPXE booting, you must use {rh-rhacm} 2.8 or later. 
For more information about BMC addressing, see the "Additional resources" section. +[NOTE] +==== +In far edge Telco use cases, only virtual media is supported for use with {ztp}. +==== + +|`spec.clusters.nodes.bmcCredentialsName` +|Configure the `bmh-secret` CR that you separately create with the host BMC credentials. +When creating the `bmh-secret` CR, use the same namespace as the `SiteConfig` CR that provisions the host. + +|`spec.clusters.nodes.bootMode` +|Set the boot mode for the host to `UEFI`. +The default value is `UEFI`. Use `UEFISecureBoot` to enable secure boot on the host. + +|`spec.clusters.nodes.diskPartition` +|Optional. The provided example `diskPartition` is used to configure additional disk partitions. + +|`spec.clusters.nodes.ignitionConfigOverride` +|Optional. Use this field to assign partitions for persistent storage. +Adjust disk ID and size to the specific hardware. + +|`spec.clusters.nodes.cpuset` +|Configure `cpuset` to match the value that you set in the cluster `PerformanceProfile` CR `spec.cpu.reserved` field for workload partitioning. + +|`spec.clusters.nodes.nodeNetwork` +|Configure the network settings for the node. + +|`spec.clusters.nodes.nodeNetwork.config.interfaces.ipv6` +|Configure the IPv6 address for the host. +For {sno} clusters with static IP addresses, the node-specific API and Ingress IPs should be the same. +|==== diff --git a/scalability_and_performance/enabling-workload-partitioning.adoc b/scalability_and_performance/enabling-workload-partitioning.adoc index 9bdce1dc50..b0093d6a10 100644 --- a/scalability_and_performance/enabling-workload-partitioning.adoc +++ b/scalability_and_performance/enabling-workload-partitioning.adoc @@ -8,7 +8,6 @@ toc::[] :FeatureName: Workload partitioning include::snippets/technology-preview.adoc[] -:!FeatureName: In resource-constrained environments, you can use workload partitioning to isolate {product-title} services, cluster management workloads, and infrastructure pods to run on a reserved set of CPUs. @@ -35,41 +34,31 @@ compute: platform: {} replicas: 3 controlPlane: - architecture: amd64 + architecture: amd64 hyperthreading: Enabled name: master platform: {} replicas: 3 ---- -<1> Sets up a cluster for CPU partitioning at install time. The default value is `None`. +<1> Sets up a cluster for CPU partitioning at install time. The default value is `None`. + [NOTE] ==== Workload partitioning can only be enabled during cluster installation. You cannot disable workload partitioning post-installation. ==== -. In the performance profile, specify the `isolated` and `reserved` CPUs. +. In the performance profile, specify the `isolated` and `reserved` CPUs. + +.Recommended performance profile configuration [source,yaml] ---- -apiVersion: performance.openshift.io/v2 -kind: PerformanceProfile -metadata: - name: openshift-node-workload-partitioning-worker -spec: - cpu: - isolated: 0,1 <1> - reserved: "2-3" <2> - machineConfigPoolSelector: - pools.operator.machineconfiguration.openshift.io/worker: "" - nodeSelector: - node-role.kubernetes.io/worker: "" +include::snippets/ztp_PerformanceProfile.yaml[] ---- -<1> Sets the isolated CPUs. Ensure all of the Hyper-Threading pairs match. 
-<2> Specifies the CPU set to pin the workloads and the {product-title} infrastructure pods to. When workload partitioning is enabled, platform containers and platform services such as `systemd`, `CRI-O`, and `Kubelet` are restricted to these CPUs. All CPUs that are not isolated should be reserved. ++ +include::snippets/performance-profile-workload-partitioning.adoc[] Workload partitioning introduces an extended `management.workload.openshift.io/cores` resource type for platform pods. -Kubelet advertises the resources and CPU requests by pods allocated to the pool within the corresponding resource. +kubelet advertises the resources and CPU requests by pods allocated to the pool within the corresponding resource. When workload partitioning is enabled, the `management.workload.openshift.io/cores` resource allows the scheduler to correctly assign pods based on the `cpushares` capacity of the host, not just the default `cpuset`. [role="_additional-resources"] diff --git a/scalability_and_performance/ztp_far_edge/ztp-deploying-far-edge-sites.adoc b/scalability_and_performance/ztp_far_edge/ztp-deploying-far-edge-sites.adoc index 9259de8d3b..8e782aa1ea 100644 --- a/scalability_and_performance/ztp_far_edge/ztp-deploying-far-edge-sites.adoc +++ b/scalability_and_performance/ztp_far_edge/ztp-deploying-far-edge-sites.adoc @@ -23,6 +23,8 @@ include::modules/ztp-configuring-kernel-arguments-for-discovery-iso.adoc[levelof include::modules/ztp-deploying-a-site.adoc[leveloffset=+1] +include::modules/ztp-sno-siteconfig-config-reference.adoc[leveloffset=+2] + [role="_additional-resources"] .Additional resources diff --git a/scalability_and_performance/ztp_far_edge/ztp-reference-cluster-configuration-for-vdu.adoc b/scalability_and_performance/ztp_far_edge/ztp-reference-cluster-configuration-for-vdu.adoc index 1b2f6199f5..377373a187 100644 --- a/scalability_and_performance/ztp_far_edge/ztp-reference-cluster-configuration-for-vdu.adoc +++ b/scalability_and_performance/ztp_far_edge/ztp-reference-cluster-configuration-for-vdu.adoc @@ -31,7 +31,7 @@ include::modules/ztp-enabling-workload-partitioning-sno.adoc[leveloffset=+1] * For the recommended {sno} workload partitioning configuration, see xref:../../scalability_and_performance/ztp_far_edge/ztp-reference-cluster-configuration-for-vdu.adoc#ztp-sno-du-enabling-workload-partitioning_sno-configure-for-vdu[Workload partitioning]. [id="ztp-sno-install-time-cluster-config"] -== Recommended installation-time cluster configurations +== Recommended cluster install manifests The ZTP pipeline applies the following custom resources (CRs) during cluster installation. These configuration CRs ensure that the cluster meets the feature and performance requirements necessary for running a vDU application. 
@@ -52,6 +52,8 @@ include::modules/ztp-sno-du-accelerating-container-startup.adoc[leveloffset=+2] include::modules/ztp-sno-du-enabling-kdump.adoc[leveloffset=+2] +include::modules/ztp-sno-du-disabling-crio-wipe.adoc[leveloffset=+2] + include::modules/ztp-sno-du-configuring-crun-container-runtime.adoc[leveloffset=+2] [id="ztp-sno-post-install-time-cluster-config"] diff --git a/snippets/performance-profile-workload-partitioning.adoc b/snippets/performance-profile-workload-partitioning.adoc new file mode 100644 index 0000000000..56d95caae2 --- /dev/null +++ b/snippets/performance-profile-workload-partitioning.adoc @@ -0,0 +1,39 @@ +:_content-type: SNIPPET +.PerformanceProfile CR options for {sno} clusters +[cols=2*, width="90%", options="header"] +|==== +|PerformanceProfile CR field +|Description + +|`metadata.name` +a|Ensure that `name` matches the following fields set in related {ztp} custom resources (CRs): + +* `include=openshift-node-performance-${PerformanceProfile.metadata.name}` in `TunedPerformancePatch.yaml` +* `name: 50-performance-${PerformanceProfile.metadata.name}` in `validatorCRs/informDuValidator.yaml` + +|`spec.additionalKernelArgs` +|`"efi=runtime"` Configures UEFI secure boot for the cluster host. + +|`spec.cpu.isolated` +a|Set the isolated CPUs. Ensure all of the Hyper-Threading pairs match. + +[IMPORTANT] +==== +The reserved and isolated CPU pools must not overlap and together must span all available cores. CPU cores that are not accounted for cause an undefined behaviour in the system. +==== + +|`spec.cpu.reserved` +|Set the reserved CPUs. When workload partitioning is enabled, system processes, kernel threads, and system container threads are restricted to these CPUs. All CPUs that are not isolated should be reserved. + +|`spec.hugepages.pages` +a|* Set the number of huge pages (`count`) +* Set the huge pages size (`size`). +* Set `node` to the NUMA node where the `hugepages` are allocated (`node`) + +|`spec.realTimeKernel` +|Set `enabled` to `true` to use the realtime kernel. + +|`spec.realTimeKernel.workloadHints` +|Use `WorkloadHints` to define the set of top level flags for different type of workloads. +The example configuration configures the cluster for low latency and high performance. 
+|==== diff --git a/snippets/ztp-02-master-workload-partitioning.adoc b/snippets/ztp-02-master-workload-partitioning.adoc deleted file mode 100644 index 58f26e1454..0000000000 --- a/snippets/ztp-02-master-workload-partitioning.adoc +++ /dev/null @@ -1,31 +0,0 @@ -:_content-type: SNIPPET -.Recommended workload partitioning configuration -[source,yaml] ----- -apiVersion: machineconfiguration.openshift.io/v1 -kind: MachineConfig -metadata: - labels: - machineconfiguration.openshift.io/role: master - name: 02-master-workload-partitioning -spec: - config: - ignition: - version: 3.2.0 - storage: - files: - - contents: - source: data:text/plain;charset=utf-8;base64,W2NyaW8ucnVudGltZS53b3JrbG9hZHMubWFuYWdlbWVudF0KYWN0aXZhdGlvbl9hbm5vdGF0aW9uID0gInRhcmdldC53b3JrbG9hZC5vcGVuc2hpZnQuaW8vbWFuYWdlbWVudCIKYW5ub3RhdGlvbl9wcmVmaXggPSAicmVzb3VyY2VzLndvcmtsb2FkLm9wZW5zaGlmdC5pbyIKcmVzb3VyY2VzID0geyAiY3B1c2hhcmVzIiA9IDAsICJjcHVzZXQiID0gIjAtMSw1Mi01MyIgfQo= - mode: 420 - overwrite: true - path: /etc/crio/crio.conf.d/01-workload-partitioning - user: - name: root - - contents: - source: data:text/plain;charset=utf-8;base64,ewogICJtYW5hZ2VtZW50IjogewogICAgImNwdXNldCI6ICIwLTEsNTItNTMiCiAgfQp9Cg== - mode: 420 - overwrite: true - path: /etc/kubernetes/openshift-workload-pinning - user: - name: root ----- diff --git a/snippets/ztp-04-accelerated-container-startup-master.adoc b/snippets/ztp-04-accelerated-container-startup-master.adoc deleted file mode 100644 index 81d28130af..0000000000 --- a/snippets/ztp-04-accelerated-container-startup-master.adoc +++ /dev/null @@ -1,88 +0,0 @@ -:_content-type: SNIPPET -.Recommended accelerated container startup configuration -[source,yaml] ----- -apiVersion: machineconfiguration.openshift.io/v1 -kind: MachineConfig -metadata: - labels: - machineconfiguration.openshift.io/role: master - name: 04-accelerated-container-startup-master -spec: - config: - ignition: - version: 3.2.0 - storage: - files: - - contents: - source: data:text/plain;charset=utf-8;base64,#!/bin/bash
#
# Temporarily reset the core system processes's CPU affinity to be unrestricted to accelerate startup and shutdown
#
# The defaults below can be overridden via environment variables
#

# The default set of critical processes whose affinity should be temporarily unbound:
CRITICAL_PROCESSES=${CRITICAL_PROCESSES:-"systemd ovs crio kubelet NetworkManager conmon dbus"}

# Default wait time is 600s = 10m:
MAXIMUM_WAIT_TIME=${MAXIMUM_WAIT_TIME:-600}

# Default steady-state threshold = 2%
# Allowed values:
#  4  - absolute pod count (+/-)
#  4% - percent change (+/-)
#  -1 - disable the steady-state check
STEADY_STATE_THRESHOLD=${STEADY_STATE_THRESHOLD:-2%}

# Default steady-state window = 60s
# If the running pod count stays within the given threshold for this time
# period, return CPU utilization to normal before the maximum wait time has
# expires
STEADY_STATE_WINDOW=${STEADY_STATE_WINDOW:-60}

# Default steady-state allows any pod count to be "steady state"
# Increasing this will skip any steady-state checks until the count rises above
# this number to avoid false positives if there are some periods where the
# count doesn't increase but we know we can't be at steady-state yet.
STEADY_STATE_MINIMUM=${STEADY_STATE_MINIMUM:-0}

#######################################################

KUBELET_CPU_STATE=/var/lib/kubelet/cpu_manager_state
FULL_CPU_STATE=/sys/fs/cgroup/cpuset/cpuset.cpus
unrestrictedCpuset() {
  local cpus
  if [[ -e $KUBELET_CPU_STATE ]]; then
      cpus=$(jq -r '.defaultCpuSet' <$KUBELET_CPU_STATE)
  fi
  if [[ -z $cpus ]]; then
    # fall back to using all cpus if the kubelet state is not configured yet
    [[ -e $FULL_CPU_STATE ]] || return 1
    cpus=$(<$FULL_CPU_STATE)
  fi
  echo $cpus
}

restrictedCpuset() {
  for arg in $(</proc/cmdline); do
    if [[ $arg =~ ^systemd.cpu_affinity= ]]; then
      echo ${arg#*=}
      return 0
    fi
  done
  return 1
}

getCPUCount () {
  local cpuset="$1"
  local cpulist=()
  local cpus=0
  local mincpus=2

  if [[ -z $cpuset || $cpuset =~ [^0-9,-] ]]; then
    echo $mincpus
    return 1
  fi

  IFS=',' read -ra cpulist <<< $cpuset

  for elm in "${cpulist[@]}"; do
    if [[ $elm =~ ^[0-9]+$ ]]; then
      (( cpus++ ))
    elif [[ $elm =~ ^[0-9]+-[0-9]+$ ]]; then
      local low=0 high=0
      IFS='-' read low high <<< $elm
      (( cpus += high - low + 1 ))
    else
      echo $mincpus
      return 1
    fi
  done

  # Return a minimum of 2 cpus
  echo $(( cpus > $mincpus ? cpus : $mincpus ))
  return 0
}

resetOVSthreads () {
  local cpucount="$1"
  local curRevalidators=0
  local curHandlers=0
  local desiredRevalidators=0
  local desiredHandlers=0
  local rc=0

  curRevalidators=$(ps -Teo pid,tid,comm,cmd | grep -e revalidator | grep -c ovs-vswitchd)
  curHandlers=$(ps -Teo pid,tid,comm,cmd | grep -e handler | grep -c ovs-vswitchd)

  # Calculate the desired number of threads the same way OVS does.
  # OVS will set these thread count as a one shot process on startup, so we
  # have to adjust up or down during the boot up process. The desired outcome is
  # to not restrict the number of thread at startup until we reach a steady
  # state.  At which point we need to reset these based on our restricted  set
  # of cores.
  # See OVS function that calculates these thread counts:
  # https://github.com/openvswitch/ovs/blob/master/ofproto/ofproto-dpif-upcall.c#L635
  (( desiredRevalidators=$cpucount / 4 + 1 ))
  (( desiredHandlers=$cpucount - $desiredRevalidators ))


  if [[ $curRevalidators -ne $desiredRevalidators || $curHandlers -ne $desiredHandlers ]]; then

    logger "Recovery: Re-setting OVS revalidator threads: ${curRevalidators} -> ${desiredRevalidators}"
    logger "Recovery: Re-setting OVS handler threads: ${curHandlers} -> ${desiredHandlers}"

    ovs-vsctl set \
      Open_vSwitch . \
      other-config:n-handler-threads=${desiredHandlers} \
      other-config:n-revalidator-threads=${desiredRevalidators}
    rc=$?
  fi

  return $rc
}

resetAffinity() {
  local cpuset="$1"
  local failcount=0
  local successcount=0
  logger "Recovery: Setting CPU affinity for critical processes \"$CRITICAL_PROCESSES\" to $cpuset"
  for proc in $CRITICAL_PROCESSES; do
    local pids="$(pgrep $proc)"
    for pid in $pids; do
      local tasksetOutput
      tasksetOutput="$(taskset -apc "$cpuset" $pid 2>&1)"
      if [[ $? -ne 0 ]]; then
        echo "ERROR: $tasksetOutput"
        ((failcount++))
      else
        ((successcount++))
      fi
    done
  done

  resetOVSthreads "$(getCPUCount ${cpuset})"
  if [[ $? -ne 0 ]]; then
    ((failcount++))
  else
    ((successcount++))
  fi

  logger "Recovery: Re-affined $successcount pids successfully"
  if [[ $failcount -gt 0 ]]; then
    logger "Recovery: Failed to re-affine $failcount processes"
    return 1
  fi
}

setUnrestricted() {
  logger "Recovery: Setting critical system processes to have unrestricted CPU access"
  resetAffinity "$(unrestrictedCpuset)"
}

setRestricted() {
  logger "Recovery: Resetting critical system processes back to normally restricted access"
  resetAffinity "$(restrictedCpuset)"
}

currentAffinity() {
  local pid="$1"
  taskset -pc $pid | awk -F': ' '{print $2}'
}

within() {
  local last=$1 current=$2 threshold=$3
  local delta=0 pchange
  delta=$(( current - last ))
  if [[ $current -eq $last ]]; then
    pchange=0
  elif [[ $last -eq 0 ]]; then
    pchange=1000000
  else
    pchange=$(( ( $delta * 100) / last ))
  fi
  echo -n "last:$last current:$current delta:$delta pchange:${pchange}%: "
  local absolute limit
  case $threshold in
    *%)
      absolute=${pchange##-} # absolute value
      limit=${threshold%%%}
      ;;
    *)
      absolute=${delta##-} # absolute value
      limit=$threshold
      ;;
  esac
  if [[ $absolute -le $limit ]]; then
    echo "within (+/-)$threshold"
    return 0
  else
    echo "outside (+/-)$threshold"
    return 1
  fi
}

steadystate() {
  local last=$1 current=$2
  if [[ $last -lt $STEADY_STATE_MINIMUM ]]; then
    echo "last:$last current:$current Waiting to reach $STEADY_STATE_MINIMUM before checking for steady-state"
    return 1
  fi
  within $last $current $STEADY_STATE_THRESHOLD
}

waitForReady() {
  logger "Recovery: Waiting ${MAXIMUM_WAIT_TIME}s for the initialization to complete"
  local lastSystemdCpuset="$(currentAffinity 1)"
  local lastDesiredCpuset="$(unrestrictedCpuset)"
  local t=0 s=10
  local lastCcount=0 ccount=0 steadyStateTime=0
  while [[ $t -lt $MAXIMUM_WAIT_TIME ]]; do
    sleep $s
    ((t += s))
    # Re-check the current affinity of systemd, in case some other process has changed it
    local systemdCpuset="$(currentAffinity 1)"
    # Re-check the unrestricted Cpuset, as the allowed set of unreserved cores may change as pods are assigned to cores
    local desiredCpuset="$(unrestrictedCpuset)"
    if [[ $systemdCpuset != $lastSystemdCpuset || $lastDesiredCpuset != $desiredCpuset ]]; then
      resetAffinity "$desiredCpuset"
      lastSystemdCpuset="$(currentAffinity 1)"
      lastDesiredCpuset="$desiredCpuset"
    fi

    # Detect steady-state pod count
    ccount=$(crictl ps | wc -l)
    if steadystate $lastCcount $ccount; then
      ((steadyStateTime += s))
      echo "Steady-state for ${steadyStateTime}s/${STEADY_STATE_WINDOW}s"
      if [[ $steadyStateTime -ge $STEADY_STATE_WINDOW ]]; then
        logger "Recovery: Steady-state (+/- $STEADY_STATE_THRESHOLD) for ${STEADY_STATE_WINDOW}s: Done"
        return 0
      fi
    else
      if [[ $steadyStateTime -gt 0 ]]; then
        echo "Resetting steady-state timer"
        steadyStateTime=0
      fi
    fi
    lastCcount=$ccount
  done
  logger "Recovery: Recovery Complete Timeout"
}

main() {
  if ! unrestrictedCpuset >&/dev/null; then
    logger "Recovery: No unrestricted Cpuset could be detected"
    return 1
  fi

  if ! restrictedCpuset >&/dev/null; then
    logger "Recovery: No restricted Cpuset has been configured.  We are already running unrestricted."
    return 0
  fi

  # Ensure we reset the CPU affinity when we exit this script for any reason
  # This way either after the timer expires or after the process is interrupted
  # via ^C or SIGTERM, we return things back to the way they should be.
  trap setRestricted EXIT

  logger "Recovery: Recovery Mode Starting"
  setUnrestricted
  waitForReady
}

if [[ "${BASH_SOURCE[0]}" = "${0}" ]]; then
  main "${@}"
  exit $?
fi
 - mode: 493 - path: /usr/local/bin/accelerated-container-startup.sh - systemd: - units: - - contents: | - [Unit] - Description=Unlocks more CPUs for critical system processes during container startup - - [Service] - Type=simple - ExecStart=/usr/local/bin/accelerated-container-startup.sh - - # Maximum wait time is 600s = 10m: - Environment=MAXIMUM_WAIT_TIME=600 - - # Steady-state threshold = 2% - # Allowed values: - # 4 - absolute pod count (+/-) - # 4% - percent change (+/-) - # -1 - disable the steady-state check - # Note: '%' must be escaped as '%%' in systemd unit files - Environment=STEADY_STATE_THRESHOLD=2%% - - # Steady-state window = 120s - # If the running pod count stays within the given threshold for this time - # period, return CPU utilization to normal before the maximum wait time has - # expires - Environment=STEADY_STATE_WINDOW=120 - - # Steady-state minimum = 40 - # Increasing this will skip any steady-state checks until the count rises above - # this number to avoid false positives if there are some periods where the - # count doesn't increase but we know we can't be at steady-state yet. - Environment=STEADY_STATE_MINIMUM=40 - - [Install] - WantedBy=multi-user.target - enabled: true - name: accelerated-container-startup.service - - contents: | - [Unit] - Description=Unlocks more CPUs for critical system processes during container shutdown - DefaultDependencies=no - - [Service] - Type=simple - ExecStart=/usr/local/bin/accelerated-container-startup.sh - - # Maximum wait time is 600s = 10m: - Environment=MAXIMUM_WAIT_TIME=600 - - # Steady-state threshold - # Allowed values: - # 4 - absolute pod count (+/-) - # 4% - percent change (+/-) - # -1 - disable the steady-state check - # Note: '%' must be escaped as '%%' in systemd unit files - Environment=STEADY_STATE_THRESHOLD=-1 - - # Steady-state window = 60s - # If the running pod count stays within the given threshold for this time - # period, return CPU utilization to normal before the maximum wait time has - # expires - Environment=STEADY_STATE_WINDOW=60 - - [Install] - WantedBy=shutdown.target reboot.target halt.target - enabled: true - name: accelerated-container-shutdown.service ----- diff --git a/snippets/ztp-cluster-logging.adoc b/snippets/ztp-cluster-logging.adoc deleted file mode 100644 index ff2dd0f804..0000000000 --- a/snippets/ztp-cluster-logging.adoc +++ /dev/null @@ -1,48 +0,0 @@ -:_content-type: SNIPPET -.Recommended cluster logging and log forwarding configuration -[source,yaml] ----- -apiVersion: logging.openshift.io/v1 -kind: ClusterLogging <1> -metadata: - name: instance - namespace: openshift-logging -spec: - collection: - logs: - fluentd: {} - type: fluentd - curation: - type: "curator" - curator: - schedule: "30 3 * * *" - managementState: Managed ---- -apiVersion: logging.openshift.io/v1 -kind: ClusterLogForwarder <2> -metadata: - name: instance - namespace: openshift-logging -spec: - inputs: - - infrastructure: {} - name: infra-logs - outputs: - - name: kafka-open - type: kafka - url: tcp://10.46.55.190:9092/test <3> - pipelines: - - inputRefs: - - audit - name: audit-logs - outputRefs: - - kafka-open - - inputRefs: - - infrastructure - name: infrastructure-logs - outputRefs: - - kafka-open ----- -<1> Updates the existing `ClusterLogging` instance or creates the instance if it does not exist. -<2> Updates the existing `ClusterLogForwarder` instance or creates the instance if it does not exist. -<3> Specifies the URL of the Kafka server where the logs are forwarded to. 
diff --git a/snippets/ztp-disable-console.adoc b/snippets/ztp-disable-console.adoc deleted file mode 100644 index 7147b1b6ac..0000000000 --- a/snippets/ztp-disable-console.adoc +++ /dev/null @@ -1,18 +0,0 @@ -:_content-type: SNIPPET -.Recommended console configuration -[source,yaml] ----- -apiVersion: operator.openshift.io/v1 -kind: Console -metadata: - annotations: - include.release.openshift.io/ibm-cloud-managed: "false" - include.release.openshift.io/self-managed-high-availability: "false" - include.release.openshift.io/single-node-developer: "false" - release.openshift.io/create-only: "true" - name: cluster -spec: - logLevel: Normal - managementState: Removed - operatorLogLevel: Normal ----- diff --git a/snippets/ztp-disable-ntp.adoc b/snippets/ztp-disable-ntp.adoc deleted file mode 100644 index d5743e8d05..0000000000 --- a/snippets/ztp-disable-ntp.adoc +++ /dev/null @@ -1,36 +0,0 @@ -:_content-type: SNIPPET -[source,yaml] ----- -apiVersion: machineconfiguration.openshift.io/v1 -kind: MachineConfig -metadata: - labels: - machineconfiguration.openshift.io/role: master - name: disable-chronyd -spec: - config: - systemd: - units: - - contents: | - [Unit] - Description=NTP client/server - Documentation=man:chronyd(8) man:chrony.conf(5) - After=ntpdate.service sntp.service ntpd.service - Conflicts=ntpd.service systemd-timesyncd.service - ConditionCapability=CAP_SYS_TIME - [Service] - Type=forking - PIDFile=/run/chrony/chronyd.pid - EnvironmentFile=-/etc/sysconfig/chronyd - ExecStart=/usr/sbin/chronyd $OPTIONS - ExecStartPost=/usr/libexec/chrony-helper update-daemon - PrivateTmp=yes - ProtectHome=yes - ProtectSystem=full - [Install] - WantedBy=multi-user.target - enabled: false - name: chronyd.service - ignition: - version: 2.2.0 ----- diff --git a/snippets/ztp-operator-groups-namespace.adoc b/snippets/ztp-operator-groups-namespace.adoc deleted file mode 100644 index 12e14132ee..0000000000 --- a/snippets/ztp-operator-groups-namespace.adoc +++ /dev/null @@ -1,70 +0,0 @@ -:_content-type: SNIPPET -.Recommended Operator Namespace and OperatorGroup configuration -[source,yaml] ----- -apiVersion: v1 -kind: Namespace -metadata: - annotations: - workload.openshift.io/allowed: management - name: openshift-local-storage ---- -apiVersion: operators.coreos.com/v1 -kind: OperatorGroup -metadata: - name: openshift-local-storage - namespace: openshift-local-storage -spec: - targetNamespaces: - - openshift-local-storage ---- -apiVersion: v1 -kind: Namespace -metadata: - annotations: - workload.openshift.io/allowed: management - name: openshift-logging ---- -apiVersion: operators.coreos.com/v1 -kind: OperatorGroup -metadata: - name: cluster-logging - namespace: openshift-logging -spec: - targetNamespaces: - - openshift-logging ---- -apiVersion: v1 -kind: Namespace -metadata: - annotations: - workload.openshift.io/allowed: management - labels: - openshift.io/cluster-monitoring: "true" - name: openshift-ptp ---- -apiVersion: operators.coreos.com/v1 -kind: OperatorGroup -metadata: - name: ptp-operators - namespace: openshift-ptp -spec: - targetNamespaces: - - openshift-ptp ---- -apiVersion: v1 -kind: Namespace -metadata: - annotations: - workload.openshift.io/allowed: management - name: openshift-sriov-network-operator ---- -apiVersion: operators.coreos.com/v1 -kind: OperatorGroup -metadata: - name: sriov-network-operators - namespace: openshift-sriov-network-operator -spec: - targetNamespaces: - - openshift-sriov-network-operator ----- diff --git a/snippets/ztp-operator-subs.adoc 
b/snippets/ztp-operator-subs.adoc deleted file mode 100644 index bd5e4c65be..0000000000 --- a/snippets/ztp-operator-subs.adoc +++ /dev/null @@ -1,55 +0,0 @@ -:_content-type: SNIPPET -.Recommended Operator subscriptions -[source,yaml] ----- -apiVersion: operators.coreos.com/v1alpha1 -kind: Subscription -metadata: - name: cluster-logging - namespace: openshift-logging -spec: - channel: "stable" <1> - name: cluster-logging - source: redhat-operators - sourceNamespace: openshift-marketplace - installPlanApproval: Manual <2> ---- -apiVersion: operators.coreos.com/v1alpha1 -kind: Subscription -metadata: - name: local-storage-operator - namespace: openshift-local-storage -spec: - channel: "stable" - installPlanApproval: Automatic - name: local-storage-operator - source: redhat-operators - sourceNamespace: openshift-marketplace - installPlanApproval: Manual ---- -apiVersion: operators.coreos.com/v1alpha1 -kind: Subscription -metadata: - name: ptp-operator-subscription - namespace: openshift-ptp -spec: - channel: "stable" - name: ptp-operator - source: redhat-operators - sourceNamespace: openshift-marketplace - installPlanApproval: Manual ---- -apiVersion: operators.coreos.com/v1alpha1 -kind: Subscription -metadata: - name: sriov-network-operator-subscription - namespace: openshift-sriov-network-operator -spec: - channel: "stable" - name: sriov-network-operator - source: redhat-operators - sourceNamespace: openshift-marketplace - installPlanApproval: Manual ----- -<1> Specify the channel to get the Operator from. `stable` is the recommended channel. -<2> Specify `Manual` or `Automatic`. In `Automatic` mode, the Operator automatically updates to the latest versions in the channel as they become available in the registry. In `Manual` mode, new Operator versions are installed only after they are explicitly approved. diff --git a/snippets/ztp-performance-profile.adoc b/snippets/ztp-performance-profile.adoc deleted file mode 100644 index 3f31031c6e..0000000000 --- a/snippets/ztp-performance-profile.adoc +++ /dev/null @@ -1,43 +0,0 @@ -:_content-type: SNIPPET -.Recommended performance profile configuration -[source,yaml] ----- -apiVersion: performance.openshift.io/v2 -kind: PerformanceProfile -metadata: - name: openshift-node-performance-profile <1> -spec: - additionalKernelArgs: - - "rcupdate.rcu_normal_after_boot=0" - - "efi=runtime" <2> - cpu: - isolated: 2-51,54-103 <3> - reserved: 0-1,52-53 <4> - hugepages: - defaultHugepagesSize: 1G - pages: - - count: 32 <5> - size: 1G <6> - node: 0 <7> - machineConfigPoolSelector: - pools.operator.machineconfiguration.openshift.io/master: "" - nodeSelector: - node-role.kubernetes.io/master: "" - numa: - topologyPolicy: "restricted" - realTimeKernel: - enabled: true <8> ----- -<1> Ensure that the value for `name` matches that specified in the `spec.profile.data` field of `TunedPerformancePatch.yaml` and the `status.configuration.source.name` field of `validatorCRs/informDuValidator.yaml`. -<2> Configures UEFI secure boot for the cluster host. -<3> Set the isolated CPUs. Ensure all of the Hyper-Threading pairs match. -+ -[IMPORTANT] -==== -The reserved and isolated CPU pools must not overlap and together must span all available cores. CPU cores that are not accounted for cause an undefined behaviour in the system. -==== -<4> Set the reserved CPUs. When workload partitioning is enabled, system processes, kernel threads, and system container threads are restricted to these CPUs. All CPUs that are not isolated should be reserved. -<5> Set the number of huge pages. 
-<6> Set the huge page size. -<7> Set `node` to the NUMA node where the `hugepages` are allocated. -<8> Set `enabled` to `true` to install the real-time Linux kernel. diff --git a/snippets/ztp-ptp-config.adoc b/snippets/ztp-ptp-config.adoc deleted file mode 100644 index 868f7f1fb5..0000000000 --- a/snippets/ztp-ptp-config.adoc +++ /dev/null @@ -1,123 +0,0 @@ -:_content-type: SNIPPET -.Recommended PTP configuration -[source,yaml] ----- -apiVersion: ptp.openshift.io/v1 -kind: PtpConfig -metadata: - name: du-ptp-slave - namespace: openshift-ptp -spec: - profile: - - interface: ens5f0 <1> - name: slave - phc2sysOpts: -a -r -n 24 - ptp4lConf: | - [global] - # - # Default Data Set - # - twoStepFlag 1 - slaveOnly 0 - priority1 128 - priority2 128 - domainNumber 24 - #utc_offset 37 - clockClass 248 - clockAccuracy 0xFE - offsetScaledLogVariance 0xFFFF - free_running 0 - freq_est_interval 1 - dscp_event 0 - dscp_general 0 - dataset_comparison ieee1588 - G.8275.defaultDS.localPriority 128 - # - # Port Data Set - # - logAnnounceInterval -3 - logSyncInterval -4 - logMinDelayReqInterval -4 - logMinPdelayReqInterval -4 - announceReceiptTimeout 3 - syncReceiptTimeout 0 - delayAsymmetry 0 - fault_reset_interval 4 - neighborPropDelayThresh 20000000 - masterOnly 0 - G.8275.portDS.localPriority 128 - # - # Run time options - # - assume_two_step 0 - logging_level 6 - path_trace_enabled 0 - follow_up_info 0 - hybrid_e2e 0 - inhibit_multicast_service 0 - net_sync_monitor 0 - tc_spanning_tree 0 - tx_timestamp_timeout 1 - unicast_listen 0 - unicast_master_table 0 - unicast_req_duration 3600 - use_syslog 1 - verbose 0 - summary_interval 0 - kernel_leap 1 - check_fup_sync 0 - # - # Servo Options - # - pi_proportional_const 0.0 - pi_integral_const 0.0 - pi_proportional_scale 0.0 - pi_proportional_exponent -0.3 - pi_proportional_norm_max 0.7 - pi_integral_scale 0.0 - pi_integral_exponent 0.4 - pi_integral_norm_max 0.3 - step_threshold 2.0 - first_step_threshold 0.00002 - max_frequency 900000000 - clock_servo pi - sanity_freq_limit 200000000 - ntpshm_segment 0 - # - # Transport options - # - transportSpecific 0x0 - ptp_dst_mac 01:1B:19:00:00:00 - p2p_dst_mac 01:80:C2:00:00:0E - udp_ttl 1 - udp6_scope 0x0E - uds_address /var/run/ptp4l - # - # Default interface options - # - clock_type OC - network_transport L2 - delay_mechanism E2E - time_stamping hardware - tsproc_mode filter - delay_filter moving_median - delay_filter_length 10 - egressLatency 0 - ingressLatency 0 - boundary_clock_jbod 0 - # - # Clock description - # - productDescription ;; - revisionData ;; - manufacturerIdentity 00:00:00 - userDescription ; - timeSource 0xA0 - ptp4lOpts: -2 -s --summary_interval -4 -recommend: - - match: - - nodeLabel: node-role.kubernetes.io/master - priority: 4 - profile: slave ----- -<1> Sets the interface used to receive the PTP clock signal. 
diff --git a/snippets/ztp-sriov-du-config.adoc b/snippets/ztp-sriov-du-config.adoc deleted file mode 100644 index a72c68dbe5..0000000000 --- a/snippets/ztp-sriov-du-config.adoc +++ /dev/null @@ -1,78 +0,0 @@ -:_content-type: SNIPPET -.Recommended SR-IOV configuration -[source,yaml] ----- -apiVersion: sriovnetwork.openshift.io/v1 -kind: SriovOperatorConfig -metadata: - name: default - namespace: openshift-sriov-network-operator -spec: - configDaemonNodeSelector: - node-role.kubernetes.io/master: "" - disableDrain: true - enableInjector: true - enableOperatorWebhook: true ---- -apiVersion: sriovnetwork.openshift.io/v1 -kind: SriovNetwork -metadata: - name: sriov-nw-du-mh - namespace: openshift-sriov-network-operator -spec: - networkNamespace: openshift-sriov-network-operator - resourceName: du_mh - vlan: 150 <1> ---- -apiVersion: sriovnetwork.openshift.io/v1 -kind: SriovNetworkNodePolicy -metadata: - name: sriov-nnp-du-mh - namespace: openshift-sriov-network-operator -spec: - deviceType: vfio-pci <2> - isRdma: false - nicSelector: - pfNames: - - ens7f0 <3> - nodeSelector: - node-role.kubernetes.io/master: "" - numVfs: 8 <4> - priority: 10 - resourceName: du_mh ---- -apiVersion: sriovnetwork.openshift.io/v1 -kind: SriovNetwork -metadata: - name: sriov-nw-du-fh - namespace: openshift-sriov-network-operator -spec: - networkNamespace: openshift-sriov-network-operator - resourceName: du_fh - vlan: 140 <5> ---- -apiVersion: sriovnetwork.openshift.io/v1 -kind: SriovNetworkNodePolicy -metadata: - name: sriov-nnp-du-fh - namespace: openshift-sriov-network-operator -spec: - deviceType: netdevice <6> - isRdma: true - nicSelector: - pfNames: - - ens5f0 <7> - nodeSelector: - node-role.kubernetes.io/master: "" - numVfs: 8 <8> - priority: 10 - resourceName: du_fh ----- -<1> Specifies the VLAN for the midhaul network. -<2> Select either `vfio-pci` or `netdevice`, as needed. -<3> Specifies the interface connected to the midhaul network. -<4> Specifies the number of VFs for the midhaul network. -<5> The VLAN for the fronthaul network. -<6> Select either `vfio-pci` or `netdevice`, as needed. -<7> Specifies the interface connected to the fronthaul network. -<8> Specifies the number of VFs for the fronthaul network. diff --git a/snippets/ztp-container-mount-namespace-and-kubelet-conf-master.adoc b/snippets/ztp_01-container-mount-ns-and-kubelet-conf-master.yaml similarity index 97% rename from snippets/ztp-container-mount-namespace-and-kubelet-conf-master.adoc rename to snippets/ztp_01-container-mount-ns-and-kubelet-conf-master.yaml index 5f91ce477f..88f2ad2184 100644 --- a/snippets/ztp-container-mount-namespace-and-kubelet-conf-master.adoc +++ b/snippets/ztp_01-container-mount-ns-and-kubelet-conf-master.yaml @@ -1,7 +1,5 @@ -:_content-type: SNIPPET -.Recommended container mount namespace configuration -[source,yaml] ----- +# Automatically generated by extra-manifests-builder +# Do not make changes directly. 
apiVersion: machineconfiguration.openshift.io/v1 kind: MachineConfig metadata: @@ -38,7 +36,6 @@ spec: ExecStartPre=touch ${BIND_POINT} ExecStart=unshare --mount=${BIND_POINT} --propagation slave mount --make-rshared / ExecStop=umount -R ${RUNTIME_DIRECTORY} - enabled: true name: container-mount-namespace.service - dropins: - contents: | @@ -73,4 +70,3 @@ spec: Environment="OPENSHIFT_EVICTION_MONITORING_PERIOD_DURATION=30s" name: 30-kubelet-interval-tuning.conf name: kubelet.service ----- diff --git a/snippets/ztp-load-sctp-module.adoc b/snippets/ztp_03-sctp-machine-config-master.yaml similarity index 61% rename from snippets/ztp-load-sctp-module.adoc rename to snippets/ztp_03-sctp-machine-config-master.yaml index 802c376c53..34ef791a31 100644 --- a/snippets/ztp-load-sctp-module.adoc +++ b/snippets/ztp_03-sctp-machine-config-master.yaml @@ -1,13 +1,11 @@ -:_content-type: SNIPPET -.Recommended SCTP configuration -[source,yaml] ----- +# Automatically generated by extra-manifests-builder +# Do not make changes directly. apiVersion: machineconfiguration.openshift.io/v1 kind: MachineConfig metadata: labels: machineconfiguration.openshift.io/role: master - name: load-sctp-module + name: load-sctp-module-master spec: config: ignition: @@ -18,11 +16,10 @@ spec: source: data:, verification: {} filesystem: root - mode: 420 - path: /etc/modprobe.d/sctp-blacklist.conf + mode: 420 + path: /etc/modprobe.d/sctp-blacklist.conf - contents: source: data:text/plain;charset=utf-8,sctp filesystem: root - mode: 420 - path: /etc/modules-load.d/sctp-load.conf ----- + mode: 420 + path: /etc/modules-load.d/sctp-load.conf diff --git a/snippets/ztp_04-accelerated-container-startup-master.yaml b/snippets/ztp_04-accelerated-container-startup-master.yaml new file mode 100644 index 0000000000..355c61f7af --- /dev/null +++ b/snippets/ztp_04-accelerated-container-startup-master.yaml @@ -0,0 +1,85 @@ +# Automatically generated by extra-manifests-builder +# Do not make changes directly. +apiVersion: machineconfiguration.openshift.io/v1 +kind: MachineConfig +metadata: + labels: + machineconfiguration.openshift.io/role: master + name: 04-accelerated-container-startup-master +spec: + config: + ignition: + version: 3.2.0 + storage: + files: + - contents: + source: data:text/plain;charset=utf-8;base64,#!/bin/bash
#
# Temporarily reset the core system processes' CPU affinity to be unrestricted to accelerate startup and shutdown
#
# The defaults below can be overridden via environment variables
#

# The default set of critical processes whose affinity should be temporarily unbound:
CRITICAL_PROCESSES=${CRITICAL_PROCESSES:-"crio kubelet NetworkManager conmon dbus"}

# Default wait time is 600s = 10m:
MAXIMUM_WAIT_TIME=${MAXIMUM_WAIT_TIME:-600}

# Default steady-state threshold = 2%
# Allowed values:
#  4  - absolute pod count (+/-)
#  4% - percent change (+/-)
#  -1 - disable the steady-state check
STEADY_STATE_THRESHOLD=${STEADY_STATE_THRESHOLD:-2%}

# Default steady-state window = 60s
# If the running pod count stays within the given threshold for this time
# period, return CPU utilization to normal before the maximum wait time
# expires
STEADY_STATE_WINDOW=${STEADY_STATE_WINDOW:-60}

# Default steady-state allows any pod count to be "steady state"
# Increasing this will skip any steady-state checks until the count rises above
# this number to avoid false positives if there are some periods where the
# count doesn't increase but we know we can't be at steady-state yet.
STEADY_STATE_MINIMUM=${STEADY_STATE_MINIMUM:-0}

#######################################################

KUBELET_CPU_STATE=/var/lib/kubelet/cpu_manager_state
FULL_CPU_STATE=/sys/fs/cgroup/cpuset/cpuset.cpus
KUBELET_CONF=/etc/kubernetes/kubelet.conf
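# unrestrictedCpuset prints the CPU set that critical processes may use while
# unrestricted: the kubelet defaultCpuSet merged with the reserved CPUs, falling
# back to all CPUs when the kubelet state is not available yet.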
unrestrictedCpuset() {
  local cpus
  if [[ -e $KUBELET_CPU_STATE ]]; then
    cpus=$(jq -r '.defaultCpuSet' <$KUBELET_CPU_STATE)
    if [[ -n "${cpus}" && -e ${KUBELET_CONF} ]]; then
      reserved_cpus=$(jq -r '.reservedSystemCPUs' <${KUBELET_CONF})
      if [[ -n "${reserved_cpus}" ]]; then
        # Use taskset to merge the two cpusets
        cpus=$(taskset -c "${reserved_cpus},${cpus}" grep -i Cpus_allowed_list /proc/self/status | awk '{print $2}')
      fi
    fi
  fi
  if [[ -z $cpus ]]; then
    # fall back to using all cpus if the kubelet state is not configured yet
    [[ -e $FULL_CPU_STATE ]] || return 1
    cpus=$(<$FULL_CPU_STATE)
  fi
  echo $cpus
}

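# restrictedCpuset prints the restricted CPU set from the systemd.cpu_affinity
# kernel argument, and fails if no affinity is configured on the command line.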
restrictedCpuset() {
  for arg in $(</proc/cmdline); do
    if [[ $arg =~ ^systemd.cpu_affinity= ]]; then
      echo ${arg#*=}
      return 0
    fi
  done
  return 1
}

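# resetAffinity re-pins every process named in CRITICAL_PROCESSES to the given
# cpuset with taskset, logs how many PIDs were updated, and returns non-zero if
# any re-pinning fails.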
resetAffinity() {
  local cpuset="$1"
  local failcount=0
  local successcount=0
  logger "Recovery: Setting CPU affinity for critical processes \"$CRITICAL_PROCESSES\" to $cpuset"
  for proc in $CRITICAL_PROCESSES; do
    local pids="$(pgrep $proc)"
    for pid in $pids; do
      local tasksetOutput
      tasksetOutput="$(taskset -apc "$cpuset" $pid 2>&1)"
      if [[ $? -ne 0 ]]; then
        echo "ERROR: $tasksetOutput"
        ((failcount++))
      else
        ((successcount++))
      fi
    done
  done

  logger "Recovery: Re-affined $successcount pids successfully"
  if [[ $failcount -gt 0 ]]; then
    logger "Recovery: Failed to re-affine $failcount processes"
    return 1
  fi
}

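# setUnrestricted and setRestricted widen or restore the affinity of the
# critical processes by calling resetAffinity with the matching cpuset.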
setUnrestricted() {
  logger "Recovery: Setting critical system processes to have unrestricted CPU access"
  resetAffinity "$(unrestrictedCpuset)"
}

setRestricted() {
  logger "Recovery: Resetting critical system processes back to normally restricted access"
  resetAffinity "$(restrictedCpuset)"
}

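# currentAffinity prints the CPU affinity list of the given PID.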
currentAffinity() {
  local pid="$1"
  taskset -pc $pid | awk -F': ' '{print $2}'
}

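# within checks whether the change from the last to the current pod count stays
# inside the threshold, which may be an absolute count (for example 4) or a
# percentage (for example 4%).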
within() {
  local last=$1 current=$2 threshold=$3
  local delta=0 pchange
  delta=$(( current - last ))
  if [[ $current -eq $last ]]; then
    pchange=0
  elif [[ $last -eq 0 ]]; then
    pchange=1000000
  else
    pchange=$(( ( $delta * 100) / last ))
  fi
  echo -n "last:$last current:$current delta:$delta pchange:${pchange}%: "
  local absolute limit
  case $threshold in
    *%)
      absolute=${pchange##-} # absolute value
      limit=${threshold%%%}
      ;;
    *)
      absolute=${delta##-} # absolute value
      limit=$threshold
      ;;
  esac
  if [[ $absolute -le $limit ]]; then
    echo "within (+/-)$threshold"
    return 0
  else
    echo "outside (+/-)$threshold"
    return 1
  fi
}

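# steadystate treats the pod count as steady only after it reaches
# STEADY_STATE_MINIMUM, then applies the configured threshold check.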
steadystate() {
  local last=$1 current=$2
  if [[ $last -lt $STEADY_STATE_MINIMUM ]]; then
    echo "last:$last current:$current Waiting to reach $STEADY_STATE_MINIMUM before checking for steady-state"
    return 1
  fi
  within $last $current $STEADY_STATE_THRESHOLD
}

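# waitForReady polls the container count every 10 seconds, re-applies the
# unrestricted affinity if it drifts, and returns once the count has stayed
# steady for STEADY_STATE_WINDOW seconds or MAXIMUM_WAIT_TIME expires.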
waitForReady() {
  logger "Recovery: Waiting ${MAXIMUM_WAIT_TIME}s for the initialization to complete"
  local lastSystemdCpuset="$(currentAffinity 1)"
  local lastDesiredCpuset="$(unrestrictedCpuset)"
  local t=0 s=10
  local lastCcount=0 ccount=0 steadyStateTime=0
  while [[ $t -lt $MAXIMUM_WAIT_TIME ]]; do
    sleep $s
    ((t += s))
    # Re-check the current affinity of systemd, in case some other process has changed it
    local systemdCpuset="$(currentAffinity 1)"
    # Re-check the unrestricted Cpuset, as the allowed set of unreserved cores may change as pods are assigned to cores
    local desiredCpuset="$(unrestrictedCpuset)"
    if [[ $systemdCpuset != $lastSystemdCpuset || $lastDesiredCpuset != $desiredCpuset ]]; then
      resetAffinity "$desiredCpuset"
      lastSystemdCpuset="$(currentAffinity 1)"
      lastDesiredCpuset="$desiredCpuset"
    fi

    # Detect steady-state pod count
    ccount=$(crictl ps | wc -l)
    if steadystate $lastCcount $ccount; then
      ((steadyStateTime += s))
      echo "Steady-state for ${steadyStateTime}s/${STEADY_STATE_WINDOW}s"
      if [[ $steadyStateTime -ge $STEADY_STATE_WINDOW ]]; then
        logger "Recovery: Steady-state (+/- $STEADY_STATE_THRESHOLD) for ${STEADY_STATE_WINDOW}s: Done"
        return 0
      fi
    else
      if [[ $steadyStateTime -gt 0 ]]; then
        echo "Resetting steady-state timer"
        steadyStateTime=0
      fi
    fi
    lastCcount=$ccount
  done
  logger "Recovery: Recovery Complete Timeout"
}

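# main widens the affinity for the duration of startup and relies on the EXIT
# trap to restore the restricted affinity when the script ends or is interrupted.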
main() {
  if ! unrestrictedCpuset >&/dev/null; then
    logger "Recovery: No unrestricted Cpuset could be detected"
    return 1
  fi

  if ! restrictedCpuset >&/dev/null; then
    logger "Recovery: No restricted Cpuset has been configured.  We are already running unrestricted."
    return 0
  fi

  # Ensure we reset the CPU affinity when we exit this script for any reason
  # This way either after the timer expires or after the process is interrupted
  # via ^C or SIGTERM, we return things back to the way they should be.
  trap setRestricted EXIT

  logger "Recovery: Recovery Mode Starting"
  setUnrestricted
  waitForReady
}

if [[ "${BASH_SOURCE[0]}" = "${0}" ]]; then
  main "${@}"
  exit $?
fi
 + mode: 493 + path: /usr/local/bin/accelerated-container-startup.sh + systemd: + units: + - contents: | + [Unit] + Description=Unlocks more CPUs for critical system processes during container startup + + [Service] + Type=simple + ExecStart=/usr/local/bin/accelerated-container-startup.sh + + # Maximum wait time is 600s = 10m: + Environment=MAXIMUM_WAIT_TIME=600 + + # Steady-state threshold = 2% + # Allowed values: + # 4 - absolute pod count (+/-) + # 4% - percent change (+/-) + # -1 - disable the steady-state check + # Note: '%' must be escaped as '%%' in systemd unit files + Environment=STEADY_STATE_THRESHOLD=2%% + + # Steady-state window = 120s + # If the running pod count stays within the given threshold for this time + # period, return CPU utilization to normal before the maximum wait time has + # expires + Environment=STEADY_STATE_WINDOW=120 + + # Steady-state minimum = 40 + # Increasing this will skip any steady-state checks until the count rises above + # this number to avoid false positives if there are some periods where the + # count doesn't increase but we know we can't be at steady-state yet. + Environment=STEADY_STATE_MINIMUM=40 + + [Install] + WantedBy=multi-user.target + enabled: true + name: accelerated-container-startup.service + - contents: | + [Unit] + Description=Unlocks more CPUs for critical system processes during container shutdown + DefaultDependencies=no + + [Service] + Type=simple + ExecStart=/usr/local/bin/accelerated-container-startup.sh + + # Maximum wait time is 600s = 10m: + Environment=MAXIMUM_WAIT_TIME=600 + + # Steady-state threshold + # Allowed values: + # 4 - absolute pod count (+/-) + # 4% - percent change (+/-) + # -1 - disable the steady-state check + # Note: '%' must be escaped as '%%' in systemd unit files + Environment=STEADY_STATE_THRESHOLD=-1 + + # Steady-state window = 60s + # If the running pod count stays within the given threshold for this time + # period, return CPU utilization to normal before the maximum wait time has + # expires + Environment=STEADY_STATE_WINDOW=60 + + [Install] + WantedBy=shutdown.target reboot.target halt.target + enabled: true + name: accelerated-container-shutdown.service diff --git a/snippets/ztp_05-kdump-config-master.yaml b/snippets/ztp_05-kdump-config-master.yaml new file mode 100644 index 0000000000..dbe1c46efd --- /dev/null +++ b/snippets/ztp_05-kdump-config-master.yaml @@ -0,0 +1,32 @@ +# Automatically generated by extra-manifests-builder +# Do not make changes directly. 
+apiVersion: machineconfiguration.openshift.io/v1 +kind: MachineConfig +metadata: + labels: + machineconfiguration.openshift.io/role: master + name: 05-kdump-config-master +spec: + config: + ignition: + version: 3.2.0 + systemd: + units: + - enabled: true + name: kdump-remove-ice-module.service + contents: | + [Unit] + Description=Remove ice module when doing kdump + Before=kdump.service + [Service] + Type=oneshot + RemainAfterExit=true + ExecStart=/usr/local/bin/kdump-remove-ice-module.sh + [Install] + WantedBy=multi-user.target + storage: + files: + - contents: + source: data:text/plain;charset=utf-8;base64,IyEvdXNyL2Jpbi9lbnYgYmFzaAoKIyBUaGlzIHNjcmlwdCByZW1vdmVzIHRoZSBpY2UgbW9kdWxlIGZyb20ga2R1bXAgdG8gcHJldmVudCBrZHVtcCBmYWlsdXJlcyBvbiBjZXJ0YWluIHNlcnZlcnMuCiMgVGhpcyBpcyBhIHRlbXBvcmFyeSB3b3JrYXJvdW5kIGZvciBSSEVMUExBTi0xMzgyMzYgYW5kIGNhbiBiZSByZW1vdmVkIHdoZW4gdGhhdCBpc3N1ZSBpcwojIGZpeGVkLgoKc2V0IC14CgpTRUQ9Ii91c3IvYmluL3NlZCIKR1JFUD0iL3Vzci9iaW4vZ3JlcCIKCiMgb3ZlcnJpZGUgZm9yIHRlc3RpbmcgcHVycG9zZXMKS0RVTVBfQ09ORj0iJHsxOi0vZXRjL3N5c2NvbmZpZy9rZHVtcH0iClJFTU9WRV9JQ0VfU1RSPSJtb2R1bGVfYmxhY2tsaXN0PWljZSIKCiMgZXhpdCBpZiBmaWxlIGRvZXNuJ3QgZXhpc3QKWyAhIC1mICR7S0RVTVBfQ09ORn0gXSAmJiBleGl0IDAKCiMgZXhpdCBpZiBmaWxlIGFscmVhZHkgdXBkYXRlZAoke0dSRVB9IC1GcSAke1JFTU9WRV9JQ0VfU1RSfSAke0tEVU1QX0NPTkZ9ICYmIGV4aXQgMAoKIyBUYXJnZXQgbGluZSBsb29rcyBzb21ldGhpbmcgbGlrZSB0aGlzOgojIEtEVU1QX0NPTU1BTkRMSU5FX0FQUEVORD0iaXJxcG9sbCBucl9jcHVzPTEgLi4uIGhlc3RfZGlzYWJsZSIKIyBVc2Ugc2VkIHRvIG1hdGNoIGV2ZXJ5dGhpbmcgYmV0d2VlbiB0aGUgcXVvdGVzIGFuZCBhcHBlbmQgdGhlIFJFTU9WRV9JQ0VfU1RSIHRvIGl0CiR7U0VEfSAtaSAncy9eS0RVTVBfQ09NTUFORExJTkVfQVBQRU5EPSJbXiJdKi8mICcke1JFTU9WRV9JQ0VfU1RSfScvJyAke0tEVU1QX0NPTkZ9IHx8IGV4aXQgMAo= + mode: 448 + path: /usr/local/bin/kdump-remove-ice-module.sh diff --git a/snippets/ztp-06-kdump-enable-master.adoc b/snippets/ztp_06-kdump-master.yaml similarity index 80% rename from snippets/ztp-06-kdump-enable-master.adoc rename to snippets/ztp_06-kdump-master.yaml index 954a70d7a8..62a3b68749 100644 --- a/snippets/ztp-06-kdump-enable-master.adoc +++ b/snippets/ztp_06-kdump-master.yaml @@ -1,7 +1,5 @@ -:_content-type: SNIPPET -.Recommended kdump configuration -[source,yaml] ----- +# Automatically generated by extra-manifests-builder +# Do not make changes directly. apiVersion: machineconfiguration.openshift.io/v1 kind: MachineConfig metadata: @@ -18,4 +16,3 @@ spec: name: kdump.service kernelArguments: - crashkernel=512M ----- diff --git a/snippets/ztp_99-crio-disable-wipe-master.yaml b/snippets/ztp_99-crio-disable-wipe-master.yaml new file mode 100644 index 0000000000..34b036de44 --- /dev/null +++ b/snippets/ztp_99-crio-disable-wipe-master.yaml @@ -0,0 +1,18 @@ +# Automatically generated by extra-manifests-builder +# Do not make changes directly. +apiVersion: machineconfiguration.openshift.io/v1 +kind: MachineConfig +metadata: + labels: + machineconfiguration.openshift.io/role: master + name: 99-crio-disable-wipe-master +spec: + config: + ignition: + version: 3.2.0 + storage: + files: + - contents: + source: data:text/plain;charset=utf-8;base64,W2NyaW9dCmNsZWFuX3NodXRkb3duX2ZpbGUgPSAiIgo= + mode: 420 + path: /etc/crio/crio.conf.d/99-crio-disable-wipe.toml diff --git a/snippets/ztp_99-crio-disable-wipe-worker.yaml b/snippets/ztp_99-crio-disable-wipe-worker.yaml new file mode 100644 index 0000000000..fc42063961 --- /dev/null +++ b/snippets/ztp_99-crio-disable-wipe-worker.yaml @@ -0,0 +1,18 @@ +# Automatically generated by extra-manifests-builder +# Do not make changes directly. 
+apiVersion: machineconfiguration.openshift.io/v1 +kind: MachineConfig +metadata: + labels: + machineconfiguration.openshift.io/role: worker + name: 99-crio-disable-wipe-worker +spec: + config: + ignition: + version: 3.2.0 + storage: + files: + - contents: + source: data:text/plain;charset=utf-8;base64,W2NyaW9dCmNsZWFuX3NodXRkb3duX2ZpbGUgPSAiIgo= + mode: 420 + path: /etc/crio/crio.conf.d/99-crio-disable-wipe.toml diff --git a/snippets/ztp_ClusterLogForwarder.yaml b/snippets/ztp_ClusterLogForwarder.yaml new file mode 100644 index 0000000000..512452ede6 --- /dev/null +++ b/snippets/ztp_ClusterLogForwarder.yaml @@ -0,0 +1,24 @@ +apiVersion: "logging.openshift.io/v1" +kind: ClusterLogForwarder +metadata: + name: instance + namespace: openshift-logging +spec: + outputs: + - type: "kafka" + name: kafka-open + url: tcp://10.46.55.190:9092/test + inputs: + - name: infra-logs + infrastructure: {} + pipelines: + - name: audit-logs + inputRefs: + - audit + outputRefs: + - kafka-open + - name: infrastructure-logs + inputRefs: + - infrastructure + outputRefs: + - kafka-open diff --git a/snippets/ztp_ClusterLogNS.yaml b/snippets/ztp_ClusterLogNS.yaml new file mode 100644 index 0000000000..1fcd5d63d7 --- /dev/null +++ b/snippets/ztp_ClusterLogNS.yaml @@ -0,0 +1,7 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: openshift-logging + annotations: + workload.openshift.io/allowed: management diff --git a/snippets/ztp_ClusterLogOperGroup.yaml b/snippets/ztp_ClusterLogOperGroup.yaml new file mode 100644 index 0000000000..5626035b7b --- /dev/null +++ b/snippets/ztp_ClusterLogOperGroup.yaml @@ -0,0 +1,9 @@ +--- +apiVersion: operators.coreos.com/v1 +kind: OperatorGroup +metadata: + name: cluster-logging + namespace: openshift-logging +spec: + targetNamespaces: + - openshift-logging diff --git a/snippets/ztp_ClusterLogSubscription.yaml b/snippets/ztp_ClusterLogSubscription.yaml new file mode 100644 index 0000000000..b38dd61555 --- /dev/null +++ b/snippets/ztp_ClusterLogSubscription.yaml @@ -0,0 +1,13 @@ +apiVersion: operators.coreos.com/v1alpha1 +kind: Subscription +metadata: + name: cluster-logging + namespace: openshift-logging +spec: + channel: "stable" + name: cluster-logging + source: redhat-operators + sourceNamespace: openshift-marketplace + installPlanApproval: Manual +status: + state: AtLatestKnown diff --git a/snippets/ztp_ClusterLogging.yaml b/snippets/ztp_ClusterLogging.yaml new file mode 100644 index 0000000000..8924ed155e --- /dev/null +++ b/snippets/ztp_ClusterLogging.yaml @@ -0,0 +1,15 @@ +apiVersion: logging.openshift.io/v1 +kind: ClusterLogging +metadata: + name: instance + namespace: openshift-logging +spec: + managementState: "Managed" + curation: + type: "curator" + curator: + schedule: "30 3 * * *" + collection: + logs: + type: "fluentd" + fluentd: {} diff --git a/snippets/ztp-network-diagnostics.adoc b/snippets/ztp_DisableSnoNetworkDiag.yaml similarity index 55% rename from snippets/ztp-network-diagnostics.adoc rename to snippets/ztp_DisableSnoNetworkDiag.yaml index 9d77430bf2..2152d3bae4 100644 --- a/snippets/ztp-network-diagnostics.adoc +++ b/snippets/ztp_DisableSnoNetworkDiag.yaml @@ -1,11 +1,6 @@ -:_content-type: SNIPPET -.Recommended network diagnostics configuration -[source,yaml] ----- apiVersion: operator.openshift.io/v1 kind: Network metadata: name: cluster spec: disableNetworkDiagnostics: true ----- diff --git a/snippets/ztp_PerformanceProfile.yaml b/snippets/ztp_PerformanceProfile.yaml new file mode 100644 index 0000000000..642a1eea27 --- /dev/null +++ 
b/snippets/ztp_PerformanceProfile.yaml @@ -0,0 +1,30 @@ +apiVersion: performance.openshift.io/v2 +kind: PerformanceProfile +metadata: + name: openshift-node-performance-profile +spec: + additionalKernelArgs: + - "rcupdate.rcu_normal_after_boot=0" + - "efi=runtime" + - "module_blacklist=irdma" + cpu: + isolated: 2-51,54-103 + reserved: 0-1,52-53 + hugepages: + defaultHugepagesSize: 1G + pages: + - count: 32 + size: 1G + node: 0 + machineConfigPoolSelector: + pools.operator.machineconfiguration.openshift.io/master: "" + nodeSelector: + node-role.kubernetes.io/master: '' + numa: + topologyPolicy: "restricted" + realTimeKernel: + enabled: true + workloadHints: + realTime: true + highPowerConsumption: false + perPodPowerManagement: false diff --git a/snippets/ztp_PtpConfigSlave.yaml b/snippets/ztp_PtpConfigSlave.yaml new file mode 100644 index 0000000000..f0713680c7 --- /dev/null +++ b/snippets/ztp_PtpConfigSlave.yaml @@ -0,0 +1,122 @@ +apiVersion: ptp.openshift.io/v1 +kind: PtpConfig +metadata: + name: slave + namespace: openshift-ptp +spec: + profile: + - name: "slave" + # The interface name is hardware-specific + interface: ens5f0 + ptp4lOpts: "-2 -s" + phc2sysOpts: "-a -r -n 24" + ptpSchedulingPolicy: SCHED_FIFO + ptpSchedulingPriority: 10 + ptpSettings: + logReduce: "true" + ptp4lConf: | + [global] + # + # Default Data Set + # + twoStepFlag 1 + slaveOnly 0 + priority1 128 + priority2 128 + domainNumber 24 + #utc_offset 37 + clockClass 255 + clockAccuracy 0xFE + offsetScaledLogVariance 0xFFFF + free_running 0 + freq_est_interval 1 + dscp_event 0 + dscp_general 0 + dataset_comparison G.8275.x + G.8275.defaultDS.localPriority 128 + # + # Port Data Set + # + logAnnounceInterval -3 + logSyncInterval -4 + logMinDelayReqInterval -4 + logMinPdelayReqInterval -4 + announceReceiptTimeout 3 + syncReceiptTimeout 0 + delayAsymmetry 0 + fault_reset_interval 4 + neighborPropDelayThresh 20000000 + masterOnly 0 + G.8275.portDS.localPriority 128 + # + # Run time options + # + assume_two_step 0 + logging_level 6 + path_trace_enabled 0 + follow_up_info 0 + hybrid_e2e 0 + inhibit_multicast_service 0 + net_sync_monitor 0 + tc_spanning_tree 0 + tx_timestamp_timeout 50 + unicast_listen 0 + unicast_master_table 0 + unicast_req_duration 3600 + use_syslog 1 + verbose 0 + summary_interval 0 + kernel_leap 1 + check_fup_sync 0 + # + # Servo Options + # + pi_proportional_const 0.0 + pi_integral_const 0.0 + pi_proportional_scale 0.0 + pi_proportional_exponent -0.3 + pi_proportional_norm_max 0.7 + pi_integral_scale 0.0 + pi_integral_exponent 0.4 + pi_integral_norm_max 0.3 + step_threshold 2.0 + first_step_threshold 0.00002 + max_frequency 900000000 + clock_servo pi + sanity_freq_limit 200000000 + ntpshm_segment 0 + # + # Transport options + # + transportSpecific 0x0 + ptp_dst_mac 01:1B:19:00:00:00 + p2p_dst_mac 01:80:C2:00:00:0E + udp_ttl 1 + udp6_scope 0x0E + uds_address /var/run/ptp4l + # + # Default interface options + # + clock_type OC + network_transport L2 + delay_mechanism E2E + time_stamping hardware + tsproc_mode filter + delay_filter moving_median + delay_filter_length 10 + egressLatency 0 + ingressLatency 0 + boundary_clock_jbod 0 + # + # Clock description + # + productDescription ;; + revisionData ;; + manufacturerIdentity 00:00:00 + userDescription ; + timeSource 0xA0 + recommend: + - profile: "slave" + priority: 4 + match: + - nodeLabel: "node-role.kubernetes.io/master" diff --git a/snippets/ztp_PtpSubscription.yaml b/snippets/ztp_PtpSubscription.yaml new file mode 100644 index 0000000000..3948e3a5e5 --- 
/dev/null +++ b/snippets/ztp_PtpSubscription.yaml @@ -0,0 +1,14 @@ +--- +apiVersion: operators.coreos.com/v1alpha1 +kind: Subscription +metadata: + name: ptp-operator-subscription + namespace: openshift-ptp +spec: + channel: "stable" + name: ptp-operator + source: redhat-operators + sourceNamespace: openshift-marketplace + installPlanApproval: Manual +status: + state: AtLatestKnown diff --git a/snippets/ztp_PtpSubscriptionNS.yaml b/snippets/ztp_PtpSubscriptionNS.yaml new file mode 100644 index 0000000000..b4d149c054 --- /dev/null +++ b/snippets/ztp_PtpSubscriptionNS.yaml @@ -0,0 +1,9 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: openshift-ptp + annotations: + workload.openshift.io/allowed: management + labels: + openshift.io/cluster-monitoring: "true" diff --git a/snippets/ztp_PtpSubscriptionOperGroup.yaml b/snippets/ztp_PtpSubscriptionOperGroup.yaml new file mode 100644 index 0000000000..9e69725ca7 --- /dev/null +++ b/snippets/ztp_PtpSubscriptionOperGroup.yaml @@ -0,0 +1,8 @@ +apiVersion: operators.coreos.com/v1 +kind: OperatorGroup +metadata: + name: ptp-operators + namespace: openshift-ptp +spec: + targetNamespaces: + - openshift-ptp diff --git a/snippets/ztp-cluster-monitoring.adoc b/snippets/ztp_ReduceMonitoringFootprint.yaml similarity index 72% rename from snippets/ztp-cluster-monitoring.adoc rename to snippets/ztp_ReduceMonitoringFootprint.yaml index 21c74a89b6..ffc1edff4f 100644 --- a/snippets/ztp-cluster-monitoring.adoc +++ b/snippets/ztp_ReduceMonitoringFootprint.yaml @@ -1,12 +1,10 @@ -:_content-type: SNIPPET -.Recommended cluster monitoring configuration -[source,yaml] ----- apiVersion: v1 kind: ConfigMap metadata: name: cluster-monitoring-config namespace: openshift-monitoring + annotations: + ran.openshift.io/ztp-deploy-wave: "1" data: config.yaml: | grafana: @@ -15,4 +13,3 @@ data: enabled: false prometheusK8s: retention: 24h ----- diff --git a/snippets/ztp_SriovNetwork.yaml b/snippets/ztp_SriovNetwork.yaml new file mode 100644 index 0000000000..22408b3515 --- /dev/null +++ b/snippets/ztp_SriovNetwork.yaml @@ -0,0 +1,17 @@ +apiVersion: sriovnetwork.openshift.io/v1 +kind: SriovNetwork +metadata: + name: "" + namespace: openshift-sriov-network-operator +spec: + resourceName: "du_mh" + networkNamespace: openshift-sriov-network-operator + vlan: "150" + spoofChk: "" + ipam: "" + linkState: "" + maxTxRate: "" + minTxRate: "" + vlanQoS: "" + trust: "" + capabilities: "" diff --git a/snippets/ztp_SriovNetworkNodePolicy.yaml b/snippets/ztp_SriovNetworkNodePolicy.yaml new file mode 100644 index 0000000000..41e50c39fe --- /dev/null +++ b/snippets/ztp_SriovNetworkNodePolicy.yaml @@ -0,0 +1,17 @@ +apiVersion: sriovnetwork.openshift.io/v1 +kind: SriovNetworkNodePolicy +metadata: + name: $name + namespace: openshift-sriov-network-operator +spec: + # Attributes for Mellanox/Intel based NICs + deviceType: netdevice/vfio-pci + isRdma: true/false + nicSelector: + # The exact physical function name must match the hardware used + pfNames: [ens7f0] + nodeSelector: + node-role.kubernetes.io/master: "" + numVfs: 8 + priority: 10 + resourceName: du_mh diff --git a/snippets/ztp_SriovOperatorConfig.yaml b/snippets/ztp_SriovOperatorConfig.yaml new file mode 100644 index 0000000000..9d34b13c34 --- /dev/null +++ b/snippets/ztp_SriovOperatorConfig.yaml @@ -0,0 +1,10 @@ +apiVersion: sriovnetwork.openshift.io/v1 +kind: SriovOperatorConfig +metadata: + name: default + namespace: openshift-sriov-network-operator +spec: + configDaemonNodeSelector: + "node-role.kubernetes.io/master": "" + 
+  enableInjector: true
+  enableOperatorWebhook: true
diff --git a/snippets/ztp_SriovSubscription.yaml b/snippets/ztp_SriovSubscription.yaml
new file mode 100644
index 0000000000..30d2e0464e
--- /dev/null
+++ b/snippets/ztp_SriovSubscription.yaml
@@ -0,0 +1,13 @@
+apiVersion: operators.coreos.com/v1alpha1
+kind: Subscription
+metadata:
+  name: sriov-network-operator-subscription
+  namespace: openshift-sriov-network-operator
+spec:
+  channel: "stable"
+  name: sriov-network-operator
+  source: redhat-operators
+  sourceNamespace: openshift-marketplace
+  installPlanApproval: Manual
+status:
+  state: AtLatestKnown
diff --git a/snippets/ztp_SriovSubscriptionNS.yaml b/snippets/ztp_SriovSubscriptionNS.yaml
new file mode 100644
index 0000000000..0071359bb2
--- /dev/null
+++ b/snippets/ztp_SriovSubscriptionNS.yaml
@@ -0,0 +1,6 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: openshift-sriov-network-operator
+  annotations:
+    workload.openshift.io/allowed: management
diff --git a/snippets/ztp_SriovSubscriptionOperGroup.yaml b/snippets/ztp_SriovSubscriptionOperGroup.yaml
new file mode 100644
index 0000000000..7c61d446d1
--- /dev/null
+++ b/snippets/ztp_SriovSubscriptionOperGroup.yaml
@@ -0,0 +1,8 @@
+apiVersion: operators.coreos.com/v1
+kind: OperatorGroup
+metadata:
+  name: sriov-network-operators
+  namespace: openshift-sriov-network-operator
+spec:
+  targetNamespaces:
+  - openshift-sriov-network-operator
diff --git a/snippets/ztp-storage-lvms.adoc b/snippets/ztp_StorageLVMCluster.yaml
similarity index 58%
rename from snippets/ztp-storage-lvms.adoc
rename to snippets/ztp_StorageLVMCluster.yaml
index fb8f87189a..b376640264 100644
--- a/snippets/ztp-storage-lvms.adoc
+++ b/snippets/ztp_StorageLVMCluster.yaml
@@ -1,7 +1,3 @@
-:_content-type: SNIPPET
-.Recommended LVMCluster configuration
-[source,yaml]
-----
 apiVersion: lvm.topolvm.io/v1alpha1
 kind: LVMCluster
 metadata:
@@ -11,12 +7,10 @@ spec:
   storage:
     deviceClasses:
     - name: vg1
-      deviceSelector: <1>
+      deviceSelector:
         paths:
         - /usr/disk/by-path/pci-0000:11:00.0-nvme-1
       thinPoolConfig:
         name: thin-pool-1
         overprovisionRatio: 10
         sizePercent: 90
-----
-<1> If no disks are specified in the `deviceSelector.paths` field, the {lvms} uses all the unused disks in the specified thin pool.
\ No newline at end of file
diff --git a/snippets/ztp_StorageNS.yaml b/snippets/ztp_StorageNS.yaml
new file mode 100644
index 0000000000..a9ebbc39b9
--- /dev/null
+++ b/snippets/ztp_StorageNS.yaml
@@ -0,0 +1,6 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: openshift-local-storage
+  annotations:
+    workload.openshift.io/allowed: management
diff --git a/snippets/ztp_StorageOperGroup.yaml b/snippets/ztp_StorageOperGroup.yaml
new file mode 100644
index 0000000000..da89b0b282
--- /dev/null
+++ b/snippets/ztp_StorageOperGroup.yaml
@@ -0,0 +1,8 @@
+apiVersion: operators.coreos.com/v1
+kind: OperatorGroup
+metadata:
+  name: openshift-local-storage
+  namespace: openshift-local-storage
+spec:
+  targetNamespaces:
+  - openshift-local-storage
diff --git a/snippets/ztp_StorageSubscription.yaml b/snippets/ztp_StorageSubscription.yaml
new file mode 100644
index 0000000000..d661d214dd
--- /dev/null
+++ b/snippets/ztp_StorageSubscription.yaml
@@ -0,0 +1,13 @@
+apiVersion: operators.coreos.com/v1alpha1
+kind: Subscription
+metadata:
+  name: local-storage-operator
+  namespace: openshift-local-storage
+spec:
+  channel: "stable"
+  name: local-storage-operator
+  source: redhat-operators
+  sourceNamespace: openshift-marketplace
+  installPlanApproval: Manual
+status:
+  state: AtLatestKnown
diff --git a/snippets/ztp-performance-patch.adoc b/snippets/ztp_TunedPerformancePatch.yaml
similarity index 70%
rename from snippets/ztp-performance-patch.adoc
rename to snippets/ztp_TunedPerformancePatch.yaml
index 73b36cce73..a1325e2a41 100644
--- a/snippets/ztp-performance-patch.adoc
+++ b/snippets/ztp_TunedPerformancePatch.yaml
@@ -1,7 +1,3 @@
-:_content-type: SNIPPET
-.Recommended extended Tuned profile configuration
-[source,yaml]
-----
 apiVersion: tuned.openshift.io/v1
 kind: Tuned
 metadata:
@@ -9,23 +5,21 @@ metadata:
   namespace: openshift-cluster-node-tuning-operator
 spec:
   profile:
-  - data: |
+  - name: performance-patch
+    data: |
       [main]
       summary=Configuration changes profile inherited from performance created tuned
      include=openshift-node-performance-openshift-node-performance-profile
-      [bootloader]
-      cmdline_crash=nohz_full=2-51,54-103
       [sysctl]
       kernel.timer_migration=1
       [scheduler]
       group.ice-ptp=0:f:10:*:ice-ptp.*
+      group.ice-gnss=0:f:10:*:ice-gnss.*
       [service]
       service.stalld=start,enable
       service.chronyd=stop,disable
-    name: performance-patch
   recommend:
   - machineConfigLabels:
-      machineconfiguration.openshift.io/role: master
+      machineconfiguration.openshift.io/role: "master"
     priority: 19
     profile: performance-patch
-----
diff --git a/snippets/ztp-07-ztp-sno-du-configuring-crun-container-runtime-master.adoc b/snippets/ztp_enable-crun-master.yaml
similarity index 71%
rename from snippets/ztp-07-ztp-sno-du-configuring-crun-container-runtime-master.adoc
rename to snippets/ztp_enable-crun-master.yaml
index 59070a8d76..8d886536e0 100644
--- a/snippets/ztp-07-ztp-sno-du-configuring-crun-container-runtime-master.adoc
+++ b/snippets/ztp_enable-crun-master.yaml
@@ -1,7 +1,3 @@
-:_content-type: SNIPPET
-.Recommended `ContainerRuntimeConfig` CR for control plane nodes
-[source,yaml]
-----
 apiVersion: machineconfiguration.openshift.io/v1
 kind: ContainerRuntimeConfig
 metadata:
@@ -12,4 +8,3 @@ spec:
       pools.operator.machineconfiguration.openshift.io/master: ""
   containerRuntimeConfig:
     defaultRuntime: crun
-----
diff --git a/snippets/ztp-08-ztp-sno-du-configuring-crun-container-runtime-worker.adoc b/snippets/ztp_enable-crun-worker.yaml
similarity index 72%
rename from snippets/ztp-08-ztp-sno-du-configuring-crun-container-runtime-worker.adoc
rename to snippets/ztp_enable-crun-worker.yaml
index f754179643..d10bd862b0 100644
--- a/snippets/ztp-08-ztp-sno-du-configuring-crun-container-runtime-worker.adoc
+++ b/snippets/ztp_enable-crun-worker.yaml
@@ -1,7 +1,3 @@
-:_content-type: SNIPPET
-.Recommended `ContainerRuntimeConfig` CR for worker nodes
-[source,yaml]
-----
 apiVersion: machineconfiguration.openshift.io/v1
 kind: ContainerRuntimeConfig
 metadata:
@@ -12,4 +8,3 @@ spec:
       pools.operator.machineconfiguration.openshift.io/worker: ""
   containerRuntimeConfig:
     defaultRuntime: crun
-----
diff --git a/snippets/ztp_example-sno.yaml b/snippets/ztp_example-sno.yaml
new file mode 100644
index 0000000000..96de050a2d
--- /dev/null
+++ b/snippets/ztp_example-sno.yaml
@@ -0,0 +1,126 @@
+# example-node1-bmh-secret and assisted-deployment-pull-secret must be created in the same namespace as the SiteConfig CR (example-sno)
+---
+apiVersion: ran.openshift.io/v1
+kind: SiteConfig
+metadata:
+  name: "example-sno"
+  namespace: "example-sno"
+spec:
+  baseDomain: "example.com"
+  cpuPartitioningMode: AllNodes
+  pullSecretRef:
+    name: "assisted-deployment-pull-secret"
+  clusterImageSetNameRef: "openshift-4.10"
+  sshPublicKey: "ssh-rsa AAAA..."
+  clusters:
+  - clusterName: "example-sno"
+    networkType: "OVNKubernetes"
+    installConfigOverrides: |
+      {
+        "capabilities": {
+          "baselineCapabilitySet": "None",
+          "additionalEnabledCapabilities": [
+            "marketplace",
+            "NodeTuning"
+          ]
+        }
+      }
+    clusterLabels:
+      common: true
+      group-du-sno: ""
+      sites: "example-sno"
+    clusterNetwork:
+    - cidr: 1001:1::/48
+      hostPrefix: 64
+    machineNetwork:
+    - cidr: 1111:2222:3333:4444::/64
+    serviceNetwork:
+    - 1001:2::/112
+    additionalNTPSources:
+    - 1111:2222:3333:4444::2
+    # crTemplates:
+    #   KlusterletAddonConfig: "KlusterletAddonConfigOverride.yaml"
+    nodes:
+    - hostName: "example-node1.example.com"
+      role: "master"
+      # biosConfigRef:
+      #   filePath: "example-hw.profile"
+      bmcAddress: "idrac-virtualmedia+https://[1111:2222:3333:4444::bbbb:1]/redfish/v1/Systems/System.Embedded.1"
+      bmcCredentialsName:
+        name: "example-node1-bmh-secret"
+      bootMACAddress: "AA:BB:CC:DD:EE:11"
+      bootMode: "UEFI"
+      rootDeviceHints:
+        wwn: "0x11111000000asd123"
+      # diskPartition:
+      #   - device: /dev/disk/by-id/wwn-0x11111000000asd123 # match rootDeviceHints
+      #     partitions:
+      #       - mount_point: /var/imageregistry
+      #         size: 102500
+      #         start: 344844
+      ignitionConfigOverride: |
+        {
+          "ignition": {
+            "version": "3.2.0"
+          },
+          "storage": {
+            "disks": [
+              {
+                "device": "/dev/disk/by-id/wwn-0x11111000000asd123",
+                "wipeTable": false,
+                "partitions": [
+                  {
+                    "sizeMiB": 16,
+                    "label": "httpevent1",
+                    "startMiB": 350000
+                  },
+                  {
+                    "sizeMiB": 16,
+                    "label": "httpevent2",
+                    "startMiB": 350016
+                  }
+                ]
+              }
+            ],
+            "filesystems": [
+              {
+                "device": "/dev/disk/by-partlabel/httpevent1",
+                "format": "xfs",
+                "wipeFilesystem": true
+              },
+              {
+                "device": "/dev/disk/by-partlabel/httpevent2",
+                "format": "xfs",
+                "wipeFilesystem": true
+              }
+            ]
+          }
+        }
+      nodeNetwork:
+        interfaces:
+        - name: eno1
+          macAddress: "AA:BB:CC:DD:EE:11"
+        config:
+          interfaces:
+          - name: eno1
+            type: ethernet
+            state: up
+            ipv4:
+              enabled: false
+            ipv6:
+              enabled: true
+              address:
+              - ip: 1111:2222:3333:4444::aaaa:1
+                prefix-length: 64
+          dns-resolver:
+            config:
+              search:
+              - example.com
+              server:
+              - 1111:2222:3333:4444::2
+          routes:
+            config:
+            - destination: ::/0
+              next-hop-interface: eno1
+              next-hop-address: 1111:2222:3333:4444::1
+              table-id: 254
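Editor's note, not part of the patch: the `ztp_SriovNetworkNodePolicy.yaml` snippet above intentionally carries placeholders (`name: $name`, `deviceType: netdevice/vfio-pci`, `isRdma: true/false`) that are resolved per site, typically through a PolicyGenTemplate overlay. The following is a minimal sketch of one possible resolved form, assuming a hypothetical Intel-based midhaul NIC on `ens7f0`; the policy name and the chosen values are illustrative assumptions, not required settings.

[source,yaml]
----
apiVersion: sriovnetwork.openshift.io/v1
kind: SriovNetworkNodePolicy
metadata:
  name: sriov-nnp-du-mh                 # hypothetical name replacing the $name placeholder
  namespace: openshift-sriov-network-operator
spec:
  deviceType: vfio-pci                  # one of the two placeholder options; use netdevice to keep the kernel driver
  isRdma: false                         # RDMA left disabled in this illustrative Intel-based case
  nicSelector:
    pfNames: [ens7f0]                   # must match the physical function name on the target hardware
  nodeSelector:
    node-role.kubernetes.io/master: ""
  numVfs: 8
  priority: 10
  resourceName: du_mh
----

The other placeholder fields in `ztp_SriovNetwork.yaml` (for example `vlan`, `spoofChk`, and `trust`) are populated per site in the same way.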