Merge pull request #75494 from openshift-cherrypick-robot/cherry-pick-73787-to-enterprise-4.16

[enterprise-4.16] TELCODOCS-1485: Updates to reflect new backend remediation process by TALM
2026-02-06 06:46:26 +01:00 · 2024-05-03 10:53:21 -04:00
parent efbca5430e cd168fa4b4
commit 01ac7e0809
6 changed files with 59 additions and 113 deletions
--- a/modules/cnf-about-topology-aware-lifecycle-manager-blocking-crs.adoc
+++ b/modules/cnf-about-topology-aware-lifecycle-manager-blocking-crs.adoc
@@ -1,6 +1,6 @@
 // Module included in the following assemblies:
 //
-// * scalability_and_performance/cnf-talm-for-cluster-upgrades.adoc
+// * edge_computing/cnf-talm-for-cluster-upgrades.adoc

 :_mod-docs-content-type: PROCEDURE
 [id="cnf-about-topology-aware-lifecycle-manager-blocking-crs_{context}"]
@@ -54,10 +54,6 @@ status:
    reason: UpgradeNotStarted
    status: "False"
    type: Ready
-  copiedPolicies:
-  - cgu-a-policy1-common-cluster-version-policy
-  - cgu-a-policy2-common-pao-sub-policy
-  - cgu-a-policy3-common-ptp-sub-policy
  managedPoliciesForUpgrade:
  - name: policy1-common-cluster-version-policy
    namespace: default
@@ -108,11 +104,6 @@ status:
    reason: UpgradeNotStarted
    status: "False"
    type: Ready
-  copiedPolicies:
-  - cgu-b-policy1-common-cluster-version-policy
-  - cgu-b-policy2-common-pao-sub-policy
-  - cgu-b-policy3-common-ptp-sub-policy
-  - cgu-b-policy4-common-sriov-sub-policy
  managedPoliciesForUpgrade:
  - name: policy1-common-cluster-version-policy
    namespace: default
@@ -164,9 +155,6 @@ status:
    reason: UpgradeNotStarted
    status: "False"
    type: Ready
-  copiedPolicies:
-  - cgu-c-policy1-common-cluster-version-policy
-  - cgu-c-policy4-common-sriov-sub-policy
  managedPoliciesCompliantBeforeUpgrade:
  - policy2-common-pao-sub-policy
  - policy3-common-ptp-sub-policy
@@ -238,10 +226,6 @@ status:
    reason: UpgradeCannotStart
    status: "False"
    type: Ready
-  copiedPolicies:
-  - cgu-a-policy1-common-cluster-version-policy
-  - cgu-a-policy2-common-pao-sub-policy
-  - cgu-a-policy3-common-ptp-sub-policy
  managedPoliciesForUpgrade:
  - name: policy1-common-cluster-version-policy
    namespace: default
@@ -296,11 +280,6 @@ status:
    reason: UpgradeCannotStart
    status: "False"
    type: Ready
-  copiedPolicies:
-  - cgu-b-policy1-common-cluster-version-policy
-  - cgu-b-policy2-common-pao-sub-policy
-  - cgu-b-policy3-common-ptp-sub-policy
-  - cgu-b-policy4-common-sriov-sub-policy
  managedPoliciesForUpgrade:
  - name: policy1-common-cluster-version-policy
    namespace: default
@@ -354,9 +333,6 @@ status:
    reason: UpgradeNotCompleted
    status: "False"
    type: Ready
-  copiedPolicies:
-  - cgu-c-policy1-common-cluster-version-policy
-  - cgu-c-policy4-common-sriov-sub-policy
  managedPoliciesCompliantBeforeUpgrade:
  - policy2-common-pao-sub-policy
  - policy3-common-ptp-sub-policy
--- a/modules/cnf-topology-aware-lifecycle-manager-apply-policies.adoc
+++ b/modules/cnf-topology-aware-lifecycle-manager-apply-policies.adoc
@@ -1,6 +1,6 @@
 // Module included in the following assemblies:
 // Epic CNF-2600 (CNF-2133) (4.10), Story TELCODOCS-285
-// * scalability_and_performance/cnf-talm-for-cluster-upgrades.adoc
+// * edge_computing/cnf-talm-for-cluster-upgrades.adoc

 :_mod-docs-content-type: PROCEDURE
 [id="talo-apply-policies_{context}"]
@@ -11,6 +11,7 @@ You can update your managed clusters by applying your policies.
 .Prerequisites

 * Install the {cgu-operator-first}.
+* {cgu-operator} 4.16 requires {rh-rhacm} 2.9 or later.
 * Provision one or more managed clusters.
 * Log in as a user with `cluster-admin` privileges.
 * Create {rh-rhacm} policies in the hub cluster.
@@ -64,7 +65,6 @@ $ oc get cgu --all-namespaces
 ----
 +
 .Example output
-+
 [source,terminal]
 ----
 NAMESPACE   NAME  AGE  STATE      DETAILS
@@ -79,7 +79,6 @@ $ oc get cgu -n default cgu-1 -ojsonpath='{.status}' | jq
 ----
 +
 .Example output
-+
 [source,json]
 ----
 {
@@ -93,12 +92,6 @@ $ oc get cgu -n default cgu-1 -ojsonpath='{.status}' | jq
      "type": "Progressing"
    }
  ],
-  "copiedPolicies": [
-    "cgu-policy1-common-cluster-version-policy",
-    "cgu-policy2-common-nto-sub-policy",
-    "cgu-policy3-common-ptp-sub-policy",
-    "cgu-policy4-common-sriov-sub-policy"
-  ],
  "managedPoliciesContent": {
    "policy1-common-cluster-version-policy": "null",
    "policy2-common-nto-sub-policy": "[{\"kind\":\"Subscription\",\"name\":\"node-tuning-operator\",\"namespace\":\"openshift-cluster-node-tuning-operator\"}]",
@@ -141,9 +134,6 @@ $ oc get cgu -n default cgu-1 -ojsonpath='{.status}' | jq
    "cgu-policy3-common-ptp-sub-policy",
    "cgu-policy4-common-sriov-sub-policy"
  ],
-  "precaching": {
-    "spec": {}
-  },
  "remediationPlan": [
    [
      "spoke1",
@@ -159,28 +149,6 @@ $ oc get cgu -n default cgu-1 -ojsonpath='{.status}' | jq
 ----
 <1> The `spec.enable` field in the `ClusterGroupUpgrade` CR is set to `false`.

-.. Check the status of the policies by running the following command:
-+
-[source,terminal]
----
-$ oc get policies -A
----
-+
-.Example output
-[source,terminal]
----
-NAMESPACE   NAME                                                 REMEDIATION ACTION   COMPLIANCE STATE   AGE
-default     cgu-policy1-common-cluster-version-policy            enforce                                 17m <1>
-default     cgu-policy2-common-nto-sub-policy                    enforce                                 17m
-default     cgu-policy3-common-ptp-sub-policy                    enforce                                 17m
-default     cgu-policy4-common-sriov-sub-policy                  enforce                                 17m
-default     policy1-common-cluster-version-policy                inform               NonCompliant       15h
-default     policy2-common-nto-sub-policy                        inform               NonCompliant       15h
-default     policy3-common-ptp-sub-policy                        inform               NonCompliant       18m
-default     policy4-common-sriov-sub-policy                      inform               NonCompliant       18m
----
-<1> The `spec.remediationAction` field of policies currently applied on the clusters is set to `enforce`. The managed policies in `inform` mode from the `ClusterGroupUpgrade` CR remain in `inform` mode during the update.
-
 . Change the value of the `spec.enable` field to `true` by running the following command:
 +
 [source,terminal]
@@ -191,7 +159,7 @@ $ oc --namespace=default patch clustergroupupgrade.ran.openshift.io/cgu-1 \

 .Verification

-. Check the status of the update again by running the following command:
+. Check the status of the update by running the following command:
 +
 [source,terminal]
 ----
@@ -199,7 +167,6 @@ $ oc get cgu -n default cgu-1 -ojsonpath='{.status}' | jq
 ----
 +
 .Example output
-+
 [source,json]
 ----
 {
@@ -210,12 +177,16 @@ $ oc get cgu -n default cgu-1 -ojsonpath='{.status}' | jq
      "message": "All selected clusters are valid",
      "reason": "ClusterSelectionCompleted",
      "status": "True",
-      "type": "ClustersSelected",
+      "type": "ClustersSelected"
+    },
+    {
      "lastTransitionTime": "2022-02-25T15:33:07Z",
      "message": "Completed validation",
      "reason": "ValidationCompleted",
      "status": "True",
-      "type": "Validated",
+      "type": "Validated"
+    },
+    {
      "lastTransitionTime": "2022-02-25T15:34:07Z",
      "message": "Remediating non-compliant policies",
      "reason": "InProgress",
@@ -223,12 +194,6 @@ $ oc get cgu -n default cgu-1 -ojsonpath='{.status}' | jq
      "type": "Progressing"
    }
  ],
-  "copiedPolicies": [
-    "cgu-policy1-common-cluster-version-policy",
-    "cgu-policy2-common-nto-sub-policy",
-    "cgu-policy3-common-ptp-sub-policy",
-    "cgu-policy4-common-sriov-sub-policy"
-  ],
  "managedPoliciesContent": {
    "policy1-common-cluster-version-policy": "null",
    "policy2-common-nto-sub-policy": "[{\"kind\":\"Subscription\",\"name\":\"node-tuning-operator\",\"namespace\":\"openshift-cluster-node-tuning-operator\"}]",
@@ -271,9 +236,6 @@ $ oc get cgu -n default cgu-1 -ojsonpath='{.status}' | jq
    "cgu-policy3-common-ptp-sub-policy",
    "cgu-policy4-common-sriov-sub-policy"
  ],
-  "precaching": {
-    "spec": {}
-  },
  "remediationPlan": [
    [
      "spoke1",
@@ -286,17 +248,52 @@ $ oc get cgu -n default cgu-1 -ojsonpath='{.status}' | jq
  ],
  "status": {
    "currentBatch": 1,
-    "currentBatchStartedAt": "2022-02-25T15:54:16Z",
-    "remediationPlanForBatch": {
-      "spoke1": 0,
-      "spoke2": 1
+    "currentBatchRemediationProgress": {
+       "spoke1": {
+          "policyIndex": 1,
+          "state": "InProgress"
+       },
+       "spoke2": {
+          "policyIndex": 1,
+          "state": "InProgress"
+       }
    },
+    "currentBatchStartedAt": "2022-02-25T15:54:16Z",
    "startedAt": "2022-02-25T15:54:16Z"
  }
 }
 ----
 <1> Reflects the update progress of the current batch. Run this command again to receive updated information about the progress.

+. Check the status of the policies by running the following command:
++
+[source,terminal]
+----
+$ oc get policies -A
+----
++
+.Example output
+[source,terminal]
+----
+NAMESPACE   NAME                                        REMEDIATION ACTION    COMPLIANCE STATE     AGE
+spoke1    default.policy1-common-cluster-version-policy enforce               Compliant            18m
+spoke1    default.policy2-common-nto-sub-policy         enforce               NonCompliant         18m
+spoke2    default.policy1-common-cluster-version-policy enforce               Compliant            18m
+spoke2    default.policy2-common-nto-sub-policy         enforce               NonCompliant         18m
+spoke5    default.policy3-common-ptp-sub-policy         inform                NonCompliant         18m
+spoke5    default.policy4-common-sriov-sub-policy       inform                NonCompliant         18m
+spoke6    default.policy3-common-ptp-sub-policy         inform                NonCompliant         18m
+spoke6    default.policy4-common-sriov-sub-policy       inform                NonCompliant         18m
+default   policy1-common-cluster-version-policy         inform                Compliant            18m
+default   policy2-common-nto-sub-policy                 inform                NonCompliant         18m
+default   policy3-common-ptp-sub-policy                 inform                NonCompliant         18m
+default   policy4-common-sriov-sub-policy               inform                NonCompliant         18m
+----
+
+* The `spec.remediationAction` value changes to `enforce` for the child policies applied to the clusters from the current batch.
+* The `spec.remediationAction` value remains `inform` for the child policies in the rest of the clusters.
+* After the batch is complete, the `spec.remediationAction` value changes back to `inform` for the enforced child policies.
+
 . If the policies include Operator subscriptions, you can check the installation progress directly on the single-node cluster.

 .. Export the `KUBECONFIG` file of the single-node cluster you want to check the installation progress for by running the following command:
@@ -314,7 +311,6 @@ $ oc get subs -A | grep -i <subscription_name>
 ----
 +
 .Example output for `cluster-logging` policy
-+
 [source,terminal]
 ----
 NAMESPACE                              NAME                         PACKAGE                      SOURCE             CHANNEL
@@ -329,7 +325,6 @@ $ oc get clusterversion
 ----
 +
 .Example output
-+
 [source,terminal,subs="attributes+"]
 ----
 NAME      VERSION   AVAILABLE   PROGRESSING   SINCE   STATUS
@@ -351,7 +346,6 @@ $ oc get installplan -n <subscription_namespace>
 ----
 +
 .Example output for `cluster-logging` Operator
-+
 [source,terminal]
 ----
 NAMESPACE                              NAME            CSV                                 APPROVAL   APPROVED
@@ -373,7 +367,6 @@ $ oc get csv -n <operator_namespace>
 ----
 +
 .Example output for OpenShift Logging Operator
-+
 [source,terminal]
 ----
 NAME                    DISPLAY                     VERSION   REPLACES   PHASE
--- a/modules/cnf-topology-aware-lifecycle-manager-installation-cli.adoc
+++ b/modules/cnf-topology-aware-lifecycle-manager-installation-cli.adoc
@@ -1,6 +1,6 @@
 // Module included in the following assemblies:
 // Epic CNF-2600 (CNF-2133) (4.10), Story TELCODOCS-285
-// * scalability_and_performance/cnf-talm-for-cluster-upgrades.adoc
+// * edge_computing/cnf-talm-for-cluster-upgrades.adoc

 :_mod-docs-content-type: PROCEDURE
 [id="installing-topology-aware-lifecycle-manager-using-cli_{context}"]
@@ -12,6 +12,7 @@ You can use the OpenShift CLI (`oc`) to install the {cgu-operator-first}.

 * Install the OpenShift CLI (`oc`).
 * Install the latest version of the {rh-rhacm} Operator.
+* {cgu-operator} 4.16 requires {rh-rhacm} 2.9 or later.
 * Set up a hub cluster with disconnected registry.
 * Log in as a user with `cluster-admin` privileges.

--- a/modules/cnf-topology-aware-lifecycle-manager-installation-web-console.adoc
+++ b/modules/cnf-topology-aware-lifecycle-manager-installation-web-console.adoc
@@ -1,6 +1,6 @@
 // Module included in the following assemblies:
 // Epic CNF-2600 (CNF-2133) (4.10), Story TELCODOCS-285
-// * scalability_and_performance/cnf-talm-for-cluster-upgrades.adoc
+// * edge_computing/cnf-talm-for-cluster-upgrades.adoc

 :_mod-docs-content-type: PROCEDURE
 [id="installing-topology-aware-lifecycle-manager-using-web-console_{context}"]
@@ -13,7 +13,8 @@ You can use the {product-title} web console to install the {cgu-operator-full}.
 // Based on polarion test cases

 * Install the latest version of the {rh-rhacm} Operator.
-* Set up a hub cluster with disconnected regitry.
+* {cgu-operator} 4.16 requires {rh-rhacm} 2.9 or later.
+* Set up a hub cluster with a disconnected registry.
 * Log in as a user with `cluster-admin` privileges.

 .Procedure
--- a/modules/cnf-topology-aware-lifecycle-manager-policies-concept.adoc
+++ b/modules/cnf-topology-aware-lifecycle-manager-policies-concept.adoc
@@ -1,14 +1,14 @@
 // Module included in the following assemblies:
 // Epic CNF-2600 (CNF-2133) (4.10), Story TELCODOCS-285
-// * scalability_and_performance/cnf-talm-for-cluster-upgrades.adoc
+// * edge_computing/cnf-talm-for-cluster-upgrades.adoc

 :_mod-docs-content-type: CONCEPT
 [id="talo-policies-concept_{context}"]
 = Update policies on managed clusters

-The {cgu-operator-first} remediates a set of `inform` policies for the clusters specified in the `ClusterGroupUpgrade` CR. {cgu-operator} remediates `inform` policies by making `enforce` copies of the managed {rh-rhacm} policies. Each copied policy has its own corresponding {rh-rhacm} placement rule and {rh-rhacm} placement binding.
+The {cgu-operator-first} remediates a set of `inform` policies for the clusters specified in the `ClusterGroupUpgrade` custom resource (CR). {cgu-operator} remediates `inform` policies by controlling the `remediationAction` specification in a `Policy` CR through the `bindingOverrides.remediationAction` and `subFilter` specifications in the `PlacementBinding` CR. Each policy has its own corresponding {rh-rhacm} placement rule and {rh-rhacm} placement binding.

-One by one, {cgu-operator} adds each cluster from the current batch to the placement rule that corresponds with the applicable managed policy. If a cluster is already compliant with a policy, {cgu-operator} skips applying that policy on the compliant cluster. {cgu-operator} then moves on to applying the next policy to the non-compliant cluster. After {cgu-operator} completes the updates in a batch, all clusters are removed from the placement rules associated with the copied policies. Then, the update of the next batch starts.
+One by one, {cgu-operator} adds each cluster from the current batch to the placement rule that corresponds with the applicable managed policy. If a cluster is already compliant with a policy, {cgu-operator} skips applying that policy on the compliant cluster. {cgu-operator} then moves on to applying the next policy to the non-compliant cluster. After {cgu-operator} completes the updates in a batch, all clusters are removed from the placement rules associated with the policies. Then, the update of the next batch starts.

 If a spoke cluster does not report any compliant state to {rh-rhacm}, the managed policies on the hub cluster can be missing status information that {cgu-operator} needs. {cgu-operator} handles these cases in the following ways:

@@ -16,7 +16,7 @@ If a spoke cluster does not report any compliant state to {rh-rhacm}, the manage
 * If a policy's `status.status` is missing, {cgu-operator} produces an error.
 * If a cluster's compliance status is missing in the policy's `status.status` field, {cgu-operator} considers that cluster to be non-compliant with that policy.

-The `ClusterGroupUpgrade` CR's `batchTimeoutAction` determines what happens if an upgrade fails for a cluster. You can specify `continue` to skip the failing cluster and continue to upgrade other clusters, or specify `abort` to stop the policy remediation for all clusters. Once the timeout elapses, {cgu-operator} removes all enforce policies to ensure that no further updates are made to clusters.
+The `ClusterGroupUpgrade` CR's `batchTimeoutAction` determines what happens if an upgrade fails for a cluster. You can specify `continue` to skip the failing cluster and continue to upgrade other clusters, or specify `abort` to stop the policy remediation for all clusters. After the timeout elapses, {cgu-operator} removes all the resources it created to ensure that no further updates are made to clusters.

 include::snippets/cnf-example-upgrade-policy.adoc[]

--- a/modules/cnf-topology-aware-lifecycle-manager-troubleshooting.adoc
+++ b/modules/cnf-topology-aware-lifecycle-manager-troubleshooting.adoc
@@ -1,6 +1,6 @@
 // Module included in the following assemblies:
 //
-// * scalability_and_performance/cnf-talm-for-cluster-upgrades.adoc
+// * edge_computing/cnf-talm-for-cluster-upgrades.adoc

 :_mod-docs-content-type: PROCEDURE
 [id="talo-troubleshooting_{context}"]
@@ -368,31 +368,6 @@ $ oc get cgu lab-upgrade -ojsonpath='{.status.conditions}'
 {"lastTransitionTime":"2022-02-17T22:25:28Z", "message":"Missing managed policies:[policyList]", "reason":"NotAllManagedPoliciesExist", "status":"False", "type":"Validated"}
 ----

-[discrete]
-=== Checking corresponding copied policies
-
-Issue:: You want to check if every policy from `status.managedPoliciesForUpgrade` has a corresponding policy in `status.copiedPolicies`.
-
-Resolution:: Run the following command:
-+
-[source,terminal]
----
-$ oc get cgu lab-upgrade -oyaml
----
-+
-.Example output
-+
-[source,yaml]
----
-status:
-  …
-  copiedPolicies:
-  - lab-upgrade-policy3-common-ptp-sub-policy
-  managedPoliciesForUpgrade:
-  - name: policy3-common-ptp-sub-policy
-    namespace: default
----
-
 [discrete]
 === Checking if status.remediationPlan was computed