diff --git a/modules/cnf-about-topology-aware-lifecycle-manager-blocking-crs.adoc b/modules/cnf-about-topology-aware-lifecycle-manager-blocking-crs.adoc
index 34c4285cda..8f103c858f 100644
--- a/modules/cnf-about-topology-aware-lifecycle-manager-blocking-crs.adoc
+++ b/modules/cnf-about-topology-aware-lifecycle-manager-blocking-crs.adoc
@@ -1,6 +1,6 @@
 // Module included in the following assemblies:
 //
-// * scalability_and_performance/cnf-talm-for-cluster-upgrades.adoc
+// * edge_computing/cnf-talm-for-cluster-upgrades.adoc
 
 :_mod-docs-content-type: PROCEDURE
 [id="cnf-about-topology-aware-lifecycle-manager-blocking-crs_{context}"]
@@ -54,10 +54,6 @@ status:
       reason: UpgradeNotStarted
       status: "False"
       type: Ready
-  copiedPolicies:
-  - cgu-a-policy1-common-cluster-version-policy
-  - cgu-a-policy2-common-pao-sub-policy
-  - cgu-a-policy3-common-ptp-sub-policy
   managedPoliciesForUpgrade:
   - name: policy1-common-cluster-version-policy
     namespace: default
@@ -108,11 +104,6 @@ status:
       reason: UpgradeNotStarted
       status: "False"
       type: Ready
-  copiedPolicies:
-  - cgu-b-policy1-common-cluster-version-policy
-  - cgu-b-policy2-common-pao-sub-policy
-  - cgu-b-policy3-common-ptp-sub-policy
-  - cgu-b-policy4-common-sriov-sub-policy
   managedPoliciesForUpgrade:
   - name: policy1-common-cluster-version-policy
     namespace: default
@@ -164,9 +155,6 @@ status:
       reason: UpgradeNotStarted
       status: "False"
       type: Ready
-  copiedPolicies:
-  - cgu-c-policy1-common-cluster-version-policy
-  - cgu-c-policy4-common-sriov-sub-policy
   managedPoliciesCompliantBeforeUpgrade:
   - policy2-common-pao-sub-policy
   - policy3-common-ptp-sub-policy
@@ -238,10 +226,6 @@ status:
       reason: UpgradeCannotStart
       status: "False"
       type: Ready
-  copiedPolicies:
-  - cgu-a-policy1-common-cluster-version-policy
-  - cgu-a-policy2-common-pao-sub-policy
-  - cgu-a-policy3-common-ptp-sub-policy
   managedPoliciesForUpgrade:
   - name: policy1-common-cluster-version-policy
     namespace: default
@@ -296,11 +280,6 @@ status:
       reason: UpgradeCannotStart
       status: "False"
       type: Ready
-  copiedPolicies:
-  - cgu-b-policy1-common-cluster-version-policy
-  - cgu-b-policy2-common-pao-sub-policy
-  - cgu-b-policy3-common-ptp-sub-policy
-  - cgu-b-policy4-common-sriov-sub-policy
   managedPoliciesForUpgrade:
   - name: policy1-common-cluster-version-policy
     namespace: default
@@ -354,9 +333,6 @@ status:
       reason: UpgradeNotCompleted
       status: "False"
       type: Ready
-  copiedPolicies:
-  - cgu-c-policy1-common-cluster-version-policy
-  - cgu-c-policy4-common-sriov-sub-policy
   managedPoliciesCompliantBeforeUpgrade:
   - policy2-common-pao-sub-policy
   - policy3-common-ptp-sub-policy
diff --git a/modules/cnf-topology-aware-lifecycle-manager-apply-policies.adoc b/modules/cnf-topology-aware-lifecycle-manager-apply-policies.adoc
index b656798e8e..44ef338b4e 100644
--- a/modules/cnf-topology-aware-lifecycle-manager-apply-policies.adoc
+++ b/modules/cnf-topology-aware-lifecycle-manager-apply-policies.adoc
@@ -1,6 +1,6 @@
 // Module included in the following assemblies:
 // Epic CNF-2600 (CNF-2133) (4.10), Story TELCODOCS-285
-// * scalability_and_performance/cnf-talm-for-cluster-upgrades.adoc
+// * edge_computing/cnf-talm-for-cluster-upgrades.adoc
 
 :_mod-docs-content-type: PROCEDURE
 [id="talo-apply-policies_{context}"]
@@ -11,6 +11,7 @@ You can update your managed clusters by applying your policies.
 .Prerequisites
 
 * Install the {cgu-operator-first}.
+* {cgu-operator} 4.16 requires {rh-rhacm} 2.9 or later.
 * Provision one or more managed clusters.
 * Log in as a user with `cluster-admin` privileges.
 * Create {rh-rhacm} policies in the hub cluster.
@@ -64,7 +65,6 @@ $ oc get cgu --all-namespaces
 ----
 +
 .Example output
-+
 [source,terminal]
 ----
 NAMESPACE   NAME    AGE   STATE   DETAILS
@@ -79,7 +79,6 @@ $ oc get cgu -n default cgu-1 -ojsonpath='{.status}' | jq
 ----
 +
 .Example output
-+
 [source,json]
 ----
 {
@@ -93,12 +92,6 @@ $ oc get cgu -n default cgu-1 -ojsonpath='{.status}' | jq
       "type": "Progressing"
     }
   ],
-  "copiedPolicies": [
-    "cgu-policy1-common-cluster-version-policy",
-    "cgu-policy2-common-nto-sub-policy",
-    "cgu-policy3-common-ptp-sub-policy",
-    "cgu-policy4-common-sriov-sub-policy"
-  ],
   "managedPoliciesContent": {
     "policy1-common-cluster-version-policy": "null",
     "policy2-common-nto-sub-policy": "[{\"kind\":\"Subscription\",\"name\":\"node-tuning-operator\",\"namespace\":\"openshift-cluster-node-tuning-operator\"}]",
@@ -141,9 +134,6 @@ $ oc get cgu -n default cgu-1 -ojsonpath='{.status}' | jq
     "cgu-policy3-common-ptp-sub-policy",
     "cgu-policy4-common-sriov-sub-policy"
   ],
-  "precaching": {
-    "spec": {}
-  },
   "remediationPlan": [
     [
       "spoke1",
@@ -159,28 +149,6 @@ $ oc get cgu -n default cgu-1 -ojsonpath='{.status}' | jq
 ----
 <1> The `spec.enable` field in the `ClusterGroupUpgrade` CR is set to `false`.
 
-.. Check the status of the policies by running the following command:
-+
-[source,terminal]
-----
-$ oc get policies -A
-----
-+
-.Example output
-[source,terminal]
-----
-NAMESPACE   NAME                                         REMEDIATION ACTION   COMPLIANCE STATE   AGE
-default     cgu-policy1-common-cluster-version-policy   enforce                                 17m <1>
-default     cgu-policy2-common-nto-sub-policy           enforce                                 17m
-default     cgu-policy3-common-ptp-sub-policy           enforce                                 17m
-default     cgu-policy4-common-sriov-sub-policy         enforce                                 17m
-default     policy1-common-cluster-version-policy       inform               NonCompliant       15h
-default     policy2-common-nto-sub-policy               inform               NonCompliant       15h
-default     policy3-common-ptp-sub-policy               inform               NonCompliant       18m
-default     policy4-common-sriov-sub-policy             inform               NonCompliant       18m
-----
-<1> The `spec.remediationAction` field of policies currently applied on the clusters is set to `enforce`. The managed policies in `inform` mode from the `ClusterGroupUpgrade` CR remain in `inform` mode during the update.
-
 . Change the value of the `spec.enable` field to `true` by running the following command:
 +
 [source,terminal]
 ----
 $ oc --namespace=default patch clustergroupupgrade.ran.openshift.io/cgu-1 \
@@ -191,7 +159,7 @@
 
 .Verification
 
-. Check the status of the update again by running the following command:
+. Check the status of the update by running the following command:
 +
 [source,terminal]
 ----
@@ -199,7 +167,6 @@ $ oc get cgu -n default cgu-1 -ojsonpath='{.status}' | jq
 ----
 +
 .Example output
-+
 [source,json]
 ----
 {
@@ -210,12 +177,16 @@ $ oc get cgu -n default cgu-1 -ojsonpath='{.status}' | jq
       "message": "All selected clusters are valid",
       "reason": "ClusterSelectionCompleted",
       "status": "True",
-      "type": "ClustersSelected",
+      "type": "ClustersSelected"
+    },
+    {
       "lastTransitionTime": "2022-02-25T15:33:07Z",
       "message": "Completed validation",
       "reason": "ValidationCompleted",
       "status": "True",
-      "type": "Validated",
+      "type": "Validated"
+    },
+    {
       "lastTransitionTime": "2022-02-25T15:34:07Z",
       "message": "Remediating non-compliant policies",
       "reason": "InProgress",
@@ -223,12 +194,6 @@ $ oc get cgu -n default cgu-1 -ojsonpath='{.status}' | jq
       "type": "Progressing"
     }
   ],
-  "copiedPolicies": [
-    "cgu-policy1-common-cluster-version-policy",
-    "cgu-policy2-common-nto-sub-policy",
-    "cgu-policy3-common-ptp-sub-policy",
-    "cgu-policy4-common-sriov-sub-policy"
-  ],
   "managedPoliciesContent": {
     "policy1-common-cluster-version-policy": "null",
     "policy2-common-nto-sub-policy": "[{\"kind\":\"Subscription\",\"name\":\"node-tuning-operator\",\"namespace\":\"openshift-cluster-node-tuning-operator\"}]",
@@ -271,9 +236,6 @@ $ oc get cgu -n default cgu-1 -ojsonpath='{.status}' | jq
     "cgu-policy3-common-ptp-sub-policy",
     "cgu-policy4-common-sriov-sub-policy"
   ],
-  "precaching": {
-    "spec": {}
-  },
   "remediationPlan": [
     [
       "spoke1",
@@ -286,17 +248,52 @@ $ oc get cgu -n default cgu-1 -ojsonpath='{.status}' | jq
   ],
   "status": {
     "currentBatch": 1,
-    "currentBatchStartedAt": "2022-02-25T15:54:16Z",
-    "remediationPlanForBatch": {
-      "spoke1": 0,
-      "spoke2": 1
+    "currentBatchRemediationProgress": {
+      "spoke1": {
+        "policyIndex": 1,
+        "state": "InProgress"
+      },
+      "spoke2": {
+        "policyIndex": 1,
+        "state": "InProgress"
+      }
     },
+    "currentBatchStartedAt": "2022-02-25T15:54:16Z",
     "startedAt": "2022-02-25T15:54:16Z"
   }
 }
 ----
 <1> Reflects the update progress of the current batch. Run this command again to receive updated information about the progress.
+
+. Check the status of the policies by running the following command:
++
+[source,terminal]
+----
+$ oc get policies -A
+----
++
+.Example output
+[source,terminal]
+----
+NAMESPACE   NAME                                             REMEDIATION ACTION   COMPLIANCE STATE   AGE
+spoke1      default.policy1-common-cluster-version-policy   enforce              Compliant          18m
+spoke1      default.policy2-common-nto-sub-policy           enforce              NonCompliant       18m
+spoke2      default.policy1-common-cluster-version-policy   enforce              Compliant          18m
+spoke2      default.policy2-common-nto-sub-policy           enforce              NonCompliant       18m
+spoke5      default.policy3-common-ptp-sub-policy           inform               NonCompliant       18m
+spoke5      default.policy4-common-sriov-sub-policy         inform               NonCompliant       18m
+spoke6      default.policy3-common-ptp-sub-policy           inform               NonCompliant       18m
+spoke6      default.policy4-common-sriov-sub-policy         inform               NonCompliant       18m
+default     policy1-common-cluster-version-policy           inform               Compliant          18m
+default     policy2-common-nto-sub-policy                   inform               NonCompliant       18m
+default     policy3-common-ptp-sub-policy                   inform               NonCompliant       18m
+default     policy4-common-sriov-sub-policy                 inform               NonCompliant       18m
+----
++
+* The `spec.remediationAction` value changes to `enforce` for the child policies applied to the clusters from the current batch.
+* The `spec.remediationAction` value remains `inform` for the child policies in the rest of the clusters.
+* After the batch is complete, the `spec.remediationAction` value changes back to `inform` for the enforced child policies.
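++
+For example, to confirm the effective mode of a single child policy, you can read its `remediationAction` field directly. This spot check is illustrative rather than part of the documented procedure; the policy name and cluster namespace are taken from the example output above:
++
+[source,terminal]
+----
+$ oc get policies default.policy2-common-nto-sub-policy -n spoke1 -ojsonpath='{.spec.remediationAction}'
+----
++
+While `spoke1` is in the current batch, the command is expected to return `enforce`; after the batch completes, it returns `inform` again.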
 
 . If the policies include Operator subscriptions, you can check the installation progress directly on the single-node cluster.
 
 .. Export the `KUBECONFIG` file of the single-node cluster you want to check the installation progress for by running the following command:
 +
 [source,terminal]
 ----
@@ -314,7 +311,6 @@ $ oc get subs -A | grep -i <subscription_name>
 ----
 +
 .Example output for `cluster-logging` policy
-+
 [source,terminal]
 ----
 NAMESPACE   NAME   PACKAGE   SOURCE   CHANNEL
@@ -329,7 +325,6 @@ $ oc get clusterversion
 ----
 +
 .Example output
-+
 [source,terminal,subs="attributes+"]
 ----
 NAME      VERSION   AVAILABLE   PROGRESSING   SINCE   STATUS
@@ -351,7 +346,6 @@ $ oc get installplan -n <namespace>
 ----
 +
 .Example output for `cluster-logging` Operator
-+
 [source,terminal]
 ----
 NAMESPACE   NAME   CSV   APPROVAL   APPROVED
@@ -373,7 +367,6 @@ $ oc get csv -n <namespace>
 ----
 +
 .Example output for OpenShift Logging Operator
-+
 [source,terminal]
 ----
 NAME   DISPLAY   VERSION   REPLACES   PHASE
diff --git a/modules/cnf-topology-aware-lifecycle-manager-installation-cli.adoc b/modules/cnf-topology-aware-lifecycle-manager-installation-cli.adoc
index 60f7e5d535..c6feb9f235 100644
--- a/modules/cnf-topology-aware-lifecycle-manager-installation-cli.adoc
+++ b/modules/cnf-topology-aware-lifecycle-manager-installation-cli.adoc
@@ -1,6 +1,6 @@
 // Module included in the following assemblies:
 // Epic CNF-2600 (CNF-2133) (4.10), Story TELCODOCS-285
-// * scalability_and_performance/cnf-talm-for-cluster-upgrades.adoc
+// * edge_computing/cnf-talm-for-cluster-upgrades.adoc
 
 :_mod-docs-content-type: PROCEDURE
 [id="installing-topology-aware-lifecycle-manager-using-cli_{context}"]
@@ -12,6 +12,7 @@ You can use the OpenShift CLI (`oc`) to install the {cgu-operator-first}.
 
 * Install the OpenShift CLI (`oc`).
 * Install the latest version of the {rh-rhacm} Operator.
+* {cgu-operator} 4.16 requires {rh-rhacm} 2.9 or later.
-* Set up a hub cluster with disconnected registry.
+* Set up a hub cluster with a disconnected registry.
 * Log in as a user with `cluster-admin` privileges.
 
diff --git a/modules/cnf-topology-aware-lifecycle-manager-installation-web-console.adoc b/modules/cnf-topology-aware-lifecycle-manager-installation-web-console.adoc
index e5372de3d3..de4dc9dc94 100644
--- a/modules/cnf-topology-aware-lifecycle-manager-installation-web-console.adoc
+++ b/modules/cnf-topology-aware-lifecycle-manager-installation-web-console.adoc
@@ -1,6 +1,6 @@
 // Module included in the following assemblies:
 // Epic CNF-2600 (CNF-2133) (4.10), Story TELCODOCS-285
-// * scalability_and_performance/cnf-talm-for-cluster-upgrades.adoc
+// * edge_computing/cnf-talm-for-cluster-upgrades.adoc
 
 :_mod-docs-content-type: PROCEDURE
 [id="installing-topology-aware-lifecycle-manager-using-web-console_{context}"]
@@ -13,7 +13,8 @@ You can use the {product-title} web console to install the {cgu-operator-full}.
 
 // Based on polarion test cases
 * Install the latest version of the {rh-rhacm} Operator.
-* Set up a hub cluster with disconnected regitry.
+* {cgu-operator} 4.16 requires {rh-rhacm} 2.9 or later.
+* Set up a hub cluster with a disconnected registry.
 * Log in as a user with `cluster-admin` privileges.
 
 .Procedure
diff --git a/modules/cnf-topology-aware-lifecycle-manager-policies-concept.adoc b/modules/cnf-topology-aware-lifecycle-manager-policies-concept.adoc
index 2c7d148d41..abe4678f28 100644
--- a/modules/cnf-topology-aware-lifecycle-manager-policies-concept.adoc
+++ b/modules/cnf-topology-aware-lifecycle-manager-policies-concept.adoc
@@ -1,14 +1,14 @@
 // Module included in the following assemblies:
 // Epic CNF-2600 (CNF-2133) (4.10), Story TELCODOCS-285
-// * scalability_and_performance/cnf-talm-for-cluster-upgrades.adoc
+// * edge_computing/cnf-talm-for-cluster-upgrades.adoc
 
 :_mod-docs-content-type: CONCEPT
 [id="talo-policies-concept_{context}"]
 = Update policies on managed clusters
 
-The {cgu-operator-first} remediates a set of `inform` policies for the clusters specified in the `ClusterGroupUpgrade` CR. {cgu-operator} remediates `inform` policies by making `enforce` copies of the managed {rh-rhacm} policies. Each copied policy has its own corresponding {rh-rhacm} placement rule and {rh-rhacm} placement binding.
+The {cgu-operator-first} remediates a set of `inform` policies for the clusters specified in the `ClusterGroupUpgrade` custom resource (CR). {cgu-operator} remediates `inform` policies by controlling the `remediationAction` field in a `Policy` CR through the `bindingOverrides.remediationAction` and `subFilter` fields in the `PlacementBinding` CR. Each policy has its own corresponding {rh-rhacm} placement rule and {rh-rhacm} placement binding.
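+
+For illustration, the override mechanism in a `PlacementBinding` CR looks like the following minimal sketch. The names are placeholders, not the literal objects that {cgu-operator} creates on your hub cluster:
+
+[source,yaml]
+----
+apiVersion: policy.open-cluster-management.io/v1
+kind: PlacementBinding
+metadata:
+  name: cgu-policy1-placement # illustrative name
+  namespace: default
+placementRef:
+  name: cgu-policy1-placement
+  apiGroup: apps.open-cluster-management.io
+  kind: PlacementRule
+subjects:
+- name: policy1-common-cluster-version-policy
+  apiGroup: policy.open-cluster-management.io
+  kind: Policy
+bindingOverrides:
+  remediationAction: enforce # overrides the inform mode of the bound policy
+subFilter: restricted
+----
+
+With `subFilter: restricted`, the override applies only to the clusters selected by this binding's placement, which is how {cgu-operator} can switch a policy to `enforce` for the current batch without modifying the policy itself.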
-One by one, {cgu-operator} adds each cluster from the current batch to the placement rule that corresponds with the applicable managed policy. If a cluster is already compliant with a policy, {cgu-operator} skips applying that policy on the compliant cluster. {cgu-operator} then moves on to applying the next policy to the non-compliant cluster. After {cgu-operator} completes the updates in a batch, all clusters are removed from the placement rules associated with the copied policies. Then, the update of the next batch starts.
+One by one, {cgu-operator} adds each cluster from the current batch to the placement rule that corresponds with the applicable managed policy. If a cluster is already compliant with a policy, {cgu-operator} skips applying that policy on the compliant cluster. {cgu-operator} then moves on to applying the next policy to the non-compliant cluster. After {cgu-operator} completes the updates in a batch, all clusters are removed from the placement rules associated with the policies. Then, the update of the next batch starts.
 
 If a spoke cluster does not report any compliant state to {rh-rhacm}, the managed policies on the hub cluster can be missing status information that {cgu-operator} needs. {cgu-operator} handles these cases in the following ways:
 
@@ -16,7 +16,7 @@ If a spoke cluster does not report any compliant state to {rh-rhacm}, the manage
 * If a policy's `status.status` is missing, {cgu-operator} produces an error.
 * If a cluster's compliance status is missing in the policy's `status.status` field, {cgu-operator} considers that cluster to be non-compliant with that policy.
 
-The `ClusterGroupUpgrade` CR's `batchTimeoutAction` determines what happens if an upgrade fails for a cluster. You can specify `continue` to skip the failing cluster and continue to upgrade other clusters, or specify `abort` to stop the policy remediation for all clusters. Once the timeout elapses, {cgu-operator} removes all enforce policies to ensure that no further updates are made to clusters.
+The `ClusterGroupUpgrade` CR's `batchTimeoutAction` determines what happens if an upgrade fails for a cluster. You can specify `continue` to skip the failing cluster and continue to upgrade other clusters, or specify `abort` to stop the policy remediation for all clusters. After the timeout elapses, {cgu-operator} removes all the resources it created to ensure that no further updates are made to clusters.
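+
+The following minimal sketch shows where `batchTimeoutAction` sits in the CR spec. It is illustrative only; the metadata, cluster names, policy names, and remediation strategy values are placeholders rather than values from a working hub cluster:
+
+[source,yaml]
+----
+apiVersion: ran.openshift.io/v1alpha1
+kind: ClusterGroupUpgrade
+metadata:
+  name: cgu-1
+  namespace: default
+spec:
+  batchTimeoutAction: continue # or abort, to stop remediation for all clusters
+  clusters:
+  - spoke1
+  - spoke2
+  managedPolicies:
+  - policy1-common-cluster-version-policy
+  remediationStrategy:
+    maxConcurrency: 1
+    timeout: 240 # minutes before batchTimeoutAction takes effect
+----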
 
 include::snippets/cnf-example-upgrade-policy.adoc[]
diff --git a/modules/cnf-topology-aware-lifecycle-manager-troubleshooting.adoc b/modules/cnf-topology-aware-lifecycle-manager-troubleshooting.adoc
index d38b6d6e28..6b92cba9f0 100644
--- a/modules/cnf-topology-aware-lifecycle-manager-troubleshooting.adoc
+++ b/modules/cnf-topology-aware-lifecycle-manager-troubleshooting.adoc
@@ -1,6 +1,6 @@
 // Module included in the following assemblies:
 //
-// * scalability_and_performance/cnf-talm-for-cluster-upgrades.adoc
+// * edge_computing/cnf-talm-for-cluster-upgrades.adoc
 
 :_mod-docs-content-type: PROCEDURE
 [id="talo-troubleshooting_{context}"]
@@ -368,31 +368,6 @@ $ oc get cgu lab-upgrade -ojsonpath='{.status.conditions}'
 
 {"lastTransitionTime":"2022-02-17T22:25:28Z", "message":"Missing managed policies:[policyList]", "reason":"NotAllManagedPoliciesExist", "status":"False", "type":"Validated"}
 ----
 
-[discrete]
-=== Checking corresponding copied policies
-
-Issue:: You want to check if every policy from `status.managedPoliciesForUpgrade` has a corresponding policy in `status.copiedPolicies`.
-
-Resolution:: Run the following command:
-+
-[source,terminal]
-----
-$ oc get cgu lab-upgrade -oyaml
-----
-+
-.Example output
-+
-[source,yaml]
-----
-status:
-  …
-  copiedPolicies:
-  - lab-upgrade-policy3-common-ptp-sub-policy
-  managedPoliciesForUpgrade:
-  - name: policy3-common-ptp-sub-policy
-    namespace: default
-----
-
 [discrete]
 === Checking if status.remediationPlan was computed