mirror of
https://github.com/openshift/openshift-docs.git
synced 2026-02-06 06:46:26 +01:00
Merge pull request #75494 from openshift-cherrypick-robot/cherry-pick-73787-to-enterprise-4.16
[enterprise-4.16] TELCODOCS-1485: Updates to reflect new backend remediation process by TALM
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
// Module included in the following assemblies:
|
||||
//
|
||||
// * scalability_and_performance/cnf-talm-for-cluster-upgrades.adoc
|
||||
// * edge_computing/cnf-talm-for-cluster-upgrades.adoc
|
||||
|
||||
:_mod-docs-content-type: PROCEDURE
|
||||
[id="cnf-about-topology-aware-lifecycle-manager-blocking-crs_{context}"]
|
||||
@@ -54,10 +54,6 @@ status:
|
||||
reason: UpgradeNotStarted
|
||||
status: "False"
|
||||
type: Ready
|
||||
copiedPolicies:
|
||||
- cgu-a-policy1-common-cluster-version-policy
|
||||
- cgu-a-policy2-common-pao-sub-policy
|
||||
- cgu-a-policy3-common-ptp-sub-policy
|
||||
managedPoliciesForUpgrade:
|
||||
- name: policy1-common-cluster-version-policy
|
||||
namespace: default
|
||||
@@ -108,11 +104,6 @@ status:
|
||||
reason: UpgradeNotStarted
|
||||
status: "False"
|
||||
type: Ready
|
||||
copiedPolicies:
|
||||
- cgu-b-policy1-common-cluster-version-policy
|
||||
- cgu-b-policy2-common-pao-sub-policy
|
||||
- cgu-b-policy3-common-ptp-sub-policy
|
||||
- cgu-b-policy4-common-sriov-sub-policy
|
||||
managedPoliciesForUpgrade:
|
||||
- name: policy1-common-cluster-version-policy
|
||||
namespace: default
|
||||
@@ -164,9 +155,6 @@ status:
|
||||
reason: UpgradeNotStarted
|
||||
status: "False"
|
||||
type: Ready
|
||||
copiedPolicies:
|
||||
- cgu-c-policy1-common-cluster-version-policy
|
||||
- cgu-c-policy4-common-sriov-sub-policy
|
||||
managedPoliciesCompliantBeforeUpgrade:
|
||||
- policy2-common-pao-sub-policy
|
||||
- policy3-common-ptp-sub-policy
|
||||
@@ -238,10 +226,6 @@ status:
|
||||
reason: UpgradeCannotStart
|
||||
status: "False"
|
||||
type: Ready
|
||||
copiedPolicies:
|
||||
- cgu-a-policy1-common-cluster-version-policy
|
||||
- cgu-a-policy2-common-pao-sub-policy
|
||||
- cgu-a-policy3-common-ptp-sub-policy
|
||||
managedPoliciesForUpgrade:
|
||||
- name: policy1-common-cluster-version-policy
|
||||
namespace: default
|
||||
@@ -296,11 +280,6 @@ status:
|
||||
reason: UpgradeCannotStart
|
||||
status: "False"
|
||||
type: Ready
|
||||
copiedPolicies:
|
||||
- cgu-b-policy1-common-cluster-version-policy
|
||||
- cgu-b-policy2-common-pao-sub-policy
|
||||
- cgu-b-policy3-common-ptp-sub-policy
|
||||
- cgu-b-policy4-common-sriov-sub-policy
|
||||
managedPoliciesForUpgrade:
|
||||
- name: policy1-common-cluster-version-policy
|
||||
namespace: default
|
||||
@@ -354,9 +333,6 @@ status:
|
||||
reason: UpgradeNotCompleted
|
||||
status: "False"
|
||||
type: Ready
|
||||
copiedPolicies:
|
||||
- cgu-c-policy1-common-cluster-version-policy
|
||||
- cgu-c-policy4-common-sriov-sub-policy
|
||||
managedPoliciesCompliantBeforeUpgrade:
|
||||
- policy2-common-pao-sub-policy
|
||||
- policy3-common-ptp-sub-policy
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Module included in the following assemblies:
|
||||
// Epic CNF-2600 (CNF-2133) (4.10), Story TELCODOCS-285
|
||||
// * scalability_and_performance/cnf-talm-for-cluster-upgrades.adoc
|
||||
// * edge_computing/cnf-talm-for-cluster-upgrades.adoc
|
||||
|
||||
:_mod-docs-content-type: PROCEDURE
|
||||
[id="talo-apply-policies_{context}"]
|
||||
@@ -11,6 +11,7 @@ You can update your managed clusters by applying your policies.
|
||||
.Prerequisites
|
||||
|
||||
* Install the {cgu-operator-first}.
|
||||
* {cgu-operator} 4.16 requires {rh-rhacm} 2.9 or later.
|
||||
* Provision one or more managed clusters.
|
||||
* Log in as a user with `cluster-admin` privileges.
|
||||
* Create {rh-rhacm} policies in the hub cluster.
|
||||
@@ -64,7 +65,6 @@ $ oc get cgu --all-namespaces
|
||||
----
|
||||
+
|
||||
.Example output
|
||||
+
|
||||
[source,terminal]
|
||||
----
|
||||
NAMESPACE NAME AGE STATE DETAILS
|
||||
@@ -79,7 +79,6 @@ $ oc get cgu -n default cgu-1 -ojsonpath='{.status}' | jq
|
||||
----
|
||||
+
|
||||
.Example output
|
||||
+
|
||||
[source,json]
|
||||
----
|
||||
{
|
||||
@@ -93,12 +92,6 @@ $ oc get cgu -n default cgu-1 -ojsonpath='{.status}' | jq
|
||||
"type": "Progressing"
|
||||
}
|
||||
],
|
||||
"copiedPolicies": [
|
||||
"cgu-policy1-common-cluster-version-policy",
|
||||
"cgu-policy2-common-nto-sub-policy",
|
||||
"cgu-policy3-common-ptp-sub-policy",
|
||||
"cgu-policy4-common-sriov-sub-policy"
|
||||
],
|
||||
"managedPoliciesContent": {
|
||||
"policy1-common-cluster-version-policy": "null",
|
||||
"policy2-common-nto-sub-policy": "[{\"kind\":\"Subscription\",\"name\":\"node-tuning-operator\",\"namespace\":\"openshift-cluster-node-tuning-operator\"}]",
|
||||
@@ -141,9 +134,6 @@ $ oc get cgu -n default cgu-1 -ojsonpath='{.status}' | jq
|
||||
"cgu-policy3-common-ptp-sub-policy",
|
||||
"cgu-policy4-common-sriov-sub-policy"
|
||||
],
|
||||
"precaching": {
|
||||
"spec": {}
|
||||
},
|
||||
"remediationPlan": [
|
||||
[
|
||||
"spoke1",
|
||||
@@ -159,28 +149,6 @@ $ oc get cgu -n default cgu-1 -ojsonpath='{.status}' | jq
|
||||
----
|
||||
<1> The `spec.enable` field in the `ClusterGroupUpgrade` CR is set to `false`.
|
||||
|
||||
.. Check the status of the policies by running the following command:
|
||||
+
|
||||
[source,terminal]
|
||||
----
|
||||
$ oc get policies -A
|
||||
----
|
||||
+
|
||||
.Example output
|
||||
[source,terminal]
|
||||
----
|
||||
NAMESPACE NAME REMEDIATION ACTION COMPLIANCE STATE AGE
|
||||
default cgu-policy1-common-cluster-version-policy enforce 17m <1>
|
||||
default cgu-policy2-common-nto-sub-policy enforce 17m
|
||||
default cgu-policy3-common-ptp-sub-policy enforce 17m
|
||||
default cgu-policy4-common-sriov-sub-policy enforce 17m
|
||||
default policy1-common-cluster-version-policy inform NonCompliant 15h
|
||||
default policy2-common-nto-sub-policy inform NonCompliant 15h
|
||||
default policy3-common-ptp-sub-policy inform NonCompliant 18m
|
||||
default policy4-common-sriov-sub-policy inform NonCompliant 18m
|
||||
----
|
||||
<1> The `spec.remediationAction` field of policies currently applied on the clusters is set to `enforce`. The managed policies in `inform` mode from the `ClusterGroupUpgrade` CR remain in `inform` mode during the update.
|
||||
|
||||
. Change the value of the `spec.enable` field to `true` by running the following command:
|
||||
+
|
||||
[source,terminal]
|
||||
@@ -191,7 +159,7 @@ $ oc --namespace=default patch clustergroupupgrade.ran.openshift.io/cgu-1 \
|
||||
|
||||
.Verification
|
||||
|
||||
. Check the status of the update again by running the following command:
|
||||
. Check the status of the update by running the following command:
|
||||
+
|
||||
[source,terminal]
|
||||
----
|
||||
@@ -199,7 +167,6 @@ $ oc get cgu -n default cgu-1 -ojsonpath='{.status}' | jq
|
||||
----
|
||||
+
|
||||
.Example output
|
||||
+
|
||||
[source,json]
|
||||
----
|
||||
{
|
||||
@@ -210,12 +177,16 @@ $ oc get cgu -n default cgu-1 -ojsonpath='{.status}' | jq
|
||||
"message": "All selected clusters are valid",
|
||||
"reason": "ClusterSelectionCompleted",
|
||||
"status": "True",
|
||||
"type": "ClustersSelected",
|
||||
"type": "ClustersSelected"
|
||||
},
|
||||
{
|
||||
"lastTransitionTime": "2022-02-25T15:33:07Z",
|
||||
"message": "Completed validation",
|
||||
"reason": "ValidationCompleted",
|
||||
"status": "True",
|
||||
"type": "Validated",
|
||||
"type": "Validated"
|
||||
},
|
||||
{
|
||||
"lastTransitionTime": "2022-02-25T15:34:07Z",
|
||||
"message": "Remediating non-compliant policies",
|
||||
"reason": "InProgress",
|
||||
@@ -223,12 +194,6 @@ $ oc get cgu -n default cgu-1 -ojsonpath='{.status}' | jq
|
||||
"type": "Progressing"
|
||||
}
|
||||
],
|
||||
"copiedPolicies": [
|
||||
"cgu-policy1-common-cluster-version-policy",
|
||||
"cgu-policy2-common-nto-sub-policy",
|
||||
"cgu-policy3-common-ptp-sub-policy",
|
||||
"cgu-policy4-common-sriov-sub-policy"
|
||||
],
|
||||
"managedPoliciesContent": {
|
||||
"policy1-common-cluster-version-policy": "null",
|
||||
"policy2-common-nto-sub-policy": "[{\"kind\":\"Subscription\",\"name\":\"node-tuning-operator\",\"namespace\":\"openshift-cluster-node-tuning-operator\"}]",
|
||||
@@ -271,9 +236,6 @@ $ oc get cgu -n default cgu-1 -ojsonpath='{.status}' | jq
|
||||
"cgu-policy3-common-ptp-sub-policy",
|
||||
"cgu-policy4-common-sriov-sub-policy"
|
||||
],
|
||||
"precaching": {
|
||||
"spec": {}
|
||||
},
|
||||
"remediationPlan": [
|
||||
[
|
||||
"spoke1",
|
||||
@@ -286,17 +248,52 @@ $ oc get cgu -n default cgu-1 -ojsonpath='{.status}' | jq
|
||||
],
|
||||
"status": {
|
||||
"currentBatch": 1,
|
||||
"currentBatchStartedAt": "2022-02-25T15:54:16Z",
|
||||
"remediationPlanForBatch": {
|
||||
"spoke1": 0,
|
||||
"spoke2": 1
|
||||
"currentBatchRemediationProgress": {
|
||||
"spoke1": {
|
||||
"policyIndex": 1,
|
||||
"state": "InProgress"
|
||||
},
|
||||
"spoke2": {
|
||||
"policyIndex": 1,
|
||||
"state": "InProgress"
|
||||
}
|
||||
},
|
||||
"currentBatchStartedAt": "2022-02-25T15:54:16Z",
|
||||
"startedAt": "2022-02-25T15:54:16Z"
|
||||
}
|
||||
}
|
||||
----
|
||||
<1> Reflects the update progress of the current batch. Run this command again to receive updated information about the progress.
|
||||
|
||||
. Check the status of the policies by running the following command:
|
||||
+
|
||||
[source,terminal]
|
||||
----
|
||||
$ oc get policies -A
|
||||
----
|
||||
+
|
||||
.Example output
|
||||
[source,terminal]
|
||||
----
|
||||
NAMESPACE NAME REMEDIATION ACTION COMPLIANCE STATE AGE
|
||||
spoke1 default.policy1-common-cluster-version-policy enforce Compliant 18m
|
||||
spoke1 default.policy2-common-nto-sub-policy enforce NonCompliant 18m
|
||||
spoke2 default.policy1-common-cluster-version-policy enforce Compliant 18m
|
||||
spoke2 default.policy2-common-nto-sub-policy enforce NonCompliant 18m
|
||||
spoke5 default.policy3-common-ptp-sub-policy inform NonCompliant 18m
|
||||
spoke5 default.policy4-common-sriov-sub-policy inform NonCompliant 18m
|
||||
spoke6 default.policy3-common-ptp-sub-policy inform NonCompliant 18m
|
||||
spoke6 default.policy4-common-sriov-sub-policy inform NonCompliant 18m
|
||||
default policy1-common-cluster-version-policy inform Compliant 18m
|
||||
default policy2-common-nto-sub-policy inform NonCompliant 18m
|
||||
default policy3-common-ptp-sub-policy inform NonCompliant 18m
|
||||
default policy4-common-sriov-sub-policy inform NonCompliant 18m
|
||||
----
|
||||
+
|
||||
* The `spec.remediationAction` value changes to `enforce` for the child policies applied to the clusters from the current batch.
|
||||
* The `spec.remediationAction` value remains `inform` for the child policies in the rest of the clusters.
|
||||
* After the batch is complete, the `spec.remediationAction` value changes back to `inform` for the enforced child policies.
|
||||
|
||||
. If the policies include Operator subscriptions, you can check the installation progress directly on the single-node cluster.
|
||||
|
||||
.. Export the `KUBECONFIG` file of the single-node cluster you want to check the installation progress for by running the following command:
|
||||
@@ -314,7 +311,6 @@ $ oc get subs -A | grep -i <subscription_name>
|
||||
----
|
||||
+
|
||||
.Example output for `cluster-logging` policy
|
||||
+
|
||||
[source,terminal]
|
||||
----
|
||||
NAMESPACE NAME PACKAGE SOURCE CHANNEL
|
||||
@@ -329,7 +325,6 @@ $ oc get clusterversion
|
||||
----
|
||||
+
|
||||
.Example output
|
||||
+
|
||||
[source,terminal,subs="attributes+"]
|
||||
----
|
||||
NAME VERSION AVAILABLE PROGRESSING SINCE STATUS
|
||||
@@ -351,7 +346,6 @@ $ oc get installplan -n <subscription_namespace>
|
||||
----
|
||||
+
|
||||
.Example output for `cluster-logging` Operator
|
||||
+
|
||||
[source,terminal]
|
||||
----
|
||||
NAMESPACE NAME CSV APPROVAL APPROVED
|
||||
@@ -373,7 +367,6 @@ $ oc get csv -n <operator_namespace>
|
||||
----
|
||||
+
|
||||
.Example output for OpenShift Logging Operator
|
||||
+
|
||||
[source,terminal]
|
||||
----
|
||||
NAME DISPLAY VERSION REPLACES PHASE
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Module included in the following assemblies:
|
||||
// Epic CNF-2600 (CNF-2133) (4.10), Story TELCODOCS-285
|
||||
// * scalability_and_performance/cnf-talm-for-cluster-upgrades.adoc
|
||||
// * edge_computing/cnf-talm-for-cluster-upgrades.adoc
|
||||
|
||||
:_mod-docs-content-type: PROCEDURE
|
||||
[id="installing-topology-aware-lifecycle-manager-using-cli_{context}"]
|
||||
@@ -12,6 +12,7 @@ You can use the OpenShift CLI (`oc`) to install the {cgu-operator-first}.
|
||||
|
||||
* Install the OpenShift CLI (`oc`).
|
||||
* Install the latest version of the {rh-rhacm} Operator.
|
||||
* {cgu-operator} 4.16 requires {rh-rhacm} 2.9 or later.
|
||||
* Set up a hub cluster with a disconnected registry.
|
||||
* Log in as a user with `cluster-admin` privileges.
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Module included in the following assemblies:
|
||||
// Epic CNF-2600 (CNF-2133) (4.10), Story TELCODOCS-285
|
||||
// * scalability_and_performance/cnf-talm-for-cluster-upgrades.adoc
|
||||
// * edge_computing/cnf-talm-for-cluster-upgrades.adoc
|
||||
|
||||
:_mod-docs-content-type: PROCEDURE
|
||||
[id="installing-topology-aware-lifecycle-manager-using-web-console_{context}"]
|
||||
@@ -13,7 +13,8 @@ You can use the {product-title} web console to install the {cgu-operator-full}.
|
||||
// Based on polarion test cases
|
||||
|
||||
* Install the latest version of the {rh-rhacm} Operator.
|
||||
* Set up a hub cluster with disconnected regitry.
|
||||
* {cgu-operator} 4.16 requires {rh-rhacm} 2.9 or later.
|
||||
* Set up a hub cluster with a disconnected registry.
|
||||
* Log in as a user with `cluster-admin` privileges.
|
||||
|
||||
.Procedure
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
// Module included in the following assemblies:
|
||||
// Epic CNF-2600 (CNF-2133) (4.10), Story TELCODOCS-285
|
||||
// * scalability_and_performance/cnf-talm-for-cluster-upgrades.adoc
|
||||
// * edge_computing/cnf-talm-for-cluster-upgrades.adoc
|
||||
|
||||
:_mod-docs-content-type: CONCEPT
|
||||
[id="talo-policies-concept_{context}"]
|
||||
= Update policies on managed clusters
|
||||
|
||||
The {cgu-operator-first} remediates a set of `inform` policies for the clusters specified in the `ClusterGroupUpgrade` CR. {cgu-operator} remediates `inform` policies by making `enforce` copies of the managed {rh-rhacm} policies. Each copied policy has its own corresponding {rh-rhacm} placement rule and {rh-rhacm} placement binding.
|
||||
The {cgu-operator-first} remediates a set of `inform` policies for the clusters specified in the `ClusterGroupUpgrade` custom resource (CR). {cgu-operator} remediates `inform` policies by controlling the `remediationAction` specification in a `Policy` CR through the `bindingOverrides.remediationAction` and `subFilter` specifications in the `PlacementBinding` CR. Each policy has its own corresponding {rh-rhacm} placement rule and {rh-rhacm} placement binding.
|
||||
|
||||
One by one, {cgu-operator} adds each cluster from the current batch to the placement rule that corresponds with the applicable managed policy. If a cluster is already compliant with a policy, {cgu-operator} skips applying that policy on the compliant cluster. {cgu-operator} then moves on to applying the next policy to the non-compliant cluster. After {cgu-operator} completes the updates in a batch, all clusters are removed from the placement rules associated with the copied policies. Then, the update of the next batch starts.
|
||||
One by one, {cgu-operator} adds each cluster from the current batch to the placement rule that corresponds with the applicable managed policy. If a cluster is already compliant with a policy, {cgu-operator} skips applying that policy on the compliant cluster. {cgu-operator} then moves on to applying the next policy to the non-compliant cluster. After {cgu-operator} completes the updates in a batch, all clusters are removed from the placement rules associated with the policies. Then, the update of the next batch starts.
|
||||
|
||||
If a spoke cluster does not report any compliance state to {rh-rhacm}, the managed policies on the hub cluster can be missing status information that {cgu-operator} needs. {cgu-operator} handles these cases in the following ways:
|
||||
|
||||
@@ -16,7 +16,7 @@ If a spoke cluster does not report any compliant state to {rh-rhacm}, the manage
|
||||
* If a policy's `status.status` is missing, {cgu-operator} produces an error.
|
||||
* If a cluster's compliance status is missing in the policy's `status.status` field, {cgu-operator} considers that cluster to be non-compliant with that policy.
|
||||
|
||||
The `ClusterGroupUpgrade` CR's `batchTimeoutAction` determines what happens if an upgrade fails for a cluster. You can specify `continue` to skip the failing cluster and continue to upgrade other clusters, or specify `abort` to stop the policy remediation for all clusters. Once the timeout elapses, {cgu-operator} removes all enforce policies to ensure that no further updates are made to clusters.
|
||||
The `batchTimeoutAction` field in the `ClusterGroupUpgrade` CR determines what happens if an upgrade fails for a cluster. You can specify `continue` to skip the failing cluster and continue to upgrade other clusters, or specify `abort` to stop the policy remediation for all clusters. After the timeout elapses, {cgu-operator} removes all the resources it created to ensure that no further updates are made to clusters.
|
||||
|
||||
include::snippets/cnf-example-upgrade-policy.adoc[]
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
// Module included in the following assemblies:
|
||||
//
|
||||
// * scalability_and_performance/cnf-talm-for-cluster-upgrades.adoc
|
||||
// * edge_computing/cnf-talm-for-cluster-upgrades.adoc
|
||||
|
||||
:_mod-docs-content-type: PROCEDURE
|
||||
[id="talo-troubleshooting_{context}"]
|
||||
@@ -368,31 +368,6 @@ $ oc get cgu lab-upgrade -ojsonpath='{.status.conditions}'
|
||||
{"lastTransitionTime":"2022-02-17T22:25:28Z", "message":"Missing managed policies:[policyList]", "reason":"NotAllManagedPoliciesExist", "status":"False", "type":"Validated"}
|
||||
----
|
||||
|
||||
[discrete]
|
||||
=== Checking corresponding copied policies
|
||||
|
||||
Issue:: You want to check if every policy from `status.managedPoliciesForUpgrade` has a corresponding policy in `status.copiedPolicies`.
|
||||
|
||||
Resolution:: Run the following command:
|
||||
+
|
||||
[source,terminal]
|
||||
----
|
||||
$ oc get cgu lab-upgrade -oyaml
|
||||
----
|
||||
+
|
||||
.Example output
|
||||
+
|
||||
[source,yaml]
|
||||
----
|
||||
status:
|
||||
…
|
||||
copiedPolicies:
|
||||
- lab-upgrade-policy3-common-ptp-sub-policy
|
||||
managedPoliciesForUpgrade:
|
||||
- name: policy3-common-ptp-sub-policy
|
||||
namespace: default
|
||||
----
|
||||
|
||||
[discrete]
|
||||
=== Checking if status.remediationPlan was computed
|
||||
|
||||
|
||||
Reference in New Issue
Block a user