// Module included in the following assembly:
//
// * hosted_control_planes/hcp_high_availability/hcp-backup-restore-aws.adoc
:_mod-docs-content-type: PROCEDURE
[id="dr-hosted-cluster-within-aws-region-backup_{context}"]
= Backing up a hosted cluster on {aws-short}
To recover your hosted cluster in your target management cluster, you first need to back up all of the relevant data.
.Procedure
. Create a config map file to declare the source management cluster by entering the following command:
+
[source,terminal]
----
$ oc create configmap mgmt-parent-cluster -n default \
--from-literal=from=${MGMT_CLUSTER_NAME}
----
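+
Optionally, you can confirm that the config map records the source management cluster, for example:
+
[source,terminal]
----
$ oc get configmap mgmt-parent-cluster -n default -o jsonpath='{.data.from}'
----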
. Shut down reconciliation of the hosted cluster and the node pools, and scale down the control plane deployments, by entering the following commands:
+
[source,terminal]
----
$ PAUSED_UNTIL="true"
----
+
[source,terminal]
----
$ oc patch -n ${HC_CLUSTER_NS} hostedclusters/${HC_CLUSTER_NAME} \
-p '{"spec":{"pausedUntil":"'${PAUSED_UNTIL}'"}}' --type=merge
----
+
[source,terminal]
----
$ oc patch -n ${HC_CLUSTER_NS} nodepools/${NODEPOOLS} \
-p '{"spec":{"pausedUntil":"'${PAUSED_UNTIL}'"}}' --type=merge
----
+
[source,terminal]
----
$ oc scale deployment -n ${HC_CLUSTER_NS}-${HC_CLUSTER_NAME} --replicas=0 \
kube-apiserver openshift-apiserver openshift-oauth-apiserver control-plane-operator
----
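+
Optionally, to confirm that reconciliation is paused before you continue, you can check the `spec.pausedUntil` field, for example:
+
[source,terminal]
----
$ oc get hostedcluster ${HC_CLUSTER_NAME} -n ${HC_CLUSTER_NS} -o jsonpath='{.spec.pausedUntil}'
----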
. Back up etcd and upload the data to an S3 bucket by running the following bash script:
+
[TIP]
====
Wrap this script in a function and call it from the main function, as shown in the sketch that follows the script.
====
+
[source,terminal]
----
# ETCD Backup
ETCD_PODS="etcd-0"
if [ "${CONTROL_PLANE_AVAILABILITY_POLICY}" = "HighlyAvailable" ]; then
ETCD_PODS="etcd-0 etcd-1 etcd-2"
fi
for POD in ${ETCD_PODS}; do
# Create an etcd snapshot
oc exec -it ${POD} -n ${HC_CLUSTER_NS}-${HC_CLUSTER_NAME} -- env ETCDCTL_API=3 /usr/bin/etcdctl --cacert /etc/etcd/tls/client/etcd-client-ca.crt --cert /etc/etcd/tls/client/etcd-client.crt --key /etc/etcd/tls/client/etcd-client.key --endpoints=localhost:2379 snapshot save /var/lib/data/snapshot.db
oc exec -it ${POD} -n ${HC_CLUSTER_NS}-${HC_CLUSTER_NAME} -- env ETCDCTL_API=3 /usr/bin/etcdctl -w table snapshot status /var/lib/data/snapshot.db
FILEPATH="/${BUCKET_NAME}/${HC_CLUSTER_NAME}-${POD}-snapshot.db"
CONTENT_TYPE="application/x-compressed-tar"
DATE_VALUE=`date -R`
SIGNATURE_STRING="PUT\n\n${CONTENT_TYPE}\n${DATE_VALUE}\n${FILEPATH}"
set +x
ACCESS_KEY=$(grep aws_access_key_id ${AWS_CREDS} | head -n1 | cut -d= -f2 | sed "s/ //g")
SECRET_KEY=$(grep aws_secret_access_key ${AWS_CREDS} | head -n1 | cut -d= -f2 | sed "s/ //g")
SIGNATURE_HASH=$(echo -en ${SIGNATURE_STRING} | openssl sha1 -hmac "${SECRET_KEY}" -binary | base64)
set -x
# FIXME: this is pushing to the OIDC bucket
oc exec -it ${POD} -n ${HC_CLUSTER_NS}-${HC_CLUSTER_NAME} -- curl -X PUT -T "/var/lib/data/snapshot.db" \
-H "Host: ${BUCKET_NAME}.s3.amazonaws.com" \
-H "Date: ${DATE_VALUE}" \
-H "Content-Type: ${CONTENT_TYPE}" \
-H "Authorization: AWS ${ACCESS_KEY}:${SIGNATURE_HASH}" \
https://${BUCKET_NAME}.s3.amazonaws.com/${HC_CLUSTER_NAME}-${POD}-snapshot.db
done
----
+
For more information about backing up etcd, see "Backing up and restoring etcd on a hosted cluster".
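+
The following is a minimal sketch of that wrapper, assuming the environment variables from the previous steps are already set:
+
[source,terminal]
----
function backup_etcd() {
  # Paste the etcd backup script from the previous step here.
  :
}

function main() {
  backup_etcd
}

main
----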
. Back up Kubernetes and {product-title} objects by entering the following commands. You need to back up the following objects:
* `HostedCluster` and `NodePool` objects from the HostedCluster namespace
* `HostedCluster` secrets from the HostedCluster namespace
* `HostedControlPlane` from the Hosted Control Plane namespace
* `Cluster` from the Hosted Control Plane namespace
* `AWSCluster`, `AWSMachineTemplate`, and `AWSMachine` from the Hosted Control Plane namespace
* `MachineDeployments`, `MachineSets`, and `Machines` from the Hosted Control Plane namespace
* `ControlPlane` secrets from the Hosted Control Plane namespace
+
.. Create the backup directories by entering the following commands:
+
[source,terminal]
----
$ mkdir -p ${BACKUP_DIR}/namespaces/${HC_CLUSTER_NS} \
${BACKUP_DIR}/namespaces/${HC_CLUSTER_NS}-${HC_CLUSTER_NAME}
----
+
[source,terminal]
----
$ chmod 700 ${BACKUP_DIR}/namespaces/
----
+
.. Back up the `HostedCluster` objects from the `HostedCluster` namespace by entering the following commands:
+
[source,terminal]
----
$ echo "Backing Up HostedCluster Objects:"
----
+
[source,terminal]
----
$ oc get hc ${HC_CLUSTER_NAME} -n ${HC_CLUSTER_NS} -o yaml > \
${BACKUP_DIR}/namespaces/${HC_CLUSTER_NS}/hc-${HC_CLUSTER_NAME}.yaml
----
+
[source,terminal]
----
$ echo "--> HostedCluster"
----
+
[source,terminal]
----
$ sed -i '' -e '/^status:$/,$d' \
${BACKUP_DIR}/namespaces/${HC_CLUSTER_NS}/hc-${HC_CLUSTER_NAME}.yaml
----
+
.. Back up the `NodePool` objects from the `HostedCluster` namespace by entering the following commands:
+
[source,terminal]
----
$ oc get np ${NODEPOOLS} -n ${HC_CLUSTER_NS} -o yaml > \
${BACKUP_DIR}/namespaces/${HC_CLUSTER_NS}/np-${NODEPOOLS}.yaml
----
+
[source,terminal]
----
$ echo "--> NodePool"
----
+
[source,terminal]
----
$ sed -i '' -e '/^status:$/,$ d' \
${BACKUP_DIR}/namespaces/${HC_CLUSTER_NS}/np-${NODEPOOLS}.yaml
----
+
.. Back up the secrets in the `HostedCluster` namespace by running the following shell script:
+
[source,terminal]
----
$ echo "--> HostedCluster Secrets:"
for s in $(oc get secret -n ${HC_CLUSTER_NS} | grep "^${HC_CLUSTER_NAME}" | awk '{print $1}'); do
oc get secret -n ${HC_CLUSTER_NS} $s -o yaml > ${BACKUP_DIR}/namespaces/${HC_CLUSTER_NS}/secret-${s}.yaml
done
----
+
.. Back up the secrets in the `HostedCluster` control plane namespace by running the following shell script:
+
[source,terminal]
----
$ echo "--> HostedCluster ControlPlane Secrets:"
for s in $(oc get secret -n ${HC_CLUSTER_NS}-${HC_CLUSTER_NAME} | egrep -v "docker|service-account-token|oauth-openshift|NAME|token-${HC_CLUSTER_NAME}" | awk '{print $1}'); do
oc get secret -n ${HC_CLUSTER_NS}-${HC_CLUSTER_NAME} $s -o yaml > ${BACKUP_DIR}/namespaces/${HC_CLUSTER_NS}-${HC_CLUSTER_NAME}/secret-${s}.yaml
done
----
+
.. Back up the hosted control plane by entering the following commands:
+
[source,terminal]
----
$ echo "--> HostedControlPlane:"
----
+
[source,terminal]
----
$ oc get hcp ${HC_CLUSTER_NAME} -n ${HC_CLUSTER_NS}-${HC_CLUSTER_NAME} -o yaml > \
${BACKUP_DIR}/namespaces/${HC_CLUSTER_NS}-${HC_CLUSTER_NAME}/hcp-${HC_CLUSTER_NAME}.yaml
----
+
.. Back up the cluster by entering the following commands:
+
[source,terminal]
----
$ echo "--> Cluster:"
----
+
[source,terminal]
----
$ CL_NAME=$(oc get hcp ${HC_CLUSTER_NAME} -n ${HC_CLUSTER_NS}-${HC_CLUSTER_NAME} \
-o jsonpath={.metadata.labels.\*} | grep ${HC_CLUSTER_NAME})
----
+
[source,terminal]
----
$ oc get cluster ${CL_NAME} -n ${HC_CLUSTER_NS}-${HC_CLUSTER_NAME} -o yaml > \
${BACKUP_DIR}/namespaces/${HC_CLUSTER_NS}-${HC_CLUSTER_NAME}/cl-${HC_CLUSTER_NAME}.yaml
----
+
.. Back up the {aws-short} cluster by entering the following commands:
+
[source,terminal]
----
$ echo "--> AWS Cluster:"
----
+
[source,terminal]
----
$ oc get awscluster ${HC_CLUSTER_NAME} -n ${HC_CLUSTER_NS}-${HC_CLUSTER_NAME} -o yaml > \
${BACKUP_DIR}/namespaces/${HC_CLUSTER_NS}-${HC_CLUSTER_NAME}/awscl-${HC_CLUSTER_NAME}.yaml
----
+
.. Back up the {aws-short} `MachineTemplate` objects by entering the following commands:
+
[source,terminal]
----
$ echo "--> AWS Machine Template:"
----
+
[source,terminal]
----
$ oc get awsmachinetemplate ${NODEPOOLS} -n ${HC_CLUSTER_NS}-${HC_CLUSTER_NAME} -o yaml > \
${BACKUP_DIR}/namespaces/${HC_CLUSTER_NS}-${HC_CLUSTER_NAME}/awsmt-${HC_CLUSTER_NAME}.yaml
----
+
.. Back up the {aws-short} `Machines` objects by running the following shell script:
+
[source,terminal]
----
$ echo "--> AWS Machine:"
----
+
[source,terminal]
----
$ CL_NAME=$(oc get hcp ${HC_CLUSTER_NAME} -n ${HC_CLUSTER_NS}-${HC_CLUSTER_NAME} -o jsonpath={.metadata.labels.\*} | grep ${HC_CLUSTER_NAME})
for s in $(oc get awsmachines -n ${HC_CLUSTER_NS}-${HC_CLUSTER_NAME} --no-headers | grep ${CL_NAME} | cut -f1 -d\ ); do
oc get -n ${HC_CLUSTER_NS}-${HC_CLUSTER_NAME} awsmachines $s -o yaml > ${BACKUP_DIR}/namespaces/${HC_CLUSTER_NS}-${HC_CLUSTER_NAME}/awsm-${s}.yaml
done
----
+
.. Back up the `MachineDeployments` objects by running the following shell script:
+
[source,terminal]
----
$ echo "--> HostedCluster MachineDeployments:"
for s in $(oc get machinedeployment -n ${HC_CLUSTER_NS}-${HC_CLUSTER_NAME} -o name); do
mdp_name=$(echo ${s} | cut -f 2 -d /)
oc get -n ${HC_CLUSTER_NS}-${HC_CLUSTER_NAME} $s -o yaml > ${BACKUP_DIR}/namespaces/${HC_CLUSTER_NS}-${HC_CLUSTER_NAME}/machinedeployment-${mdp_name}.yaml
done
----
+
.. Back up the `MachineSets` objects by running the following shell script:
+
[source,terminal]
----
$ echo "--> HostedCluster MachineSets:"
for s in $(oc get machineset -n ${HC_CLUSTER_NS}-${HC_CLUSTER_NAME} -o name); do
ms_name=$(echo ${s} | cut -f 2 -d /)
oc get -n ${HC_CLUSTER_NS}-${HC_CLUSTER_NAME} $s -o yaml > ${BACKUP_DIR}/namespaces/${HC_CLUSTER_NS}-${HC_CLUSTER_NAME}/machineset-${ms_name}.yaml
done
----
+
.. Back up the `Machines` objects from the Hosted Control Plane namespace by running the following shell script:
+
[source,terminal]
----
$ echo "--> HostedCluster Machine:"
for s in $(oc get machine -n ${HC_CLUSTER_NS}-${HC_CLUSTER_NAME} -o name); do
m_name=$(echo ${s} | cut -f 2 -d /)
oc get -n ${HC_CLUSTER_NS}-${HC_CLUSTER_NAME} $s -o yaml > ${BACKUP_DIR}/namespaces/${HC_CLUSTER_NS}-${HC_CLUSTER_NAME}/machine-${m_name}.yaml
done
----
. Clean up the `ControlPlane` routes by entering the following command:
+
[source,terminal]
----
$ oc delete routes -n ${HC_CLUSTER_NS}-${HC_CLUSTER_NAME} --all
----
+
By entering that command, you enable the ExternalDNS Operator to delete the Route53 entries.
. Verify that the Route53 entries are clean by running the following script:
+
[source,terminal]
----
function clean_routes() {
if [[ -z "${1}" ]];then
echo "Specify the namespace where the routes must be cleaned"
exit 1
fi
if [[ -z "${2}" ]];then
echo "Specify the Route53 hosted zone ID"
exit 1
fi
# Constants
ZONE_ID=${2}
ROUTES=10
timeout=40
count=0
# Deleting the routes removes their ownership in AWS, including for the API route
oc delete route -n ${1} --all
while [ ${ROUTES} -gt 2 ]
do
echo "Waiting for ExternalDNS Operator to clean the DNS Records in AWS Route53 where the zone id is: ${ZONE_ID}..."
echo "Try: (${count}/${timeout})"
sleep 10
if [[ $count -eq ${timeout} ]];then
echo "Timed out waiting for the Route53 DNS records to be cleaned"
exit 1
fi
count=$((count+1))
ROUTES=$(aws route53 list-resource-record-sets --hosted-zone-id ${ZONE_ID} --max-items 10000 --output json | grep -c ${EXTERNAL_DNS_DOMAIN})
done
}
# SAMPLE: clean_routes "<HC ControlPlane Namespace>" "<AWS_ZONE_ID>"
clean_routes "${HC_CLUSTER_NS}-${HC_CLUSTER_NAME}" "${AWS_ZONE_ID}"
----
.Verification
Check all of the {product-title} objects and the S3 bucket to verify that everything looks as expected.
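For example, one way to spot-check the backup, assuming that the `aws` CLI can access the bucket, is to list the uploaded etcd snapshots and the exported manifests:

[source,terminal]
----
$ aws s3 ls s3://${BUCKET_NAME}/ | grep snapshot.db
----

[source,terminal]
----
$ ls -R ${BACKUP_DIR}/namespaces
----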
.Next steps
Restore your hosted cluster.