From a4c1da6553049e201da668ac0054fbd6f83628fa Mon Sep 17 00:00:00 2001 From: Andrea Hoffer Date: Wed, 30 Sep 2020 15:50:03 -0400 Subject: [PATCH] BZ-1882176: Adding steps to remove the secrets from the old etcd member --- ...tore-replace-crashlooping-etcd-member.adoc | 50 +++++++++++++++++++ .../restore-replace-stopped-etcd-member.adoc | 45 +++++++++++++++++ 2 files changed, 95 insertions(+) diff --git a/modules/restore-replace-crashlooping-etcd-member.adoc b/modules/restore-replace-crashlooping-etcd-member.adoc index fd3248381b..7bd7305136 100644 --- a/modules/restore-replace-crashlooping-etcd-member.adoc +++ b/modules/restore-replace-crashlooping-etcd-member.adoc @@ -44,6 +44,11 @@ sh-4.2# chroot /host + [source,terminal] ---- +sh-4.2# mkdir /var/lib/etcd-backup +---- ++ +[source,terminal] +---- sh-4.2# mv /etc/kubernetes/manifests/etcd-pod.yaml /var/lib/etcd-backup/ ---- @@ -102,6 +107,8 @@ sh-4.2# etcdctl member list -w table | d022e10b498760d5 | started | ip-10-0-154-204.ec2.internal | https://10.0.154.204:2380 | https://10.0.154.204:2379 | +------------------+---------+------------------------------+---------------------------+---------------------------+ ---- ++ +Take note of the ID and the name of the unhealthy etcd member, because these values are needed later in the procedure. .. Remove the unhealthy etcd member by providing the ID to the `etcdctl member remove` command: + @@ -136,6 +143,49 @@ sh-4.2# etcdctl member list -w table + You can now exit the node shell. +. Remove the old secrets for the unhealthy etcd member that was removed. + +.. List the secrets for the unhealthy etcd member that was removed. ++ +[source,terminal] +---- +$ oc get secrets -n openshift-etcd | grep ip-10-0-131-183.ec2.internal <1> +---- +<1> Pass in the name of the unhealthy etcd member that you took note of earlier in this procedure. 
++ +There is a peer, serving, and metrics secret as shown in the following output: ++ +.Example output +[source,terminal] +---- +etcd-peer-ip-10-0-131-183.ec2.internal kubernetes.io/tls 2 47m +etcd-serving-ip-10-0-131-183.ec2.internal kubernetes.io/tls 2 47m +etcd-serving-metrics-ip-10-0-131-183.ec2.internal kubernetes.io/tls 2 47m +---- + +.. Delete the secrets for the unhealthy etcd member that was removed. + +... Delete the peer secret: ++ +[source,terminal] +---- +$ oc delete secret -n openshift-etcd etcd-peer-ip-10-0-131-183.ec2.internal +---- + +... Delete the serving secret: ++ +[source,terminal] +---- +$ oc delete secret -n openshift-etcd etcd-serving-ip-10-0-131-183.ec2.internal +---- + +... Delete the metrics secret: ++ +[source,terminal] +---- +$ oc delete secret -n openshift-etcd etcd-serving-metrics-ip-10-0-131-183.ec2.internal +---- + . Force etcd redeployment. + In a terminal that has access to the cluster as a `cluster-admin` user, run the following command: diff --git a/modules/restore-replace-stopped-etcd-member.adoc b/modules/restore-replace-stopped-etcd-member.adoc index 4e92251ccc..32e0f017b9 100644 --- a/modules/restore-replace-stopped-etcd-member.adoc +++ b/modules/restore-replace-stopped-etcd-member.adoc @@ -67,6 +67,8 @@ sh-4.2# etcdctl member list -w table | ca8c2990a0aa29d1 | started | ip-10-0-154-204.ec2.internal | https://10.0.154.204:2380 | https://10.0.154.204:2379 | +------------------+---------+------------------------------+---------------------------+---------------------------+ ---- ++ +Take note of the ID and the name of the unhealthy etcd member, because these values are needed later in the procedure. .. Remove the unhealthy etcd member by providing the ID to the `etcdctl member remove` command: + @@ -101,6 +103,49 @@ sh-4.2# etcdctl member list -w table + You can now exit the node shell. +. Remove the old secrets for the unhealthy etcd member that was removed. + +.. List the secrets for the unhealthy etcd member that was removed. 
++ +[source,terminal] +---- +$ oc get secrets -n openshift-etcd | grep ip-10-0-131-183.ec2.internal <1> +---- +<1> Pass in the name of the unhealthy etcd member that you took note of earlier in this procedure. ++ +There is a peer, serving, and metrics secret as shown in the following output: ++ +.Example output +[source,terminal] +---- +etcd-peer-ip-10-0-131-183.ec2.internal kubernetes.io/tls 2 47m +etcd-serving-ip-10-0-131-183.ec2.internal kubernetes.io/tls 2 47m +etcd-serving-metrics-ip-10-0-131-183.ec2.internal kubernetes.io/tls 2 47m +---- + +.. Delete the secrets for the unhealthy etcd member that was removed. + +... Delete the peer secret: ++ +[source,terminal] +---- +$ oc delete secret -n openshift-etcd etcd-peer-ip-10-0-131-183.ec2.internal +---- + +... Delete the serving secret: ++ +[source,terminal] +---- +$ oc delete secret -n openshift-etcd etcd-serving-ip-10-0-131-183.ec2.internal +---- + +... Delete the metrics secret: ++ +[source,terminal] +---- +$ oc delete secret -n openshift-etcd etcd-serving-metrics-ip-10-0-131-183.ec2.internal +---- + . Delete and recreate the master machine. After this machine is recreated, a new revision is forced and etcd scales up automatically. + If you are running installer-provisioned infrastructure, or you used the Machine API to create your machines, follow these steps. Otherwise, you must create the new master using the same method that was used to originally create it.