From d2b1f4b85f52e3a1b00f150c112ef174251ecb37 Mon Sep 17 00:00:00 2001 From: Alex Dellapenta Date: Thu, 25 Feb 2021 21:39:01 -0700 Subject: [PATCH] Add refreshing Operator sub t'shooting --- modules/olm-refresh-subs.adoc | 107 ++++++++++++++++++ .../olm-deleting-operators-from-cluster.adoc | 1 + .../troubleshooting-operator-issues.adoc | 3 + 3 files changed, 111 insertions(+) create mode 100644 modules/olm-refresh-subs.adoc diff --git a/modules/olm-refresh-subs.adoc b/modules/olm-refresh-subs.adoc new file mode 100644 index 0000000000..7fc7cf636c --- /dev/null +++ b/modules/olm-refresh-subs.adoc @@ -0,0 +1,107 @@ +// Module included in the following assemblies: +// +// * support/troubleshooting/troubleshooting-operator-issues.adoc + +[id="olm-refresh-subs_{context}"] += Refreshing failing subscriptions + +In Operator Lifecycle Manager (OLM), if you subscribe to an Operator that references images that are not accessible on your network, you can find jobs in the `openshift-marketplace` namespace that are failing with the following errors: + +.Example output +[source,terminal] +---- +ImagePullBackOff for +Back-off pulling image "example.com/openshift4/ose-elasticsearch-operator-bundle@sha256:6d2587129c846ec28d384540322b40b05833e7e00b25cca584e004af9a1d292e" +---- + +.Example output +[source,terminal] +---- +rpc error: code = Unknown desc = error pinging docker registry example.com: Get "https://example.com/v2/": dial tcp: lookup example.com on 10.0.0.1:53: no such host +---- + +As a result, the subscription is stuck in this failing state and the Operator is unable to install or upgrade. + +You can refresh a failing subscription by deleting the subscription, cluster service version (CSV), and other related objects. After recreating the subscription, OLM then reinstalls the correct version of the Operator. + +.Prerequisites + +* You have a failing subscription that is unable to pull an inaccessible bundle image. 
+* You have confirmed that the correct bundle image is accessible. + +.Procedure + +. Get the names of the `Subscription` and `ClusterServiceVersion` objects from the namespace where the Operator is installed: ++ +[source,terminal] +---- +$ oc get sub,csv -n <namespace> +---- ++ +.Example output +[source,terminal] +---- +NAME PACKAGE SOURCE CHANNEL +subscription.operators.coreos.com/elasticsearch-operator elasticsearch-operator redhat-operators 5.0 + +NAME DISPLAY VERSION REPLACES PHASE +clusterserviceversion.operators.coreos.com/elasticsearch-operator.5.0.0-65 OpenShift Elasticsearch Operator 5.0.0-65 Succeeded +---- + +. Delete the subscription: ++ +[source,terminal] +---- +$ oc delete subscription <subscription_name> -n <namespace> +---- + +. Delete the cluster service version: ++ +[source,terminal] +---- +$ oc delete csv <csv_name> -n <namespace> +---- + +. Get the names of any failing jobs and related config maps in the `openshift-marketplace` namespace: ++ +[source,terminal] +---- +$ oc get job,configmap -n openshift-marketplace +---- ++ +.Example output +[source,terminal] +---- +NAME COMPLETIONS DURATION AGE +job.batch/1de9443b6324e629ddf31fed0a853a121275806170e34c926d69e53a7fcbccb 1/1 26s 9m30s + +NAME DATA AGE +configmap/1de9443b6324e629ddf31fed0a853a121275806170e34c926d69e53a7fcbccb 3 9m30s +---- + +. Delete the job: ++ +[source,terminal] +---- +$ oc delete job <job_name> -n openshift-marketplace +---- ++ +This ensures pods that try to pull the inaccessible image are not recreated. + +. Delete the config map: ++ +[source,terminal] +---- +$ oc delete configmap <configmap_name> -n openshift-marketplace +---- + +. Reinstall the Operator using OperatorHub in the web console. 
+ +.Verification + +* Check that the Operator has been reinstalled successfully: ++ +[source,terminal] +---- +$ oc get sub,csv,installplan -n <namespace> +---- diff --git a/operators/admin/olm-deleting-operators-from-cluster.adoc b/operators/admin/olm-deleting-operators-from-cluster.adoc index 0868a62621..a43d2a15bf 100644 --- a/operators/admin/olm-deleting-operators-from-cluster.adoc +++ b/operators/admin/olm-deleting-operators-from-cluster.adoc @@ -9,3 +9,4 @@ The following describes how to delete Operators that were previously installed u include::modules/olm-deleting-operators-from-a-cluster-using-web-console.adoc[leveloffset=+1] include::modules/olm-deleting-operators-from-a-cluster-using-cli.adoc[leveloffset=+1] +include::modules/olm-refresh-subs.adoc[leveloffset=+1] diff --git a/support/troubleshooting/troubleshooting-operator-issues.adoc b/support/troubleshooting/troubleshooting-operator-issues.adoc index a5fee9face..4d988047a8 100644 --- a/support/troubleshooting/troubleshooting-operator-issues.adoc +++ b/support/troubleshooting/troubleshooting-operator-issues.adoc @@ -27,3 +27,6 @@ include::modules/gathering-operator-logs.adoc[leveloffset=+1] // Disabling Machine Config Operator from autorebooting include::modules/troubleshooting-disabling-autoreboot-mco.adoc[leveloffset=+1] + +// Refreshing failing subscriptions +include::modules/olm-refresh-subs.adoc[leveloffset=+1]