From 21b5a9fbb23b5fc8e697fa15e2344112593fab2d Mon Sep 17 00:00:00 2001 From: Bob Fournier Date: Mon, 3 Apr 2023 17:02:42 -0400 Subject: [PATCH] OCPBUGS-4998: Add additional info in wait-for when status is pending-user-action When the cluster status is installing-pending-user-action the install won't complete. Most likely this is due to an invalid boot disk. When this status is detected also log the host's status_info for hosts that have this status. --- pkg/agent/cluster.go | 10 ++++++++++ pkg/agent/waitfor.go | 15 ++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/pkg/agent/cluster.go b/pkg/agent/cluster.go index 057cc63b19..43f19cff81 100644 --- a/pkg/agent/cluster.go +++ b/pkg/agent/cluster.go @@ -219,6 +219,16 @@ func (czero *Cluster) IsBootstrapComplete() (bool, bool, error) { czero.PrintInstallStatus(clusterMetadata) + // If status indicates pending action, log host info to help pinpoint what is missing + if (*clusterMetadata.Status != czero.installHistory.RestAPIPreviousClusterStatus) && + (*clusterMetadata.Status == models.ClusterStatusInstallingPendingUserAction) { + for _, host := range clusterMetadata.Hosts { + if *host.Status == models.ClusterStatusInstallingPendingUserAction { + logrus.Debugf("Host %s %s", host.RequestedHostname, *host.StatusInfo) + } + } + } + if *clusterMetadata.Status == models.ClusterStatusReady { stuck, err := czero.IsClusterStuckInReady() if err != nil { diff --git a/pkg/agent/waitfor.go b/pkg/agent/waitfor.go index aa3df6e870..00112dc725 100644 --- a/pkg/agent/waitfor.go +++ b/pkg/agent/waitfor.go @@ -18,7 +18,8 @@ func WaitForBootstrapComplete(cluster *Cluster) error { waitContext, cancel := context.WithTimeout(cluster.Ctx, timeout) defer cancel() - var lastErr error + var lastErrOnExit error + var lastErrStr string wait.Until(func() { bootstrap, exitOnErr, err := cluster.IsBootstrapComplete() if bootstrap && err == nil { @@ -28,10 +29,14 @@ func WaitForBootstrapComplete(cluster *Cluster) error { if err != nil { if exitOnErr { - lastErr = err + lastErrOnExit = err cancel() } else { logrus.Info(err) + if err.Error() != lastErrStr { + logrus.Info(err) + lastErrStr = err.Error() + } } } @@ -47,10 +52,10 @@ func WaitForBootstrapComplete(cluster *Cluster) error { waitErr := waitContext.Err() if waitErr != nil { - if waitErr == context.Canceled && lastErr != nil { - return errors.Wrap(lastErr, "bootstrap process returned error") + if errors.Is(waitErr, context.Canceled) && lastErrOnExit != nil { + return errors.Wrap(lastErrOnExit, "bootstrap process returned error") } - if waitErr == context.DeadlineExceeded { + if errors.Is(waitErr, context.DeadlineExceeded) { return errors.Wrap(waitErr, "bootstrap process timed out") } }