mirror of
https://github.com/openshift/installer.git
synced 2026-02-05 15:47:14 +01:00
OCPBUGS-4998: Add additional info in wait-for when status is pending-user-action
When the cluster status is installing-pending-user-action, the install won't complete. Most likely this is due to an invalid boot disk. When this status is detected, also log the status_info of each host that has this status.
This commit is contained in:
@@ -219,6 +219,16 @@ func (czero *Cluster) IsBootstrapComplete() (bool, bool, error) {
|
||||
|
||||
czero.PrintInstallStatus(clusterMetadata)
|
||||
|
||||
// If status indicates pending action, log host info to help pinpoint what is missing
|
||||
if (*clusterMetadata.Status != czero.installHistory.RestAPIPreviousClusterStatus) &&
|
||||
(*clusterMetadata.Status == models.ClusterStatusInstallingPendingUserAction) {
|
||||
for _, host := range clusterMetadata.Hosts {
|
||||
if *host.Status == models.ClusterStatusInstallingPendingUserAction {
|
||||
logrus.Debugf("Host %s %s", host.RequestedHostname, *host.StatusInfo)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if *clusterMetadata.Status == models.ClusterStatusReady {
|
||||
stuck, err := czero.IsClusterStuckInReady()
|
||||
if err != nil {
|
||||
|
||||
@@ -18,7 +18,8 @@ func WaitForBootstrapComplete(cluster *Cluster) error {
|
||||
waitContext, cancel := context.WithTimeout(cluster.Ctx, timeout)
|
||||
defer cancel()
|
||||
|
||||
var lastErr error
|
||||
var lastErrOnExit error
|
||||
var lastErrStr string
|
||||
wait.Until(func() {
|
||||
bootstrap, exitOnErr, err := cluster.IsBootstrapComplete()
|
||||
if bootstrap && err == nil {
|
||||
@@ -28,10 +29,14 @@ func WaitForBootstrapComplete(cluster *Cluster) error {
|
||||
|
||||
if err != nil {
|
||||
if exitOnErr {
|
||||
lastErr = err
|
||||
lastErrOnExit = err
|
||||
cancel()
|
||||
} else {
|
||||
logrus.Info(err)
|
||||
if err.Error() != lastErrStr {
|
||||
logrus.Info(err)
|
||||
lastErrStr = err.Error()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -47,10 +52,10 @@ func WaitForBootstrapComplete(cluster *Cluster) error {
|
||||
|
||||
waitErr := waitContext.Err()
|
||||
if waitErr != nil {
|
||||
if waitErr == context.Canceled && lastErr != nil {
|
||||
return errors.Wrap(lastErr, "bootstrap process returned error")
|
||||
if errors.Is(waitErr, context.Canceled) && lastErrOnExit != nil {
|
||||
return errors.Wrap(lastErrOnExit, "bootstrap process returned error")
|
||||
}
|
||||
if waitErr == context.DeadlineExceeded {
|
||||
if errors.Is(waitErr, context.DeadlineExceeded) {
|
||||
return errors.Wrap(waitErr, "bootstrap process timed out")
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user