package clusterapi
import (
"context"
"errors"
"fmt"
"os"
"path/filepath"
"time"
"github.com/sirupsen/logrus"
"gopkg.in/yaml.v2"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/utils/ptr"
clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta1" //nolint:staticcheck //CORS-3563
"sigs.k8s.io/cluster-api/util"
utilkubeconfig "sigs.k8s.io/cluster-api/util/kubeconfig"
"sigs.k8s.io/controller-runtime/pkg/client"
"github.com/openshift/installer/pkg/asset"
"github.com/openshift/installer/pkg/asset/cluster/metadata"
"github.com/openshift/installer/pkg/asset/cluster/tfvars"
"github.com/openshift/installer/pkg/asset/ignition/bootstrap"
"github.com/openshift/installer/pkg/asset/ignition/machine"
"github.com/openshift/installer/pkg/asset/installconfig"
"github.com/openshift/installer/pkg/asset/kubeconfig"
"github.com/openshift/installer/pkg/asset/machines"
"github.com/openshift/installer/pkg/asset/manifests"
"github.com/openshift/installer/pkg/asset/manifests/capiutils"
capimanifests "github.com/openshift/installer/pkg/asset/manifests/clusterapi"
"github.com/openshift/installer/pkg/asset/rhcos"
"github.com/openshift/installer/pkg/asset/tls"
"github.com/openshift/installer/pkg/clusterapi"
"github.com/openshift/installer/pkg/infrastructure"
"github.com/openshift/installer/pkg/metrics/timer"
"github.com/openshift/installer/pkg/types"
)
// Ensure that clusterapi.InfraProvider implements
// the infrastructure.Provider interface, which is the
// interface the installer uses to call this provider.
var _ infrastructure.Provider = (*InfraProvider)(nil)
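// Stage names used to report provisioning timings through the metrics timer.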
const (
preProvisionStage = "Infrastructure Pre-provisioning"
infrastructureStage = "Network-infrastructure Provisioning"
infrastructureReadyStage = "Post-network, pre-machine Provisioning"
ignitionStage = "Bootstrap Ignition Provisioning"
machineStage = "Machine Provisioning"
postProvisionStage = "Infrastructure Post-provisioning"
)
// InfraProvider implements common Cluster API logic and
// contains the platform CAPI provider, which is called
// in the lifecycle defined by the Provider interface.
type InfraProvider struct {
impl Provider
appliedManifests []client.Object
}
// InitializeProvider returns a ClusterAPI provider implementation
// for a specific cloud platform.
func InitializeProvider(platform Provider) infrastructure.Provider {
return &InfraProvider{impl: platform}
}
// Provision creates cluster resources by applying CAPI manifests to a locally running control plane.
//
//nolint:gocyclo
func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset.Parents) (fileList []*asset.File, err error) {
manifestsAsset := &manifests.Manifests{}
workersAsset := &machines.Worker{}
capiManifestsAsset := &capimanifests.Cluster{}
capiMachinesAsset := &machines.ClusterAPI{}
clusterKubeconfigAsset := &kubeconfig.AdminClient{}
clusterID := &installconfig.ClusterID{}
installConfig := &installconfig.InstallConfig{}
rhcosImage := new(rhcos.Image)
bootstrapIgnAsset := &bootstrap.Bootstrap{}
masterIgnAsset := &machine.Master{}
workerIgnAsset := &machine.Worker{}
tfvarsAsset := &tfvars.TerraformVariables{}
rootCA := &tls.RootCA{}
parents.Get(
manifestsAsset,
workersAsset,
capiManifestsAsset,
clusterKubeconfigAsset,
clusterID,
installConfig,
rhcosImage,
bootstrapIgnAsset,
masterIgnAsset,
workerIgnAsset,
capiMachinesAsset,
tfvarsAsset,
rootCA,
)
var capiClusters []*clusterv1.Cluster
// Collect cluster and non-machine-related infra manifests
// to be applied during the initial stage.
infraManifests := []client.Object{}
for _, m := range capiManifestsAsset.RuntimeFiles() {
// Check for cluster definition so that we can collect the names.
if cluster, ok := m.Object.(*clusterv1.Cluster); ok {
capiClusters = append(capiClusters, cluster)
}
infraManifests = append(infraManifests, m.Object)
}
// Machine manifests will be applied after the infra
// manifests and subsequent hooks.
machineManifests := []client.Object{}
for _, m := range capiMachinesAsset.RuntimeFiles() {
machineManifests = append(machineManifests, m.Object)
}
if p, ok := i.impl.(PreProvider); ok {
preProvisionInput := PreProvisionInput{
InfraID: clusterID.InfraID,
InstallConfig: installConfig,
RhcosImage: rhcosImage,
ManifestsAsset: manifestsAsset,
MachineManifests: machineManifests,
WorkersAsset: workersAsset,
}
timer.StartTimer(preProvisionStage)
if err := p.PreProvision(ctx, preProvisionInput); err != nil {
return fileList, fmt.Errorf("failed during pre-provisioning: %w", err)
}
timer.StopTimer(preProvisionStage)
} else {
logrus.Debugf("No pre-provisioning requirements for the %s provider", i.impl.Name())
}
// If we're skipping bootstrap destroy, shut down the local control plane here.
// Otherwise, we will shut it down after bootstrap destroy.
// This has to execute as the last defer in the stack, since previous defers might still need the local control plane.
if oi, ok := os.LookupEnv("OPENSHIFT_INSTALL_PRESERVE_BOOTSTRAP"); ok && oi != "" {
defer func() {
logrus.Warn("OPENSHIFT_INSTALL_PRESERVE_BOOTSTRAP is set, shutting down local control plane.")
clusterapi.System().Teardown()
}()
}
// Make sure to always return generated manifests, even if errors happened
defer func(ctx context.Context) {
var errs []error
// Overriding the named return with the generated list
fileList, errs = i.collectManifests(ctx, clusterapi.System().Client())
// If Provision returned an error, add it to the list
if err != nil {
clusterapi.System().CleanEtcd()
errs = append(errs, err)
}
err = utilerrors.NewAggregate(errs)
}(ctx)
// Run the CAPI system.
timer.StartTimer(infrastructureStage)
capiSystem := clusterapi.System()
if err := capiSystem.Run(ctx); err != nil {
return fileList, fmt.Errorf("failed to run cluster api system: %w", err)
}
// Grab the client.
cl := capiSystem.Client()
i.appliedManifests = []client.Object{}
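// Track every manifest we apply so it can be collected into the asset
// artifacts later, even if provisioning fails partway through.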
// Create the infra manifests.
logrus.Info("Creating infra manifests...")
for _, m := range infraManifests {
m.SetNamespace(capiutils.Namespace)
if err := cl.Create(ctx, m); err != nil {
return fileList, fmt.Errorf("failed to create infrastructure manifest: %w", err)
}
i.appliedManifests = append(i.appliedManifests, m)
logrus.Infof("Created manifest %+T, namespace=%s name=%s", m, m.GetNamespace(), m.GetName())
}
logrus.Info("Done creating infra manifests")
// Pass the cluster kubeconfig and store it in a secret; this is usually the role of a bootstrap provider.
for _, capiCluster := range capiClusters {
logrus.Infof("Creating kubeconfig entry for capi cluster %v", capiCluster.GetName())
key := client.ObjectKey{
Name: capiCluster.GetName(),
Namespace: capiutils.Namespace,
}
cluster := &clusterv1.Cluster{}
if err := cl.Get(ctx, key, cluster); err != nil {
return fileList, err
}
// Create the secret.
clusterKubeconfig := clusterKubeconfigAsset.Files()[0].Data
secret := generateSecret(cluster, clusterKubeconfig)
if err := cl.Create(ctx, secret); err != nil {
return fileList, err
}
}
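// The default network-infrastructure timeout is 15 minutes; platforms can
// override it by implementing the Timeouts interface.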
var networkTimeout = 15 * time.Minute
if p, ok := i.impl.(Timeouts); ok {
networkTimeout = p.NetworkTimeout()
}
// Wait for successful provisioning by checking the InfrastructureReady
// status on the cluster object.
untilTime := time.Now().Add(networkTimeout)
timezone, _ := untilTime.Zone()
logrus.Infof("Waiting up to %v (until %v %s) for network infrastructure to become ready...", networkTimeout, untilTime.Format(time.Kitchen), timezone)
{
if err := wait.PollUntilContextTimeout(ctx, 15*time.Second, networkTimeout, true,
func(ctx context.Context) (bool, error) {
for _, capiCluster := range capiClusters {
if err := cl.Get(ctx, client.ObjectKey{
Name: capiCluster.GetName(),
Namespace: capiutils.Namespace,
}, capiCluster); err != nil {
if apierrors.IsNotFound(err) {
return false, nil
}
return false, err
}
}
for _, capiCluster := range capiClusters {
if !capiCluster.Status.InfrastructureReady {
return false, nil
}
}
return true, nil
}); err != nil {
// Attempt to find and report any falsy conditions on the infra cluster.
if len(capiClusters) > 0 {
warnIfFalsyInfraConditions(ctx, capiClusters[0].Spec.InfrastructureRef, cl)
}
if wait.Interrupted(err) {
return fileList, fmt.Errorf("infrastructure was not ready within %v", networkTimeout)
}
return fileList, fmt.Errorf("infrastructure is not ready: %w", err)
}
if len(capiClusters) == 0 {
return fileList, fmt.Errorf("no CAPI cluster definitions found; unable to check control plane endpoint")
}
if capiClusters[0].Spec.ControlPlaneEndpoint.Host == "" {
return fileList, fmt.Errorf("control plane endpoint is not set")
}
}
timer.StopTimer(infrastructureStage)
logrus.Info("Network infrastructure is ready")
if p, ok := i.impl.(InfraReadyProvider); ok {
infraReadyInput := InfraReadyInput{
Client: cl,
InstallConfig: installConfig,
InfraID: clusterID.InfraID,
}
timer.StartTimer(infrastructureReadyStage)
if err := p.InfraReady(ctx, infraReadyInput); err != nil {
return fileList, fmt.Errorf("failed provisioning resources after infrastructure ready: %w", err)
}
timer.StopTimer(infrastructureReadyStage)
} else {
logrus.Debugf("No infrastructure ready requirements for the %s provider", i.impl.Name())
}
masterIgnData := masterIgnAsset.Files()[0].Data
bootstrapIgnData, err := injectInstallInfo(bootstrapIgnAsset.Files()[0].Data)
if err != nil {
return fileList, fmt.Errorf("unable to inject installation info: %w", err)
}
workerIgnData := workerIgnAsset.Files()[0].Data
ignitionSecrets := []*corev1.Secret{}
// The cloud-platform may need to override the default
// bootstrap ignition behavior.
if p, ok := i.impl.(IgnitionProvider); ok {
ignInput := IgnitionInput{
Client: cl,
BootstrapIgnData: bootstrapIgnData,
MasterIgnData: masterIgnData,
WorkerIgnData: workerIgnData,
InfraID: clusterID.InfraID,
InstallConfig: installConfig,
TFVarsAsset: tfvarsAsset,
RootCA: rootCA,
}
timer.StartTimer(ignitionStage)
if ignitionSecrets, err = p.Ignition(ctx, ignInput); err != nil {
return fileList, fmt.Errorf("failed preparing ignition data: %w", err)
}
timer.StopTimer(ignitionStage)
} else {
logrus.Debugf("Using default ignition for the %s provider", i.impl.Name())
bootstrapIgnSecret := IgnitionSecret(bootstrapIgnData, clusterID.InfraID, "bootstrap")
masterIgnSecret := IgnitionSecret(masterIgnData, clusterID.InfraID, "master")
ignitionSecrets = append(ignitionSecrets, bootstrapIgnSecret, masterIgnSecret)
}
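// The ignition secrets are created alongside the machine manifests; the
// Machine objects reference them as bootstrap data.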
for _, secret := range ignitionSecrets {
machineManifests = append(machineManifests, secret)
}
// Create the machine manifests.
timer.StartTimer(machineStage)
machineNames := []string{}
capiMachines := []*clusterv1.Machine{}
for _, m := range machineManifests {
m.SetNamespace(capiutils.Namespace)
if err := cl.Create(ctx, m); err != nil {
return fileList, fmt.Errorf("failed to create control-plane manifest: %w", err)
}
i.appliedManifests = append(i.appliedManifests, m)
if machine, ok := m.(*clusterv1.Machine); ok {
machineNames = append(machineNames, machine.Name)
capiMachines = append(capiMachines, machine)
}
logrus.Infof("Created manifest %+T, namespace=%s name=%s", m, m.GetNamespace(), m.GetName())
}
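// As with the network timeout, the default machine-provisioning timeout is
// 15 minutes and can be overridden per platform via the Timeouts interface.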
var provisionTimeout = 15 * time.Minute
if p, ok := i.impl.(Timeouts); ok {
provisionTimeout = p.ProvisionTimeout()
}
{
untilTime := time.Now().Add(provisionTimeout)
timezone, _ := untilTime.Zone()
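// The bootstrap machine must report an external IP when the cluster is
// published externally and the platform gathers bootstrap logs over a public IP.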
reqBootstrapPubIP := installConfig.Config.Publish == types.ExternalPublishingStrategy && i.impl.PublicGatherEndpoint() == ExternalIP
logrus.Infof("Waiting up to %v (until %v %s) for machines %v to provision...", provisionTimeout, untilTime.Format(time.Kitchen), timezone, machineNames)
if err := wait.PollUntilContextTimeout(ctx, 15*time.Second, provisionTimeout, true,
func(ctx context.Context) (bool, error) {
allReady := true
for _, machine := range capiMachines {
if err := cl.Get(ctx, client.ObjectKey{
Name: machine.Name,
Namespace: capiutils.Namespace,
}, machine); err != nil {
if apierrors.IsNotFound(err) {
logrus.Debugf("Not found")
return false, nil
}
return false, err
}
reqPubIP := reqBootstrapPubIP && machine.Name == capiutils.GenerateBoostrapMachineName(clusterID.InfraID)
ready, err := checkMachineReady(machine, reqPubIP)
if err != nil {
return false, fmt.Errorf("failed waiting for machines: %w", err)
}
if !ready {
allReady = false
} else {
logrus.Debugf("Machine %s is ready. Phase: %s", machine.Name, machine.Status.Phase)
}
}
return allReady, nil
}); err != nil {
// Attempt to find and report any falsy conditions on the infra machines.
for _, machine := range capiMachines {
if machine != nil {
warnIfFalsyInfraConditions(ctx, &machine.Spec.InfrastructureRef, cl)
}
}
if wait.Interrupted(err) {
return fileList, fmt.Errorf("%s within %v", asset.ControlPlaneCreationError, provisionTimeout)
}
return fileList, fmt.Errorf("%s: machines are not ready: %w", asset.ControlPlaneCreationError, err)
}
}
timer.StopTimer(machineStage)
logrus.Info("Control-plane machines are ready")
if p, ok := i.impl.(PostProvider); ok {
postMachineInput := PostProvisionInput{
Client: cl,
InstallConfig: installConfig,
InfraID: clusterID.InfraID,
}
timer.StartTimer(postProvisionStage)
if err = p.PostProvision(ctx, postMachineInput); err != nil {
return fileList, fmt.Errorf("failed during post-machine creation hook: %w", err)
}
timer.StopTimer(postProvisionStage)
}
logrus.Infof("Cluster API resources have been created. Waiting for cluster to become ready...")
return fileList, nil
}
// DestroyBootstrap destroys the temporary bootstrap resources.
func (i *InfraProvider) DestroyBootstrap(ctx context.Context, dir string) error {
defer clusterapi.System().CleanEtcd()
metadata, err := metadata.Load(dir)
if err != nil {
return err
}
sys := clusterapi.System()
if sys.State() != clusterapi.SystemStateRunning {
if err := sys.Run(ctx); err != nil {
return fmt.Errorf("failed to run capi system: %w", err)
}
}
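// Run the platform-specific bootstrap-destroy hook, if the provider implements one.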
if p, ok := i.impl.(BootstrapDestroyer); ok {
bootstrapDestroyInput := BootstrapDestroyInput{
Client: sys.Client(),
Metadata: *metadata,
}
if err = p.DestroyBootstrap(ctx, bootstrapDestroyInput); err != nil {
return fmt.Errorf("failed during the destroy bootstrap hook: %w", err)
}
}
machineName := capiutils.GenerateBoostrapMachineName(metadata.InfraID)
machineNamespace := capiutils.Namespace
if err := sys.Client().Delete(ctx, &clusterv1.Machine{
ObjectMeta: metav1.ObjectMeta{
Name: machineName,
Namespace: machineNamespace,
},
}); err != nil {
return fmt.Errorf("failed to delete bootstrap machine: %w", err)
}
machineDeletionTimeout := 5 * time.Minute
logrus.Infof("Waiting up to %v for bootstrap machine deletion %s/%s...", machineDeletionTimeout, machineNamespace, machineName)
cctx, cancel := context.WithTimeout(ctx, machineDeletionTimeout)
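// Poll every 2 seconds until the bootstrap Machine is no longer found, then
// cancel the context to stop waiting.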
wait.UntilWithContext(cctx, func(context.Context) {
err := sys.Client().Get(cctx, client.ObjectKey{
Name: machineName,
Namespace: machineNamespace,
}, &clusterv1.Machine{})
if err != nil {
if apierrors.IsNotFound(err) {
logrus.Debugf("Machine deleted: %s", machineName)
cancel()
} else {
logrus.Debugf("Error when deleting bootstrap machine: %s", err)
}
}
}, 2*time.Second)
err = cctx.Err()
if err != nil && !errors.Is(err, context.Canceled) {
logrus.Warnf("Timeout deleting bootstrap machine: %s", err)
}
clusterapi.System().Teardown()
if p, ok := i.impl.(PostDestroyer); ok {
postDestroyInput := PostDestroyerInput{
Metadata: *metadata,
}
if err := p.PostDestroy(ctx, postDestroyInput); err != nil {
return fmt.Errorf("failed during post-destroy hook: %w", err)
}
logrus.Debugf("Finished running post-destroy hook")
} else {
logrus.Infof("no post-destroy requirements for the %s provider", i.impl.Name())
}
logrus.Infof("Finished destroying bootstrap resources")
return nil
}
type machineManifest struct {
Status struct {
Addresses []clusterv1.MachineAddress `yaml:"addresses"`
} `yaml:"status"`
}
// extractIPAddress extracts the IP addresses from a machine manifest file in a
// provider-agnostic way by reading only the "status" stanza, which should be
// present in all providers.
func extractIPAddress(manifestPath string) ([]string, error) {
data, err := os.ReadFile(manifestPath)
if err != nil {
return []string{}, fmt.Errorf("failed to read machine manifest %s: %w", manifestPath, err)
}
var manifest machineManifest
if err := yaml.Unmarshal(data, &manifest); err != nil {
return []string{}, fmt.Errorf("failed to unmarshal manifest %s: %w", manifestPath, err)
}
var externalIPAddrs []string
var internalIPAddrs []string
for _, addr := range manifest.Status.Addresses {
switch addr.Type {
case clusterv1.MachineExternalIP:
externalIPAddrs = append(externalIPAddrs, addr.Address)
case clusterv1.MachineInternalIP:
internalIPAddrs = append(internalIPAddrs, addr.Address)
default:
continue
}
}
// Prioritize external addresses by placing them at the front of the list.
externalIPAddrs = append(externalIPAddrs, internalIPAddrs...)
return externalIPAddrs, nil
}
// ExtractHostAddresses extracts the IPs of the bootstrap and control plane machines.
func (i *InfraProvider) ExtractHostAddresses(dir string, config *types.InstallConfig, ha *infrastructure.HostAddresses) error {
manifestsDir := filepath.Join(dir, clusterapi.ArtifactsDir)
logrus.Debugf("Looking for machine manifests in %s", manifestsDir)
addr, err := i.getBootstrapAddress(config, manifestsDir)
if err != nil {
return fmt.Errorf("failed to get bootstrap address: %w", err)
}
ha.Bootstrap = addr
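// Machine manifests are written to the artifacts directory by collectManifests
// as <Kind>-<namespace>-<name>.yaml; match the control-plane (master) machines.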
masterFiles, err := filepath.Glob(filepath.Join(manifestsDir, "Machine\\-openshift\\-cluster\\-api\\-guests\\-*\\-master\\-?.yaml"))
if err != nil {
return fmt.Errorf("failed to list master machine manifests: %w", err)
}
logrus.Debugf("master machine manifests found: %v", masterFiles)
if replicas := int(*config.ControlPlane.Replicas); replicas != len(masterFiles) {
logrus.Warnf("not all master manifests found: %d. Expected %d.", len(masterFiles), replicas)
}
for _, manifest := range masterFiles {
addrs, err := extractIPAddress(manifest)
if err != nil {
// Log the error but keep parsing the remaining files
logrus.Warnf("failed to extract IP address for %s: %v", manifest, err)
continue
}
logrus.Debugf("found master address: %s", addrs)
ha.Masters = append(ha.Masters, prioritizeIPv4(config, addrs))
}
return nil
}
func (i *InfraProvider) getBootstrapAddress(config *types.InstallConfig, manifestsDir string) (string, error) {
// If the bootstrap node cannot have a public IP address, we
// SSH through the load balancer, as is the case on Azure.
if i.impl.PublicGatherEndpoint() == APILoadBalancer && config.Publish != types.InternalPublishingStrategy {
return fmt.Sprintf("api.%s", config.ClusterDomain()), nil
}
bootstrapFiles, err := filepath.Glob(filepath.Join(manifestsDir, "Machine\\-openshift\\-cluster\\-api\\-guests\\-*\\-bootstrap.yaml"))
if err != nil {
return "", fmt.Errorf("failed to list bootstrap manifests: %w", err)
}
logrus.Debugf("bootstrap manifests found: %v", bootstrapFiles)
if len(bootstrapFiles) != 1 {
return "", fmt.Errorf("wrong number of bootstrap manifests found: %v. Expected exactly one", bootstrapFiles)
}
addrs, err := extractIPAddress(bootstrapFiles[0])
if err != nil {
return "", fmt.Errorf("failed to extract IP address for bootstrap: %w", err)
}
logrus.Debugf("found bootstrap address: %s", addrs)
return prioritizeIPv4(config, addrs), nil
}
// IgnitionSecret provides the basic formatting for creating the
// ignition secret.
func IgnitionSecret(ign []byte, infraID, role string) *corev1.Secret {
secret := &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("%s-%s", infraID, role),
Namespace: capiutils.Namespace,
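// The cluster-name label associates the secret with the CAPI Cluster object.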
Labels: map[string]string{
"cluster.x-k8s.io/cluster-name": infraID,
},
},
Data: map[string][]byte{
"format": []byte("ignition"),
"value": ign,
},
}
secret.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("Secret"))
return secret
}
func (i *InfraProvider) collectManifests(ctx context.Context, cl client.Client) ([]*asset.File, []error) {
logrus.Debug("Collecting applied cluster api manifests...")
errorList := []error{}
fileList := []*asset.File{}
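// Re-fetch each applied manifest from the local control plane so the collected
// artifacts reflect the latest status written by the controllers.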
for _, m := range i.appliedManifests {
key := client.ObjectKey{
Name: m.GetName(),
Namespace: m.GetNamespace(),
}
if err := cl.Get(ctx, key, m); err != nil {
errorList = append(errorList, fmt.Errorf("failed to get manifest %s: %w", m.GetName(), err))
continue
}
gvk, err := cl.GroupVersionKindFor(m)
if err != nil {
errorList = append(errorList, fmt.Errorf("failed to get GVK for manifest %s: %w", m.GetName(), err))
continue
}
fileName := filepath.Join(clusterapi.ArtifactsDir, fmt.Sprintf("%s-%s-%s.yaml", gvk.Kind, m.GetNamespace(), m.GetName()))
objData, err := yaml.Marshal(m)
if err != nil {
errorList = append(errorList, fmt.Errorf("failed to marshal manifest %s: %w", fileName, err))
continue
}
fileList = append(fileList, &asset.File{
Filename: fileName,
Data: objData,
})
}
return fileList, errorList
}
func checkMachineReady(machine *clusterv1.Machine, requirePublicIP bool) (bool, error) {
logrus.Debugf("Checking that machine %s has provisioned...", machine.Name)
// Check for a failed machine first: the Failed phase is neither Provisioned
// nor Running, so the "not yet provisioned" branch below would otherwise
// swallow it and the failure would only surface as a timeout.
if machine.Status.Phase == string(clusterv1.MachinePhaseFailed) {
//TODO: We need to update this to use a non-deprecated field
msg := ptr.Deref(machine.Status.FailureMessage, "machine.Status.FailureMessage was not set") //nolint:staticcheck
return false, fmt.Errorf("machine %s failed to provision: %s", machine.Name, msg)
}
if machine.Status.Phase != string(clusterv1.MachinePhaseProvisioned) &&
machine.Status.Phase != string(clusterv1.MachinePhaseRunning) {
logrus.Debugf("Machine %s has not yet provisioned: %s", machine.Name, machine.Status.Phase)
return false, nil
}
logrus.Debugf("Machine %s has status: %s", machine.Name, machine.Status.Phase)
return hasRequiredIP(machine, requirePublicIP), nil
}
func hasRequiredIP(machine *clusterv1.Machine, requirePublicIP bool) bool {
logrus.Debugf("Checking that IP addresses are populated in the status of machine %s...", machine.Name)
for _, addr := range machine.Status.Addresses {
switch {
case len(addr.Address) == 0:
continue
case addr.Type == clusterv1.MachineExternalIP:
logrus.Debugf("Found external IP address: %s", addr.Address)
return true
case addr.Type == clusterv1.MachineInternalIP && !requirePublicIP:
logrus.Debugf("Found internal IP address: %s", addr.Address)
return true
}
logrus.Debugf("Checked IP %s: %s", addr.Type, addr.Address)
}
logrus.Debugf("Still waiting for machine %s to get required IPs", machine.Name)
return false
}
// gatherInfraConditions gathers conditions from the CAPI infra cluster or machine
// in a provider-agnostic way from the "status.conditions" field, which should be present in all providers.
// https://cluster-api.sigs.k8s.io/developer/providers/contracts/infra-cluster#infracluster-conditions
// https://cluster-api.sigs.k8s.io/developer/providers/contracts/infra-machine#inframachine-conditions
func gatherInfraConditions(ctx context.Context, objRef *corev1.ObjectReference, cl client.Client) (clusterv1.Conditions, error) {
unstructuredObj := &unstructured.Unstructured{}
unstructuredObj.SetGroupVersionKind(objRef.GroupVersionKind())
if err := cl.Get(ctx, client.ObjectKey{
Namespace: objRef.Namespace,
Name: objRef.Name,
}, unstructuredObj); err != nil {
return nil, err
}
// Field .status.conditions should be implemented by all providers
// and has type clusterv1.Conditions
infraObj := &struct {
Status struct {
Conditions clusterv1.Conditions `json:"conditions,omitempty"`
} `json:"status,omitempty"`
}{}
if err := runtime.DefaultUnstructuredConverter.FromUnstructured(unstructuredObj.UnstructuredContent(), infraObj); err != nil {
return nil, err
}
return infraObj.Status.Conditions, nil
}
// warnIfFalsyInfraConditions logs warning messages for any conditions that are not "True"
// in the infra cluster or machine status.
func warnIfFalsyInfraConditions(ctx context.Context, objRef *corev1.ObjectReference, cl client.Client) {
apiVersion, kind := objRef.GroupVersionKind().ToAPIVersionAndKind()
objInfo := fmt.Sprintf("apiVersion=%s, kind=%s, namespace=%s, name=%s", apiVersion, kind, objRef.Namespace, objRef.Name)
logrus.Infof("Gathering conditions for %s", objInfo)
conditions, err := gatherInfraConditions(ctx, objRef, cl)
if err != nil {
logrus.Warnf("Failed to gather conditions: %s", err.Error())
return
}
logrus.Infof("Checking conditions for %s", objInfo)
if len(conditions) > 0 {
var falsyConditions clusterv1.Conditions
for _, condition := range conditions {
if condition.Status != corev1.ConditionTrue {
falsyConditions = append(falsyConditions, condition)
}
}
if len(falsyConditions) == 0 {
logrus.Infof("All conditions are satisfied")
}
for _, condition := range falsyConditions {
logrus.Warnf("Condition %s has status: %q, reason: %q, message: %q", condition.Type, condition.Status, condition.Reason, condition.Message)
}
} else {
logrus.Infof("No conditions found")
}
logrus.Infof("Done checking conditions for %s", objInfo)
}
// generateSecret returns a Kubernetes secret for the given Cluster and kubeconfig data.
// TODO: upgrade our usage from the deprecated capi cluster v1beta1 -> v1beta2
// and remove this function in favor of GenerateSecret in capi util/kubeconfig.
func generateSecret(cluster *clusterv1.Cluster, data []byte) *corev1.Secret {
name := util.ObjectKey(cluster)
return utilkubeconfig.GenerateSecretWithOwner(name, data, metav1.OwnerReference{
APIVersion: clusterv1.GroupVersion.String(),
Kind: "Cluster",
Name: cluster.Name,
UID: cluster.UID,
})
}