1
0
mirror of https://github.com/openshift/installer.git synced 2026-02-05 06:46:36 +01:00

openstack: add ssh to bootstrap sg

Fixes the issue of bootstrap machine logs cannot be collected when installation fails because
the bootstrap VM uses the master security group, which doesn't allow SSH
access from the installation source address. This prevents gathering
diagnostic information needed to troubleshoot failed installations.

- Created dedicated bootstrap security group with SSH access from anywhere
- Tagged with `openshiftRole=bootstrap` for lifecycle management
- Enables SSH access to bootstrap VM for log collection on failure
- Implement PostDestroyer for openstack bootstrap vm
This commit is contained in:
eshulman2
2025-12-04 19:24:51 +02:00
parent ca7b596669
commit 27eb6bdfb9
7 changed files with 134 additions and 6 deletions

View File

@@ -0,0 +1,7 @@
package openstack
const (
bootstrapRole string = "bootstrap"
masterRole string = "master"
workerRole string = "worker"
)

View File

@@ -244,7 +244,7 @@ func generateProviderSpec(ctx context.Context, clusterID string, config *types.I
// For the workers, we still use the AZ name as part of the server group name
// so the user can control the scheduling policy per AZ and change the MachineSets
// if needed on a day 2 operation.
if role == "worker" && failureDomain.AvailabilityZone != "" {
if role == workerRole && failureDomain.AvailabilityZone != "" {
serverGroupName += "-" + failureDomain.AvailabilityZone
}

View File

@@ -31,7 +31,7 @@ func GenerateMachines(clusterID string, config *types.InstallConfig, pool *types
mpool := pool.Platform.OpenStack
total := int64(1)
if role == "master" && pool.Replicas != nil {
if role == masterRole && pool.Replicas != nil {
total = *pool.Replicas
}
@@ -59,7 +59,7 @@ func GenerateMachines(clusterID string, config *types.InstallConfig, pool *types
machineLabels := map[string]string{
"cluster.x-k8s.io/control-plane": "",
}
if role == "bootstrap" {
if role == bootstrapRole {
machineName = capiutils.GenerateBoostrapMachineName(clusterID)
machineLabels = map[string]string{
"cluster.x-k8s.io/control-plane": "",
@@ -170,11 +170,20 @@ func generateMachineSpec(clusterID string, config *types.InstallConfig, mpool *o
securityGroups := []capo.SecurityGroupParam{
{
// Bootstrap and Master share the same security group
// Bootstrap and Master share the same security group, though
// we layer on additional security groups for the bootstrap later.
Filter: &capo.SecurityGroupFilter{Name: fmt.Sprintf("%s-master", clusterID)},
},
}
// Add bootstrap sec group to bootstrap vm to allow collecting logs using ssh
// Notice: bootstrap SG is added by name and removed by tag
if role == bootstrapRole {
securityGroups = append(securityGroups, capo.SecurityGroupParam{
Filter: &capo.SecurityGroupFilter{Name: fmt.Sprintf("%s-bootstrap", clusterID)},
})
}
for i := range mpool.AdditionalSecurityGroupIDs {
securityGroups = append(securityGroups, capo.SecurityGroupParam{ID: &mpool.AdditionalSecurityGroupIDs[i]})
}
@@ -209,7 +218,7 @@ func generateMachineSpec(clusterID string, config *types.InstallConfig, mpool *o
ConfigDrive: configDrive,
}
if role != "bootstrap" {
if role != bootstrapRole {
spec.ServerGroup = &capo.ServerGroupParam{Filter: &capo.ServerGroupFilter{Name: ptr.To(clusterID + "-" + role)}}
}

View File

@@ -16,6 +16,7 @@ import (
"github.com/openshift/installer/pkg/asset/manifests/capiutils"
"github.com/openshift/installer/pkg/infrastructure/clusterapi"
"github.com/openshift/installer/pkg/infrastructure/openstack/infraready"
"github.com/openshift/installer/pkg/infrastructure/openstack/postdestroy"
"github.com/openshift/installer/pkg/infrastructure/openstack/postprovision"
"github.com/openshift/installer/pkg/infrastructure/openstack/preprovision"
"github.com/openshift/installer/pkg/rhcos"
@@ -38,6 +39,7 @@ func (p Provider) Name() string {
func (Provider) PublicGatherEndpoint() clusterapi.GatherEndpoint { return clusterapi.InternalIP }
var _ clusterapi.PreProvider = Provider{}
var _ clusterapi.PostDestroyer = Provider{}
// PreProvision tags the VIP ports, and creates the security groups and the
// server groups defined in the Machine manifests.
@@ -180,3 +182,21 @@ func (p Provider) PostProvision(ctx context.Context, in clusterapi.PostProvision
return postprovision.FloatingIPs(ctx, k8sClient, ospCluster, installConfig, infraID)
}
// PostDestroy cleans up the temporary bootstrap resources after the bootstrap machine has been deleted.
// This cleans up resources that were created by the installer for the bootstrap machine:
// - Bootstrap security group that was created during pre-provisioning
// This runs after CAPO has deleted the bootstrap machine and its ports, ensuring resources are no longer in use.
func (p Provider) PostDestroy(ctx context.Context, in clusterapi.PostDestroyerInput) error {
logrus.Info("Cleaning up OpenStack bootstrap resources")
cloud := in.Metadata.OpenStack.Cloud
infraID := in.Metadata.InfraID
// Delete security groups tagged with the cluster ID and bootstrap role
if err := postdestroy.SecurityGroups(ctx, cloud, infraID); err != nil {
return fmt.Errorf("failed to destroy bootstrap security groups: %w", err)
}
return nil
}

View File

@@ -0,0 +1,5 @@
package postdestroy
const (
roleTag = "openshiftRole=bootstrap"
)

View File

@@ -0,0 +1,66 @@
package postdestroy
import (
"context"
"errors"
"fmt"
"net/http"
"github.com/gophercloud/gophercloud/v2"
"github.com/gophercloud/gophercloud/v2/openstack/networking/v2/extensions/security/groups"
"github.com/sirupsen/logrus"
openstackdefaults "github.com/openshift/installer/pkg/types/openstack/defaults"
)
// SecurityGroups deletes the bootstrap security group which is no longer
// required after bootstrapping is complete.
func SecurityGroups(ctx context.Context, cloud string, infraID string) error {
clusterTag := fmt.Sprintf("openshiftClusterID=%s", infraID)
logrus.Debugf("Searching for bootstrap security group with tags: %s, %s", clusterTag, roleTag)
networkClient, err := openstackdefaults.NewServiceClient(ctx, "network", openstackdefaults.DefaultClientOpts(cloud))
if err != nil {
return fmt.Errorf("failed to create network client: %w", err)
}
// Search for security groups with both cluster ID and role tags
// OpenStack tags filter requires ALL specified tags to match (AND logic)
allPages, err := groups.List(networkClient, groups.ListOpts{
Tags: fmt.Sprintf("%s,%s", clusterTag, roleTag),
}).AllPages(ctx)
if err != nil {
return fmt.Errorf("failed to list security groups: %w", err)
}
secGroups, err := groups.ExtractGroups(allPages)
if err != nil {
return fmt.Errorf("failed to extract security groups: %w", err)
}
if len(secGroups) == 0 {
logrus.Debug("No bootstrap security group found (may have already been deleted)")
return nil
}
// Should only find one security group with both tags, but delete all if multiple exist
var errs []error
for _, secGroup := range secGroups {
logrus.Infof("Deleting bootstrap security group %s (ID: %s)", secGroup.Name, secGroup.ID)
err = groups.Delete(ctx, networkClient, secGroup.ID).ExtractErr()
if err != nil {
// Check if it's a "not found" error, which is acceptable
if gophercloud.ResponseCodeIs(err, http.StatusNotFound) {
logrus.Debugf("Bootstrap security group %s already deleted", secGroup.ID)
continue
}
logrus.Errorf("failed to delete bootstrap security group %s: %v", secGroup.Name, err)
errs = append(errs, fmt.Errorf("failed to delete bootstrap security group %s: %w", secGroup.Name, err))
}
logrus.Infof("Successfully deleted bootstrap security group(s) %s", secGroup.Name)
}
return errors.Join(errs...)
}

View File

@@ -54,7 +54,7 @@ func SecurityGroups(ctx context.Context, installConfig *installconfig.InstallCon
}
logrus.Debugf("Creating the security groups")
var masterGroup, workerGroup *groups.SecGroup
var bootstrapGroup, masterGroup, workerGroup *groups.SecGroup
{
masterGroup, err = groups.Create(ctx, networkClient, groups.CreateOpts{
Name: infraID + "-master",
@@ -91,6 +91,19 @@ func SecurityGroups(ctx context.Context, installConfig *installconfig.InstallCon
}).Extract(); err != nil {
return fmt.Errorf("failed to tag the Compute security group: %w", err)
}
bootstrapGroup, err = groups.Create(ctx, networkClient, groups.CreateOpts{
Name: infraID + "-bootstrap",
Description: description,
}).Extract()
if err != nil {
return fmt.Errorf("failed to create the Bootstrap security group: %w", err)
}
if _, err := attributestags.ReplaceAll(ctx, networkClient, "security-groups", bootstrapGroup.ID, attributestags.ReplaceAllOpts{
Tags: []string{"openshiftClusterID=" + infraID, "openshiftRole=bootstrap"},
}).Extract(); err != nil {
return fmt.Errorf("failed to tag the Bootstrap security group: %w", err)
}
}
logrus.Debugf("Creating the security group rules")
@@ -156,12 +169,16 @@ func SecurityGroups(ctx context.Context, installConfig *installconfig.InstallCon
var masterRules []rules.CreateOpts
var workerRules []rules.CreateOpts
var bootstrapRules []rules.CreateOpts
addMasterRules := func(s service, ipVersion rules.RuleEtherType, remoteCIDRs []string) {
masterRules = append(masterRules, buildRules(masterGroup.ID, s, ipVersion, remoteCIDRs)...)
}
addWorkerRules := func(s service, ipVersion rules.RuleEtherType, remoteCIDRs []string) {
workerRules = append(workerRules, buildRules(workerGroup.ID, s, ipVersion, remoteCIDRs)...)
}
addBootstrapRules := func(s service, ipVersion rules.RuleEtherType, remoteCIDRs []string) {
bootstrapRules = append(bootstrapRules, buildRules(bootstrapGroup.ID, s, ipVersion, remoteCIDRs)...)
}
// TODO(mandre) Explicitly enable egress
@@ -184,6 +201,7 @@ func SecurityGroups(ctx context.Context, installConfig *installconfig.InstallCon
}
}(),
} {
addBootstrapRules(serviceSSH, ipVersion, anyIP)
addMasterRules(serviceAPI, ipVersion, anyIP)
addMasterRules(serviceICMP, ipVersion, anyIP)
addWorkerRules(serviceHTTP, ipVersion, anyIP)
@@ -235,6 +253,9 @@ func SecurityGroups(ctx context.Context, installConfig *installconfig.InstallCon
if _, err := rules.CreateBulk(ctx, networkClient, workerRules).Extract(); err != nil {
return fmt.Errorf("failed to add the rules to the Compute security group: %w", err)
}
if _, err := rules.CreateBulk(ctx, networkClient, bootstrapRules).Extract(); err != nil {
return fmt.Errorf("failed to add the rules to the Bootstrap security group: %w", err)
}
return nil
}