1
0
mirror of https://github.com/siderolabs/omni.git synced 2026-02-05 06:45:34 +01:00

fix: fix schematic generation for machines in agent mode

We had an issue with bare metal provider where two different schematic IDs would fight each other, causing machine to get installed with a wrong schematic ID, only to be upgraded to the correct one immediately, and in some cases, go into an upgrade loop between a correct and an incorrect schematic.

The cause: Omni treated schematics it observed when the machine in agent mode dialed in, and stored the information it received (like kernel args and initial schematic info). This was wrong, as agent mode information essentially meaningless.

Fix this by changing the simple check of "was the schematic info for machine X ever observed" to be "is the schematic info for machine X ready". The readiness check involves schematic being populated and machine not being in agent mode.

This change caused `SchematicConfiguration` resource to not be generated before the machine leaves the agent mode, and caused a side effect: `InfraMachineController` would not receive Talos version from it and would not populate it on the `InfraMachine` resource. And this would cause BM provider to never get notified about the fact that the machine is allocated to a cluster, and would not power it on (to PXE boot it to "regular" Talos, for it to receive the "install" call to Omni).

Change that controller to get the Talos version info directly from the Cluster resource.

Signed-off-by: Utku Ozdemir <utku.ozdemir@siderolabs.com>
This commit is contained in:
Utku Ozdemir
2026-01-30 19:43:03 +01:00
parent e73acfdeef
commit c319d7bcf2
21 changed files with 178 additions and 145 deletions

View File

@@ -0,0 +1,9 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package specs
func (spec *MachineStatusSpec) SchematicReady() bool {
return spec.Schematic != nil && !spec.Schematic.InAgentMode && spec.Schematic.Id != "" && spec.Schematic.FullId != ""
}

View File

@@ -709,9 +709,8 @@ func (s *managementServer) MaintenanceUpgrade(ctx context.Context, req *manageme
return nil, status.Error(codes.FailedPrecondition, "machine is not in maintenance mode")
}
schematic := machineStatus.TypedSpec().Value.Schematic
if schematic == nil || schematic.FullId == "" {
return nil, status.Error(codes.FailedPrecondition, "machine schematic is not known yet")
if !machineStatus.TypedSpec().Value.SchematicReady() {
return nil, status.Error(codes.FailedPrecondition, "machine schematic is not ready yet")
}
platform := machineStatus.TypedSpec().Value.GetPlatformMetadata().GetPlatform()
@@ -726,7 +725,7 @@ func (s *managementServer) MaintenanceUpgrade(ctx context.Context, req *manageme
installImage := &specs.MachineConfigGenOptionsSpec_InstallImage{
TalosVersion: req.Version,
SchematicId: schematic.FullId,
SchematicId: machineStatus.TypedSpec().Value.Schematic.FullId,
SchematicInitialized: true,
Platform: platform,
SecurityState: securityState,

View File

@@ -103,8 +103,7 @@ func UpdateSupported(machineStatus *omni.MachineStatus, getClusterMachineConfig
}
func Calculate(machineStatus *omni.MachineStatus, kernelArgs *omni.KernelArgs) (args []string, initialized bool, err error) {
schematicConfig := machineStatus.TypedSpec().Value.Schematic
if schematicConfig == nil {
if !machineStatus.TypedSpec().Value.SchematicReady() {
return nil, false, nil
}
@@ -118,7 +117,7 @@ func Calculate(machineStatus *omni.MachineStatus, kernelArgs *omni.KernelArgs) (
extraArgs = kernelArgs.TypedSpec().Value.Args
}
baseArgs := xslices.Filter(schematicConfig.KernelArgs, isProtected)
baseArgs := xslices.Filter(machineStatus.TypedSpec().Value.Schematic.KernelArgs, isProtected)
return slices.Concat(baseArgs, extraArgs), true, nil
}

View File

@@ -74,11 +74,6 @@ func (ctrl *InfraMachineController) Settings() controller.QSettings {
Type: omni.InfraMachineConfigType,
Kind: controller.InputQMapped,
},
{
Namespace: resources.DefaultNamespace,
Type: omni.SchematicConfigurationType,
Kind: controller.InputQMapped,
},
{
Namespace: resources.DefaultNamespace,
Type: omni.MachineExtensionsType,
@@ -104,6 +99,11 @@ func (ctrl *InfraMachineController) Settings() controller.QSettings {
Type: infra.ProviderType,
Kind: controller.InputQMapped,
},
{
Namespace: resources.DefaultNamespace,
Type: omni.ClusterType,
Kind: controller.InputQMapped,
},
},
Outputs: []controller.Output{
{
@@ -127,7 +127,7 @@ func (ctrl *InfraMachineController) Settings() controller.QSettings {
}
// Reconcile implements the controller.QController interface.
func (ctrl *InfraMachineController) Reconcile(ctx context.Context, _ *zap.Logger, r controller.QRuntime, ptr resource.Pointer) error {
func (ctrl *InfraMachineController) Reconcile(ctx context.Context, logger *zap.Logger, r controller.QRuntime, ptr resource.Pointer) error {
link, err := safe.ReaderGet[*siderolink.Link](ctx, r, ptr)
if err != nil {
if !state.IsNotFoundError(err) {
@@ -143,7 +143,7 @@ func (ctrl *InfraMachineController) Reconcile(ctx context.Context, _ *zap.Logger
return ctrl.reconcileTearingDown(ctx, r, link)
}
return ctrl.reconcileRunning(ctx, r, link)
return ctrl.reconcileRunning(ctx, r, link, logger)
}
func (ctrl *InfraMachineController) reconcileTearingDown(ctx context.Context, r controller.QRuntime, link *siderolink.Link) error {
@@ -214,7 +214,7 @@ func (ctrl *InfraMachineController) handleInfraProviderDeletion(ctx context.Cont
}, controller.WithExpectedPhaseAny())
}
func (ctrl *InfraMachineController) reconcileRunning(ctx context.Context, r controller.QRuntime, link *siderolink.Link) error {
func (ctrl *InfraMachineController) reconcileRunning(ctx context.Context, r controller.QRuntime, link *siderolink.Link, logger *zap.Logger) error {
config, err := safe.ReaderGetByID[*omni.InfraMachineConfig](ctx, r, link.Metadata().ID())
if err != nil && !state.IsNotFoundError(err) {
return err
@@ -260,6 +260,7 @@ func (ctrl *InfraMachineController) reconcileRunning(ctx context.Context, r cont
machineInfoCollected: machineInfoCollected,
providerID: providerID,
controllerName: ctrl.Name(),
logger: logger,
}
return safe.WriterModify[*infra.Machine](ctx, r, infra.NewMachine(link.Metadata().ID()), func(res *infra.Machine) error {
@@ -273,6 +274,7 @@ type infraMachineControllerHelper struct {
machineExts *omni.MachineExtensions
link *siderolink.Link
nodeUniqueToken *siderolink.NodeUniqueToken
logger *zap.Logger
providerID string
controllerName string
machineInfoCollected bool
@@ -333,16 +335,6 @@ func (helper *infraMachineControllerHelper) modify(ctx context.Context, infraMac
return err
}
schematicConfig, err := safe.ReaderGetByID[*omni.SchematicConfiguration](ctx, helper.runtime, helper.link.Metadata().ID())
if err != nil {
if state.IsNotFoundError(err) {
// the schema configuration is not created yet, skip the cluster information collection
return nil
}
return err
}
var extensions []string
if helper.machineExts != nil {
@@ -351,12 +343,37 @@ func (helper *infraMachineControllerHelper) modify(ctx context.Context, infraMac
// set the cluster allocation information
infraMachine.TypedSpec().Value.ClusterTalosVersion = schematicConfig.TypedSpec().Value.TalosVersion
clusterTalosVersion, err := helper.getClusterTalosVersion(ctx, helper.runtime, clusterMachine)
if err != nil {
return err
}
infraMachine.TypedSpec().Value.ClusterTalosVersion = clusterTalosVersion
infraMachine.TypedSpec().Value.Extensions = extensions
return nil
}
func (helper *infraMachineControllerHelper) getClusterTalosVersion(ctx context.Context, r controller.Reader, clusterMachine *omni.ClusterMachine) (string, error) {
clusterID, ok := clusterMachine.Metadata().Labels().Get(omni.LabelCluster)
if !ok {
return "", fmt.Errorf("cluster machine %q is missing cluster label", clusterMachine.Metadata().ID())
}
cluster, err := safe.ReaderGetByID[*omni.Cluster](ctx, r, clusterID)
if err != nil {
if state.IsNotFoundError(err) {
helper.logger.Info("cluster not yet found for cluster machine", zap.String("cluster_machine_id", clusterMachine.Metadata().ID()), zap.String("cluster_id", clusterID))
return "", nil
}
return "", fmt.Errorf("failed to get cluster %q: %w", clusterID, err)
}
return cluster.TypedSpec().Value.TalosVersion, nil
}
// MapInput implements the controller.QController interface.
func (ctrl *InfraMachineController) MapInput(ctx context.Context, _ *zap.Logger, runtime controller.QRuntime, ptr controller.ReducedResourceMetadata) ([]resource.Pointer, error) {
switch ptr.Type() {
@@ -369,6 +386,18 @@ func (ctrl *InfraMachineController) MapInput(ctx context.Context, _ *zap.Logger,
omni.ClusterMachineType,
omni.MachineStatusType:
return []resource.Pointer{siderolink.NewLink(ptr.ID(), nil).Metadata()}, nil
case omni.ClusterType:
clusterMachineList, err := safe.ReaderListAll[*omni.ClusterMachine](ctx, runtime, state.WithLabelQuery(resource.LabelEqual(omni.LabelCluster, ptr.ID())))
if err != nil {
return nil, err
}
linkPtrs := make([]resource.Pointer, 0, clusterMachineList.Len())
for clusterMachine := range clusterMachineList.All() {
linkPtrs = append(linkPtrs, siderolink.NewLink(clusterMachine.Metadata().ID(), nil).Metadata())
}
return linkPtrs, nil
case infra.InfraProviderStatusType, infra.ProviderType:
linkList, err := safe.ReaderListAll[*siderolink.Link](ctx, runtime, state.WithLabelQuery(resource.LabelEqual(omni.LabelInfraProviderID, ptr.ID())))
if err != nil {

View File

@@ -89,12 +89,11 @@ func (suite *InfraMachineControllerSuite) TestReconcile() {
suite.Require().NoError(suite.state.Create(suite.ctx, clusterMachine))
// create schematic configuration
schematicConfig := omni.NewSchematicConfiguration("machine-1")
// create cluster with talos version
cluster := omni.NewCluster("test-cluster")
cluster.TypedSpec().Value.TalosVersion = "v42.0.0"
schematicConfig.TypedSpec().Value.TalosVersion = "v42.0.0"
suite.Require().NoError(suite.state.Create(suite.ctx, schematicConfig))
suite.Require().NoError(suite.state.Create(suite.ctx, cluster))
// assert that the cluster machine has the correct talos version
assertResource[*infra.Machine](&suite.OmniSuite, clusterMachine.Metadata(), func(r *infra.Machine, assertion *assert.Assertions) {

View File

@@ -11,7 +11,6 @@ import (
"strings"
"github.com/cosi-project/runtime/pkg/safe"
"github.com/siderolabs/go-pointer"
"github.com/siderolabs/image-factory/pkg/constants"
"github.com/siderolabs/image-factory/pkg/schematic"
"github.com/siderolabs/talos/pkg/machinery/client"
@@ -54,15 +53,12 @@ func GetSchematicInfo(ctx context.Context, c *client.Client, defaultKernelArgs [
fullID string
rawSchematic = &schematic.Schematic{}
manifest string
inAgentMode bool
)
err = items.ForEachErr(func(status *runtime.ExtensionStatus) error {
for status := range items.All() {
name := status.TypedSpec().Metadata.Name
if name == extensions.MetalAgentExtensionName {
inAgentMode = true
return nil
return SchematicInfo{InAgentMode: true}, nil
}
if name == constants.SchematicIDExtensionName { // skip the meta extension
@@ -71,14 +67,16 @@ func GetSchematicInfo(ctx context.Context, c *client.Client, defaultKernelArgs [
if status.TypedSpec().Metadata.ExtraInfo != "" {
manifest = status.TypedSpec().Metadata.ExtraInfo
return yaml.Unmarshal([]byte(manifest), rawSchematic)
if err = yaml.Unmarshal([]byte(manifest), rawSchematic); err != nil {
return SchematicInfo{}, fmt.Errorf("failed to unmarshal schematic manifest: %w", err)
}
}
return nil
continue
}
if name == "modules.dep" { // ignore the virtual extension used for kernel modules dependencies
return nil
continue
}
if !strings.HasPrefix(name, extensions.OfficialPrefix) {
@@ -86,24 +84,6 @@ func GetSchematicInfo(ctx context.Context, c *client.Client, defaultKernelArgs [
}
exts = append(exts, name)
return nil
})
if err != nil {
return SchematicInfo{}, err
}
if inAgentMode {
id, idErr := pointer.To(schematic.Schematic{}).ID()
if idErr != nil {
return SchematicInfo{}, fmt.Errorf("failed to calculate extensions schematic ID: %w", idErr)
}
return SchematicInfo{
ID: id,
FullID: id,
InAgentMode: true,
}, nil
}
exts = extensions.MapNames(exts)

View File

@@ -40,8 +40,8 @@ func NewStatusController() *StatusController {
status.TypedSpec().Value.UnmetConditions = nil
status.TypedSpec().Value.CurrentCmdline = ms.TypedSpec().Value.KernelCmdline
if ms.TypedSpec().Value.Schematic == nil {
status.TypedSpec().Value.UnmetConditions = append(status.TypedSpec().Value.UnmetConditions, "Schematic information is not yet known")
if !ms.TypedSpec().Value.SchematicReady() {
status.TypedSpec().Value.UnmetConditions = append(status.TypedSpec().Value.UnmetConditions, "Schematic information is not yet ready")
} else {
status.TypedSpec().Value.CurrentArgs = kernelargs.FilterExtras(ms.TypedSpec().Value.Schematic.KernelArgs)
}

View File

@@ -38,7 +38,7 @@ func TestReconcile(t *testing.T) {
rtestutils.AssertResource(ctx, t, testContext.State, id, func(res *omni.KernelArgsStatus, assertion *assert.Assertions) {
assertion.Equal([]string{
"Schematic information is not yet known",
"Schematic information is not yet ready",
"Talos is not installed, kernel args cannot be updated yet",
"Cannot determine if kernel args update is supported: SecurityState and TalosVersion are not yet set",
}, res.TypedSpec().Value.UnmetConditions)
@@ -59,6 +59,8 @@ func TestReconcile(t *testing.T) {
}
res.TypedSpec().Value.Schematic = &specs.MachineStatusSpec_Schematic{
Id: "test-id",
FullId: "test-full-id",
KernelArgs: []string{"arg-1", "arg-2"},
}

View File

@@ -69,7 +69,7 @@ func GenInstallConfig(machineStatus *omni.MachineStatus, clusterMachineTalosVers
genOptions.TypedSpec().Value.InstallImage.SchematicId = clusterMachineTalosVersion.TypedSpec().Value.SchematicId
genOptions.TypedSpec().Value.InstallImage.TalosVersion = clusterMachineTalosVersion.TypedSpec().Value.TalosVersion
genOptions.TypedSpec().Value.InstallImage.SchematicInitialized = machineStatus.TypedSpec().Value.Schematic != nil
genOptions.TypedSpec().Value.InstallImage.SchematicInitialized = machineStatus.TypedSpec().Value.SchematicReady()
if genOptions.TypedSpec().Value.InstallImage.SchematicInitialized {
genOptions.TypedSpec().Value.InstallImage.SchematicInvalid = machineStatus.TypedSpec().Value.GetSchematic().GetInvalid()

View File

@@ -482,57 +482,8 @@ func (ctrl *MachineStatusController) handleNotification(ctx context.Context, r c
}
if event.Schematic != nil {
if spec.Schematic == nil {
spec.Schematic = &specs.MachineStatusSpec_Schematic{}
}
spec.Schematic.Raw = event.Schematic.Raw
spec.Schematic.Id = event.Schematic.ID
spec.Schematic.FullId = event.Schematic.FullID
spec.Schematic.Extensions = event.Schematic.Extensions
spec.Schematic.Invalid = event.Schematic.Invalid
spec.Schematic.KernelArgs = event.Schematic.KernelArgs
spec.Schematic.MetaValues = xslices.Map(event.Schematic.MetaValues, func(value schematic.MetaValue) *specs.MetaValue {
return &specs.MetaValue{
Key: uint32(value.Key),
Value: value.Value,
}
})
if event.Schematic.Overlay.Name != "" {
spec.Schematic.Overlay = &specs.Overlay{
Name: event.Schematic.Overlay.Name,
Image: event.Schematic.Overlay.Image,
}
}
if spec.Schematic.InitialSchematic == "" {
spec.Schematic.InitialSchematic = spec.Schematic.FullId
}
spec.Schematic.InAgentMode = event.Schematic.InAgentMode
// We populate the initial state on a best-effort basis: we might have missed the moment to capture it, but it is acceptable.
if spec.Schematic.InitialState == nil {
spec.Schematic.InitialState = &specs.MachineStatusSpec_Schematic_InitialState{
Extensions: event.Schematic.Extensions,
}
}
// if the schematic is invalid or the machine is in agent mode, we reset the initial schematic information
if spec.Schematic.Invalid || spec.Schematic.InAgentMode {
spec.Schematic.InitialSchematic = ""
spec.Schematic.InitialState = &specs.MachineStatusSpec_Schematic_InitialState{} // reset to be empty but leave it initialized
}
_, kernelArgsInitialized := m.Metadata().Annotations().Get(omni.KernelArgsInitialized)
if !kernelArgsInitialized {
if err := ctrl.kernelArgsInitializer.Init(ctx, m.Metadata().ID(), event.Schematic.KernelArgs); err != nil {
return fmt.Errorf("failed to initialize extra kernel args: %w", err)
}
m.Metadata().Annotations().Set(omni.KernelArgsInitialized, "")
if err := ctrl.handleEventSchematic(ctx, m, event); err != nil {
return fmt.Errorf("failed to handle schematic event: %w", err)
}
}
@@ -565,6 +516,74 @@ func (ctrl *MachineStatusController) handleNotification(ctx context.Context, r c
return nil
}
func (ctrl *MachineStatusController) handleEventSchematic(ctx context.Context, ms *omni.MachineStatus, event machinetask.Info) error {
spec := ms.TypedSpec().Value
// If the machine is in agent mode, reset the schematic information as it were never observed.
// At that stage, anything on it (extensions, kernel args, etc.) is irrelevant and even wrong for Omni to process.
if event.Schematic.InAgentMode {
spec.Schematic = &specs.MachineStatusSpec_Schematic{
InAgentMode: true,
}
return nil
}
if spec.Schematic == nil {
spec.Schematic = &specs.MachineStatusSpec_Schematic{}
}
spec.Schematic.Raw = event.Schematic.Raw
spec.Schematic.Id = event.Schematic.ID
spec.Schematic.FullId = event.Schematic.FullID
spec.Schematic.Extensions = event.Schematic.Extensions
spec.Schematic.Invalid = event.Schematic.Invalid
spec.Schematic.KernelArgs = event.Schematic.KernelArgs
spec.Schematic.InAgentMode = event.Schematic.InAgentMode
spec.Schematic.MetaValues = xslices.Map(event.Schematic.MetaValues, func(value schematic.MetaValue) *specs.MetaValue {
return &specs.MetaValue{
Key: uint32(value.Key),
Value: value.Value,
}
})
if event.Schematic.Overlay.Name != "" {
spec.Schematic.Overlay = &specs.Overlay{
Name: event.Schematic.Overlay.Name,
Image: event.Schematic.Overlay.Image,
}
}
if spec.Schematic.InitialSchematic == "" {
spec.Schematic.InitialSchematic = spec.Schematic.FullId
}
// We populate the initial state on a best-effort basis: we might have missed the moment to capture it, but it is acceptable.
if spec.Schematic.InitialState == nil {
spec.Schematic.InitialState = &specs.MachineStatusSpec_Schematic_InitialState{
Extensions: event.Schematic.Extensions,
}
}
// If the schematic is invalid, reset the initial schematic information.
if spec.Schematic.Invalid {
spec.Schematic.InitialSchematic = ""
spec.Schematic.InitialState = &specs.MachineStatusSpec_Schematic_InitialState{} // reset to be empty but leave it initialized
}
_, kernelArgsInitialized := ms.Metadata().Annotations().Get(omni.KernelArgsInitialized)
if !kernelArgsInitialized {
if err := ctrl.kernelArgsInitializer.Init(ctx, ms.Metadata().ID(), event.Schematic.KernelArgs); err != nil {
return fmt.Errorf("failed to initialize extra kernel args: %w", err)
}
ms.Metadata().Annotations().Set(omni.KernelArgsInitialized, "")
}
return nil
}
// reconcileLabels is a wrapper around omni.MachineStatusReconcileLabels, but it overrides the "installed" label
// based on a matching infra.MachineStatus for that machine.
//

View File

@@ -440,11 +440,7 @@ func (suite *MachineStatusSuite) TestMachineSchematic() {
},
},
expected: &specs.MachineStatusSpec_Schematic{
Id: defaultSchematic,
InitialSchematic: "",
FullId: defaultSchematic,
InAgentMode: true,
InitialState: &specs.MachineStatusSpec_Schematic_InitialState{},
InAgentMode: true,
},
},
} {

View File

@@ -73,15 +73,11 @@ func checkClusterReady(ctx context.Context, r controller.Reader, machineConfig *
func checkMachineStatus(ctx context.Context, r controller.Reader, machineStatus *omni.MachineStatus) error {
if !machineStatus.TypedSpec().Value.Connected {
return xerrors.NewTaggedf[qtransform.SkipReconcileTag]("'%s' machine is not connected", machineStatus.Metadata().ID())
return xerrors.NewTaggedf[qtransform.SkipReconcileTag]("machine %q is not connected", machineStatus.Metadata().ID())
}
if machineStatus.TypedSpec().Value.Schematic == nil {
return xerrors.NewTagged[qtransform.SkipReconcileTag](fmt.Errorf("machine status '%s' does not have schematic information", machineStatus.Metadata().ID()))
}
if machineStatus.TypedSpec().Value.Schematic.InAgentMode {
return xerrors.NewTagged[qtransform.SkipReconcileTag](fmt.Errorf("machine status '%s' schematic is in agent mode", machineStatus.Metadata().ID()))
if !machineStatus.TypedSpec().Value.SchematicReady() {
return xerrors.NewTaggedf[qtransform.SkipReconcileTag]("machine %q schematic is not ready", machineStatus.Metadata().ID())
}
// if the machine is managed by a static infra provider, we need to ensure that the infra machine is ready to use
@@ -94,7 +90,7 @@ func checkMachineStatus(ctx context.Context, r controller.Reader, machineStatus
return xerrors.NewTaggedf[qtransform.SkipReconcileTag]("machine is managed by a static infra provider but the infra machine status is not found: %w", err)
}
return fmt.Errorf("failed to get infra machine status '%s': %w", machineStatus.Metadata().ID(), err)
return fmt.Errorf("failed to get infra machine status %q: %w", machineStatus.Metadata().ID(), err)
}
if !infraMachineStatus.TypedSpec().Value.ReadyToUse {
@@ -134,7 +130,7 @@ func BuildReconciliationContext(ctx context.Context, r controller.Reader,
machineSetNode, err := safe.ReaderGetByID[*omni.MachineSetNode](ctx, r, machineConfig.Metadata().ID())
if err != nil {
if state.IsNotFoundError(err) {
return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("'%s' machine set node not found: %w", machineConfig.Metadata().ID(), err)
return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("%q machine set node not found: %w", machineConfig.Metadata().ID(), err)
}
return nil, err
@@ -176,19 +172,19 @@ func BuildReconciliationContext(ctx context.Context, r controller.Reader,
)
if err != nil {
if state.IsNotFoundError(err) {
return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("'%s' machine status snapshot not found: %w", machineConfig.Metadata().ID(), err)
return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("%q machine status snapshot not found: %w", machineConfig.Metadata().ID(), err)
}
return nil, fmt.Errorf("failed to get machine status snapshot '%s': %w", machineConfig.Metadata().ID(), err)
return nil, fmt.Errorf("failed to get machine status snapshot %q: %w", machineConfig.Metadata().ID(), err)
}
rc.machineStatus, err = safe.ReaderGetByID[*omni.MachineStatus](ctx, r, machineConfig.Metadata().ID())
if err != nil {
if state.IsNotFoundError(err) {
return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("'%s' machine status not found: %w", machineConfig.Metadata().ID(), err)
return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("%q machine status not found: %w", machineConfig.Metadata().ID(), err)
}
return nil, fmt.Errorf("failed to get machine status '%s': %w", machineConfig.Metadata().ID(), err)
return nil, fmt.Errorf("failed to get machine status %q: %w", machineConfig.Metadata().ID(), err)
}
if err = checkMachineStatus(ctx, r, rc.machineStatus); err != nil {
@@ -198,10 +194,10 @@ func BuildReconciliationContext(ctx context.Context, r controller.Reader,
genOptions, err := safe.ReaderGetByID[*omni.MachineConfigGenOptions](ctx, r, machineConfig.Metadata().ID())
if err != nil {
if state.IsNotFoundError(err) {
return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("'%s' machine config gen options not found: %w", machineConfig.Metadata().ID(), err)
return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("%q machine config gen options not found: %w", machineConfig.Metadata().ID(), err)
}
return nil, fmt.Errorf("failed to get install image '%s': %w", machineConfig.Metadata().ID(), err)
return nil, fmt.Errorf("failed to get install image %q: %w", machineConfig.Metadata().ID(), err)
}
rc.installImage = genOptions.TypedSpec().Value.InstallImage

View File

@@ -781,7 +781,7 @@ func (ctrl *ClusterMachineConfigStatusController) shouldResetGraceful(
machineSetStatus.TypedSpec().Value.Phase != specs.MachineSetPhase_Destroying, nil
}
func (h *ClusterMachineConfigStatusController) gracefulEtcdLeave(ctx context.Context, c *client.Client, id string) error {
func (ctrl *ClusterMachineConfigStatusController) gracefulEtcdLeave(ctx context.Context, c *client.Client, id string) error {
_, err := c.EtcdForfeitLeadership(ctx, &machineapi.EtcdForfeitLeadershipRequest{})
if err != nil {
return fmt.Errorf("failed to forfeit leadership, node %q: %w", id, err)

View File

@@ -69,8 +69,7 @@ func (ctrl *StatusController) transform(ctx context.Context, r controller.Reader
status.TypedSpec().Value.IsMaintenance = ms.TypedSpec().Value.Maintenance
schematicSpec := ms.TypedSpec().Value.Schematic
if schematicSpec == nil {
if !ms.TypedSpec().Value.SchematicReady() {
status.TypedSpec().Value.Phase = specs.MachineUpgradeStatusSpec_Unknown
status.TypedSpec().Value.Status = "schematic info is not available"
status.TypedSpec().Value.Error = ""
@@ -78,6 +77,8 @@ func (ctrl *StatusController) transform(ctx context.Context, r controller.Reader
return nil
}
schematicSpec := ms.TypedSpec().Value.Schematic
status.TypedSpec().Value.CurrentSchematicId = schematicSpec.FullId
if schematicSpec.Raw == "" {

View File

@@ -150,6 +150,7 @@ func TestReconcile(t *testing.T) {
{Key: 1, Value: "value1"},
{Key: 2, Value: "value2"},
},
Id: "test-id",
FullId: currentSchematicID,
Raw: string(currentSchematicRaw),
InitialState: &specs.MachineStatusSpec_Schematic_InitialState{

View File

@@ -513,6 +513,7 @@ func (suite *OmniSuite) createClusterWithTalosVersion(clusterName string, contro
machineStatus.TypedSpec().Value.ManagementAddress = suite.socketConnectionString
machineStatus.TypedSpec().Value.Schematic = &specs.MachineStatusSpec_Schematic{
Id: defaultSchematic,
FullId: defaultSchematic,
InitialState: &specs.MachineStatusSpec_Schematic_InitialState{},
}
machineStatus.TypedSpec().Value.InitialTalosVersion = cluster.TypedSpec().Value.TalosVersion

View File

@@ -143,8 +143,8 @@ func (helper *schematicConfigurationHelper) reconcile(
ms *omni.MachineStatus,
schematicConfiguration *omni.SchematicConfiguration,
) (*omni.MachineExtensionsStatus, error) {
if ms.TypedSpec().Value.Schematic == nil {
return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("schematic information is not yet available")
if !ms.TypedSpec().Value.SchematicReady() {
return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("schematic is not yet ready")
}
securityState := ms.TypedSpec().Value.SecurityState
@@ -368,8 +368,8 @@ func determineKernelArgs(ctx context.Context, machineStatus *omni.MachineStatus,
}
if !updateSupported {
if machineStatus.TypedSpec().Value.Schematic == nil {
return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("machine schematic is not yet initialized")
if !machineStatus.TypedSpec().Value.SchematicReady() {
return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("machine schematic is not yet ready")
}
// keep them unchanged - take the current args as-is
@@ -437,7 +437,7 @@ func computeMachineExtensionsStatus(ms *omni.MachineStatus, customization *machi
requestedExtensions set.Set[string]
)
if ms.TypedSpec().Value.Schematic != nil {
if ms.TypedSpec().Value.SchematicReady() {
installedExtensions = xslices.ToSet(ms.TypedSpec().Value.Schematic.Extensions)
}

View File

@@ -72,6 +72,8 @@ func (suite *SchematicConfigurationSuite) TestReconcile() {
machineStatus.TypedSpec().Value.TalosVersion = talosVersion
machineStatus.TypedSpec().Value.Schematic = &specs.MachineStatusSpec_Schematic{
Id: "test-id",
FullId: "test-full-id",
Extensions: []string{"siderolabs/hello-world-service"},
InitialSchematic: expectedSchematic,
InitialState: &specs.MachineStatusSpec_Schematic_InitialState{

View File

@@ -555,8 +555,7 @@ func populateEmptySchematics(ctx context.Context, r controller.ReaderWriter, clu
return err
}
schematic := machineStatus.TypedSpec().Value.Schematic
if schematic == nil {
if !machineStatus.TypedSpec().Value.SchematicReady() {
return nil
}
@@ -566,7 +565,7 @@ func populateEmptySchematics(ctx context.Context, r controller.ReaderWriter, clu
}
return safe.WriterModify(ctx, r, clusterMachineTalosVersion, func(res *omni.ClusterMachineTalosVersion) error {
res.TypedSpec().Value.SchematicId = schematic.InitialSchematic
res.TypedSpec().Value.SchematicId = machineStatus.TypedSpec().Value.Schematic.InitialSchematic
return nil
})

View File

@@ -144,6 +144,7 @@ func init() {
res.TypedSpec().Value.Schematic = &specs.MachineStatusSpec_Schematic{
Id: defaultSchematic,
FullId: defaultSchematic,
InitialState: &specs.MachineStatusSpec_Schematic_InitialState{},
}

View File

@@ -357,7 +357,7 @@ func preRunHooks(t *testing.T, options *TestOptions) {
func postRunHooks(t *testing.T, options *TestOptions) {
if t.Failed() {
t.Logf("there are failed tests, save support bundle for all cluster")
t.Logf("there are failed tests, save support bundle for all clusters")
saveAllSupportBundles(t, options.omniClient, options.OutputDir)