mirror of
https://github.com/siderolabs/omni.git
synced 2026-02-05 06:45:34 +01:00
fix: fix schematic generation for machines in agent mode
We had an issue with bare metal provider where two different schematic IDs would fight each other, causing machine to get installed with a wrong schematic ID, only to be upgraded to the correct one immediately, and in some cases, go into an upgrade loop between a correct and an incorrect schematic. The cause: Omni treated schematics it observed when the machine in agent mode dialed in, and stored the information it received (like kernel args and initial schematic info). This was wrong, as agent mode information essentially meaningless. Fix this by changing the simple check of "was the schematic info for machine X ever observed" to be "is the schematic info for machine X ready". The readiness check involves schematic being populated and machine not being in agent mode. This change caused `SchematicConfiguration` resource to not be generated before the machine leaves the agent mode, and caused a side effect: `InfraMachineController` would not receive Talos version from it and would not populate it on the `InfraMachine` resource. And this would cause BM provider to never get notified about the fact that the machine is allocated to a cluster, and would not power it on (to PXE boot it to "regular" Talos, for it to receive the "install" call to Omni). Change that controller to get the Talos version info directly from the Cluster resource. Signed-off-by: Utku Ozdemir <utku.ozdemir@siderolabs.com>
This commit is contained in:
9
client/api/omni/specs/machine_status.go
Normal file
9
client/api/omni/specs/machine_status.go
Normal file
@@ -0,0 +1,9 @@
|
||||
// This Source Code Form is subject to the terms of the Mozilla Public
|
||||
// License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
package specs
|
||||
|
||||
func (spec *MachineStatusSpec) SchematicReady() bool {
|
||||
return spec.Schematic != nil && !spec.Schematic.InAgentMode && spec.Schematic.Id != "" && spec.Schematic.FullId != ""
|
||||
}
|
||||
@@ -709,9 +709,8 @@ func (s *managementServer) MaintenanceUpgrade(ctx context.Context, req *manageme
|
||||
return nil, status.Error(codes.FailedPrecondition, "machine is not in maintenance mode")
|
||||
}
|
||||
|
||||
schematic := machineStatus.TypedSpec().Value.Schematic
|
||||
if schematic == nil || schematic.FullId == "" {
|
||||
return nil, status.Error(codes.FailedPrecondition, "machine schematic is not known yet")
|
||||
if !machineStatus.TypedSpec().Value.SchematicReady() {
|
||||
return nil, status.Error(codes.FailedPrecondition, "machine schematic is not ready yet")
|
||||
}
|
||||
|
||||
platform := machineStatus.TypedSpec().Value.GetPlatformMetadata().GetPlatform()
|
||||
@@ -726,7 +725,7 @@ func (s *managementServer) MaintenanceUpgrade(ctx context.Context, req *manageme
|
||||
|
||||
installImage := &specs.MachineConfigGenOptionsSpec_InstallImage{
|
||||
TalosVersion: req.Version,
|
||||
SchematicId: schematic.FullId,
|
||||
SchematicId: machineStatus.TypedSpec().Value.Schematic.FullId,
|
||||
SchematicInitialized: true,
|
||||
Platform: platform,
|
||||
SecurityState: securityState,
|
||||
|
||||
@@ -103,8 +103,7 @@ func UpdateSupported(machineStatus *omni.MachineStatus, getClusterMachineConfig
|
||||
}
|
||||
|
||||
func Calculate(machineStatus *omni.MachineStatus, kernelArgs *omni.KernelArgs) (args []string, initialized bool, err error) {
|
||||
schematicConfig := machineStatus.TypedSpec().Value.Schematic
|
||||
if schematicConfig == nil {
|
||||
if !machineStatus.TypedSpec().Value.SchematicReady() {
|
||||
return nil, false, nil
|
||||
}
|
||||
|
||||
@@ -118,7 +117,7 @@ func Calculate(machineStatus *omni.MachineStatus, kernelArgs *omni.KernelArgs) (
|
||||
extraArgs = kernelArgs.TypedSpec().Value.Args
|
||||
}
|
||||
|
||||
baseArgs := xslices.Filter(schematicConfig.KernelArgs, isProtected)
|
||||
baseArgs := xslices.Filter(machineStatus.TypedSpec().Value.Schematic.KernelArgs, isProtected)
|
||||
|
||||
return slices.Concat(baseArgs, extraArgs), true, nil
|
||||
}
|
||||
|
||||
@@ -74,11 +74,6 @@ func (ctrl *InfraMachineController) Settings() controller.QSettings {
|
||||
Type: omni.InfraMachineConfigType,
|
||||
Kind: controller.InputQMapped,
|
||||
},
|
||||
{
|
||||
Namespace: resources.DefaultNamespace,
|
||||
Type: omni.SchematicConfigurationType,
|
||||
Kind: controller.InputQMapped,
|
||||
},
|
||||
{
|
||||
Namespace: resources.DefaultNamespace,
|
||||
Type: omni.MachineExtensionsType,
|
||||
@@ -104,6 +99,11 @@ func (ctrl *InfraMachineController) Settings() controller.QSettings {
|
||||
Type: infra.ProviderType,
|
||||
Kind: controller.InputQMapped,
|
||||
},
|
||||
{
|
||||
Namespace: resources.DefaultNamespace,
|
||||
Type: omni.ClusterType,
|
||||
Kind: controller.InputQMapped,
|
||||
},
|
||||
},
|
||||
Outputs: []controller.Output{
|
||||
{
|
||||
@@ -127,7 +127,7 @@ func (ctrl *InfraMachineController) Settings() controller.QSettings {
|
||||
}
|
||||
|
||||
// Reconcile implements the controller.QController interface.
|
||||
func (ctrl *InfraMachineController) Reconcile(ctx context.Context, _ *zap.Logger, r controller.QRuntime, ptr resource.Pointer) error {
|
||||
func (ctrl *InfraMachineController) Reconcile(ctx context.Context, logger *zap.Logger, r controller.QRuntime, ptr resource.Pointer) error {
|
||||
link, err := safe.ReaderGet[*siderolink.Link](ctx, r, ptr)
|
||||
if err != nil {
|
||||
if !state.IsNotFoundError(err) {
|
||||
@@ -143,7 +143,7 @@ func (ctrl *InfraMachineController) Reconcile(ctx context.Context, _ *zap.Logger
|
||||
return ctrl.reconcileTearingDown(ctx, r, link)
|
||||
}
|
||||
|
||||
return ctrl.reconcileRunning(ctx, r, link)
|
||||
return ctrl.reconcileRunning(ctx, r, link, logger)
|
||||
}
|
||||
|
||||
func (ctrl *InfraMachineController) reconcileTearingDown(ctx context.Context, r controller.QRuntime, link *siderolink.Link) error {
|
||||
@@ -214,7 +214,7 @@ func (ctrl *InfraMachineController) handleInfraProviderDeletion(ctx context.Cont
|
||||
}, controller.WithExpectedPhaseAny())
|
||||
}
|
||||
|
||||
func (ctrl *InfraMachineController) reconcileRunning(ctx context.Context, r controller.QRuntime, link *siderolink.Link) error {
|
||||
func (ctrl *InfraMachineController) reconcileRunning(ctx context.Context, r controller.QRuntime, link *siderolink.Link, logger *zap.Logger) error {
|
||||
config, err := safe.ReaderGetByID[*omni.InfraMachineConfig](ctx, r, link.Metadata().ID())
|
||||
if err != nil && !state.IsNotFoundError(err) {
|
||||
return err
|
||||
@@ -260,6 +260,7 @@ func (ctrl *InfraMachineController) reconcileRunning(ctx context.Context, r cont
|
||||
machineInfoCollected: machineInfoCollected,
|
||||
providerID: providerID,
|
||||
controllerName: ctrl.Name(),
|
||||
logger: logger,
|
||||
}
|
||||
|
||||
return safe.WriterModify[*infra.Machine](ctx, r, infra.NewMachine(link.Metadata().ID()), func(res *infra.Machine) error {
|
||||
@@ -273,6 +274,7 @@ type infraMachineControllerHelper struct {
|
||||
machineExts *omni.MachineExtensions
|
||||
link *siderolink.Link
|
||||
nodeUniqueToken *siderolink.NodeUniqueToken
|
||||
logger *zap.Logger
|
||||
providerID string
|
||||
controllerName string
|
||||
machineInfoCollected bool
|
||||
@@ -333,16 +335,6 @@ func (helper *infraMachineControllerHelper) modify(ctx context.Context, infraMac
|
||||
return err
|
||||
}
|
||||
|
||||
schematicConfig, err := safe.ReaderGetByID[*omni.SchematicConfiguration](ctx, helper.runtime, helper.link.Metadata().ID())
|
||||
if err != nil {
|
||||
if state.IsNotFoundError(err) {
|
||||
// the schema configuration is not created yet, skip the cluster information collection
|
||||
return nil
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
var extensions []string
|
||||
|
||||
if helper.machineExts != nil {
|
||||
@@ -351,12 +343,37 @@ func (helper *infraMachineControllerHelper) modify(ctx context.Context, infraMac
|
||||
|
||||
// set the cluster allocation information
|
||||
|
||||
infraMachine.TypedSpec().Value.ClusterTalosVersion = schematicConfig.TypedSpec().Value.TalosVersion
|
||||
clusterTalosVersion, err := helper.getClusterTalosVersion(ctx, helper.runtime, clusterMachine)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
infraMachine.TypedSpec().Value.ClusterTalosVersion = clusterTalosVersion
|
||||
infraMachine.TypedSpec().Value.Extensions = extensions
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (helper *infraMachineControllerHelper) getClusterTalosVersion(ctx context.Context, r controller.Reader, clusterMachine *omni.ClusterMachine) (string, error) {
|
||||
clusterID, ok := clusterMachine.Metadata().Labels().Get(omni.LabelCluster)
|
||||
if !ok {
|
||||
return "", fmt.Errorf("cluster machine %q is missing cluster label", clusterMachine.Metadata().ID())
|
||||
}
|
||||
|
||||
cluster, err := safe.ReaderGetByID[*omni.Cluster](ctx, r, clusterID)
|
||||
if err != nil {
|
||||
if state.IsNotFoundError(err) {
|
||||
helper.logger.Info("cluster not yet found for cluster machine", zap.String("cluster_machine_id", clusterMachine.Metadata().ID()), zap.String("cluster_id", clusterID))
|
||||
|
||||
return "", nil
|
||||
}
|
||||
|
||||
return "", fmt.Errorf("failed to get cluster %q: %w", clusterID, err)
|
||||
}
|
||||
|
||||
return cluster.TypedSpec().Value.TalosVersion, nil
|
||||
}
|
||||
|
||||
// MapInput implements the controller.QController interface.
|
||||
func (ctrl *InfraMachineController) MapInput(ctx context.Context, _ *zap.Logger, runtime controller.QRuntime, ptr controller.ReducedResourceMetadata) ([]resource.Pointer, error) {
|
||||
switch ptr.Type() {
|
||||
@@ -369,6 +386,18 @@ func (ctrl *InfraMachineController) MapInput(ctx context.Context, _ *zap.Logger,
|
||||
omni.ClusterMachineType,
|
||||
omni.MachineStatusType:
|
||||
return []resource.Pointer{siderolink.NewLink(ptr.ID(), nil).Metadata()}, nil
|
||||
case omni.ClusterType:
|
||||
clusterMachineList, err := safe.ReaderListAll[*omni.ClusterMachine](ctx, runtime, state.WithLabelQuery(resource.LabelEqual(omni.LabelCluster, ptr.ID())))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
linkPtrs := make([]resource.Pointer, 0, clusterMachineList.Len())
|
||||
for clusterMachine := range clusterMachineList.All() {
|
||||
linkPtrs = append(linkPtrs, siderolink.NewLink(clusterMachine.Metadata().ID(), nil).Metadata())
|
||||
}
|
||||
|
||||
return linkPtrs, nil
|
||||
case infra.InfraProviderStatusType, infra.ProviderType:
|
||||
linkList, err := safe.ReaderListAll[*siderolink.Link](ctx, runtime, state.WithLabelQuery(resource.LabelEqual(omni.LabelInfraProviderID, ptr.ID())))
|
||||
if err != nil {
|
||||
|
||||
@@ -89,12 +89,11 @@ func (suite *InfraMachineControllerSuite) TestReconcile() {
|
||||
|
||||
suite.Require().NoError(suite.state.Create(suite.ctx, clusterMachine))
|
||||
|
||||
// create schematic configuration
|
||||
schematicConfig := omni.NewSchematicConfiguration("machine-1")
|
||||
// create cluster with talos version
|
||||
cluster := omni.NewCluster("test-cluster")
|
||||
cluster.TypedSpec().Value.TalosVersion = "v42.0.0"
|
||||
|
||||
schematicConfig.TypedSpec().Value.TalosVersion = "v42.0.0"
|
||||
|
||||
suite.Require().NoError(suite.state.Create(suite.ctx, schematicConfig))
|
||||
suite.Require().NoError(suite.state.Create(suite.ctx, cluster))
|
||||
|
||||
// assert that the cluster machine has the correct talos version
|
||||
assertResource[*infra.Machine](&suite.OmniSuite, clusterMachine.Metadata(), func(r *infra.Machine, assertion *assert.Assertions) {
|
||||
|
||||
@@ -11,7 +11,6 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/cosi-project/runtime/pkg/safe"
|
||||
"github.com/siderolabs/go-pointer"
|
||||
"github.com/siderolabs/image-factory/pkg/constants"
|
||||
"github.com/siderolabs/image-factory/pkg/schematic"
|
||||
"github.com/siderolabs/talos/pkg/machinery/client"
|
||||
@@ -54,15 +53,12 @@ func GetSchematicInfo(ctx context.Context, c *client.Client, defaultKernelArgs [
|
||||
fullID string
|
||||
rawSchematic = &schematic.Schematic{}
|
||||
manifest string
|
||||
inAgentMode bool
|
||||
)
|
||||
|
||||
err = items.ForEachErr(func(status *runtime.ExtensionStatus) error {
|
||||
for status := range items.All() {
|
||||
name := status.TypedSpec().Metadata.Name
|
||||
if name == extensions.MetalAgentExtensionName {
|
||||
inAgentMode = true
|
||||
|
||||
return nil
|
||||
return SchematicInfo{InAgentMode: true}, nil
|
||||
}
|
||||
|
||||
if name == constants.SchematicIDExtensionName { // skip the meta extension
|
||||
@@ -71,14 +67,16 @@ func GetSchematicInfo(ctx context.Context, c *client.Client, defaultKernelArgs [
|
||||
if status.TypedSpec().Metadata.ExtraInfo != "" {
|
||||
manifest = status.TypedSpec().Metadata.ExtraInfo
|
||||
|
||||
return yaml.Unmarshal([]byte(manifest), rawSchematic)
|
||||
if err = yaml.Unmarshal([]byte(manifest), rawSchematic); err != nil {
|
||||
return SchematicInfo{}, fmt.Errorf("failed to unmarshal schematic manifest: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
continue
|
||||
}
|
||||
|
||||
if name == "modules.dep" { // ignore the virtual extension used for kernel modules dependencies
|
||||
return nil
|
||||
continue
|
||||
}
|
||||
|
||||
if !strings.HasPrefix(name, extensions.OfficialPrefix) {
|
||||
@@ -86,24 +84,6 @@ func GetSchematicInfo(ctx context.Context, c *client.Client, defaultKernelArgs [
|
||||
}
|
||||
|
||||
exts = append(exts, name)
|
||||
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return SchematicInfo{}, err
|
||||
}
|
||||
|
||||
if inAgentMode {
|
||||
id, idErr := pointer.To(schematic.Schematic{}).ID()
|
||||
if idErr != nil {
|
||||
return SchematicInfo{}, fmt.Errorf("failed to calculate extensions schematic ID: %w", idErr)
|
||||
}
|
||||
|
||||
return SchematicInfo{
|
||||
ID: id,
|
||||
FullID: id,
|
||||
InAgentMode: true,
|
||||
}, nil
|
||||
}
|
||||
|
||||
exts = extensions.MapNames(exts)
|
||||
|
||||
@@ -40,8 +40,8 @@ func NewStatusController() *StatusController {
|
||||
status.TypedSpec().Value.UnmetConditions = nil
|
||||
status.TypedSpec().Value.CurrentCmdline = ms.TypedSpec().Value.KernelCmdline
|
||||
|
||||
if ms.TypedSpec().Value.Schematic == nil {
|
||||
status.TypedSpec().Value.UnmetConditions = append(status.TypedSpec().Value.UnmetConditions, "Schematic information is not yet known")
|
||||
if !ms.TypedSpec().Value.SchematicReady() {
|
||||
status.TypedSpec().Value.UnmetConditions = append(status.TypedSpec().Value.UnmetConditions, "Schematic information is not yet ready")
|
||||
} else {
|
||||
status.TypedSpec().Value.CurrentArgs = kernelargs.FilterExtras(ms.TypedSpec().Value.Schematic.KernelArgs)
|
||||
}
|
||||
|
||||
@@ -38,7 +38,7 @@ func TestReconcile(t *testing.T) {
|
||||
|
||||
rtestutils.AssertResource(ctx, t, testContext.State, id, func(res *omni.KernelArgsStatus, assertion *assert.Assertions) {
|
||||
assertion.Equal([]string{
|
||||
"Schematic information is not yet known",
|
||||
"Schematic information is not yet ready",
|
||||
"Talos is not installed, kernel args cannot be updated yet",
|
||||
"Cannot determine if kernel args update is supported: SecurityState and TalosVersion are not yet set",
|
||||
}, res.TypedSpec().Value.UnmetConditions)
|
||||
@@ -59,6 +59,8 @@ func TestReconcile(t *testing.T) {
|
||||
}
|
||||
|
||||
res.TypedSpec().Value.Schematic = &specs.MachineStatusSpec_Schematic{
|
||||
Id: "test-id",
|
||||
FullId: "test-full-id",
|
||||
KernelArgs: []string{"arg-1", "arg-2"},
|
||||
}
|
||||
|
||||
|
||||
@@ -69,7 +69,7 @@ func GenInstallConfig(machineStatus *omni.MachineStatus, clusterMachineTalosVers
|
||||
|
||||
genOptions.TypedSpec().Value.InstallImage.SchematicId = clusterMachineTalosVersion.TypedSpec().Value.SchematicId
|
||||
genOptions.TypedSpec().Value.InstallImage.TalosVersion = clusterMachineTalosVersion.TypedSpec().Value.TalosVersion
|
||||
genOptions.TypedSpec().Value.InstallImage.SchematicInitialized = machineStatus.TypedSpec().Value.Schematic != nil
|
||||
genOptions.TypedSpec().Value.InstallImage.SchematicInitialized = machineStatus.TypedSpec().Value.SchematicReady()
|
||||
|
||||
if genOptions.TypedSpec().Value.InstallImage.SchematicInitialized {
|
||||
genOptions.TypedSpec().Value.InstallImage.SchematicInvalid = machineStatus.TypedSpec().Value.GetSchematic().GetInvalid()
|
||||
|
||||
@@ -482,57 +482,8 @@ func (ctrl *MachineStatusController) handleNotification(ctx context.Context, r c
|
||||
}
|
||||
|
||||
if event.Schematic != nil {
|
||||
if spec.Schematic == nil {
|
||||
spec.Schematic = &specs.MachineStatusSpec_Schematic{}
|
||||
}
|
||||
|
||||
spec.Schematic.Raw = event.Schematic.Raw
|
||||
spec.Schematic.Id = event.Schematic.ID
|
||||
spec.Schematic.FullId = event.Schematic.FullID
|
||||
spec.Schematic.Extensions = event.Schematic.Extensions
|
||||
spec.Schematic.Invalid = event.Schematic.Invalid
|
||||
spec.Schematic.KernelArgs = event.Schematic.KernelArgs
|
||||
|
||||
spec.Schematic.MetaValues = xslices.Map(event.Schematic.MetaValues, func(value schematic.MetaValue) *specs.MetaValue {
|
||||
return &specs.MetaValue{
|
||||
Key: uint32(value.Key),
|
||||
Value: value.Value,
|
||||
}
|
||||
})
|
||||
|
||||
if event.Schematic.Overlay.Name != "" {
|
||||
spec.Schematic.Overlay = &specs.Overlay{
|
||||
Name: event.Schematic.Overlay.Name,
|
||||
Image: event.Schematic.Overlay.Image,
|
||||
}
|
||||
}
|
||||
|
||||
if spec.Schematic.InitialSchematic == "" {
|
||||
spec.Schematic.InitialSchematic = spec.Schematic.FullId
|
||||
}
|
||||
|
||||
spec.Schematic.InAgentMode = event.Schematic.InAgentMode
|
||||
|
||||
// We populate the initial state on a best-effort basis: we might have missed the moment to capture it, but it is acceptable.
|
||||
if spec.Schematic.InitialState == nil {
|
||||
spec.Schematic.InitialState = &specs.MachineStatusSpec_Schematic_InitialState{
|
||||
Extensions: event.Schematic.Extensions,
|
||||
}
|
||||
}
|
||||
|
||||
// if the schematic is invalid or the machine is in agent mode, we reset the initial schematic information
|
||||
if spec.Schematic.Invalid || spec.Schematic.InAgentMode {
|
||||
spec.Schematic.InitialSchematic = ""
|
||||
spec.Schematic.InitialState = &specs.MachineStatusSpec_Schematic_InitialState{} // reset to be empty but leave it initialized
|
||||
}
|
||||
|
||||
_, kernelArgsInitialized := m.Metadata().Annotations().Get(omni.KernelArgsInitialized)
|
||||
if !kernelArgsInitialized {
|
||||
if err := ctrl.kernelArgsInitializer.Init(ctx, m.Metadata().ID(), event.Schematic.KernelArgs); err != nil {
|
||||
return fmt.Errorf("failed to initialize extra kernel args: %w", err)
|
||||
}
|
||||
|
||||
m.Metadata().Annotations().Set(omni.KernelArgsInitialized, "")
|
||||
if err := ctrl.handleEventSchematic(ctx, m, event); err != nil {
|
||||
return fmt.Errorf("failed to handle schematic event: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -565,6 +516,74 @@ func (ctrl *MachineStatusController) handleNotification(ctx context.Context, r c
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ctrl *MachineStatusController) handleEventSchematic(ctx context.Context, ms *omni.MachineStatus, event machinetask.Info) error {
|
||||
spec := ms.TypedSpec().Value
|
||||
|
||||
// If the machine is in agent mode, reset the schematic information as it were never observed.
|
||||
// At that stage, anything on it (extensions, kernel args, etc.) is irrelevant and even wrong for Omni to process.
|
||||
if event.Schematic.InAgentMode {
|
||||
spec.Schematic = &specs.MachineStatusSpec_Schematic{
|
||||
InAgentMode: true,
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
if spec.Schematic == nil {
|
||||
spec.Schematic = &specs.MachineStatusSpec_Schematic{}
|
||||
}
|
||||
|
||||
spec.Schematic.Raw = event.Schematic.Raw
|
||||
spec.Schematic.Id = event.Schematic.ID
|
||||
spec.Schematic.FullId = event.Schematic.FullID
|
||||
spec.Schematic.Extensions = event.Schematic.Extensions
|
||||
spec.Schematic.Invalid = event.Schematic.Invalid
|
||||
spec.Schematic.KernelArgs = event.Schematic.KernelArgs
|
||||
spec.Schematic.InAgentMode = event.Schematic.InAgentMode
|
||||
|
||||
spec.Schematic.MetaValues = xslices.Map(event.Schematic.MetaValues, func(value schematic.MetaValue) *specs.MetaValue {
|
||||
return &specs.MetaValue{
|
||||
Key: uint32(value.Key),
|
||||
Value: value.Value,
|
||||
}
|
||||
})
|
||||
|
||||
if event.Schematic.Overlay.Name != "" {
|
||||
spec.Schematic.Overlay = &specs.Overlay{
|
||||
Name: event.Schematic.Overlay.Name,
|
||||
Image: event.Schematic.Overlay.Image,
|
||||
}
|
||||
}
|
||||
|
||||
if spec.Schematic.InitialSchematic == "" {
|
||||
spec.Schematic.InitialSchematic = spec.Schematic.FullId
|
||||
}
|
||||
|
||||
// We populate the initial state on a best-effort basis: we might have missed the moment to capture it, but it is acceptable.
|
||||
if spec.Schematic.InitialState == nil {
|
||||
spec.Schematic.InitialState = &specs.MachineStatusSpec_Schematic_InitialState{
|
||||
Extensions: event.Schematic.Extensions,
|
||||
}
|
||||
}
|
||||
|
||||
// If the schematic is invalid, reset the initial schematic information.
|
||||
if spec.Schematic.Invalid {
|
||||
spec.Schematic.InitialSchematic = ""
|
||||
spec.Schematic.InitialState = &specs.MachineStatusSpec_Schematic_InitialState{} // reset to be empty but leave it initialized
|
||||
}
|
||||
|
||||
_, kernelArgsInitialized := ms.Metadata().Annotations().Get(omni.KernelArgsInitialized)
|
||||
if !kernelArgsInitialized {
|
||||
if err := ctrl.kernelArgsInitializer.Init(ctx, ms.Metadata().ID(), event.Schematic.KernelArgs); err != nil {
|
||||
return fmt.Errorf("failed to initialize extra kernel args: %w", err)
|
||||
}
|
||||
|
||||
ms.Metadata().Annotations().Set(omni.KernelArgsInitialized, "")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// reconcileLabels is a wrapper around omni.MachineStatusReconcileLabels, but it overrides the "installed" label
|
||||
// based on a matching infra.MachineStatus for that machine.
|
||||
//
|
||||
|
||||
@@ -440,11 +440,7 @@ func (suite *MachineStatusSuite) TestMachineSchematic() {
|
||||
},
|
||||
},
|
||||
expected: &specs.MachineStatusSpec_Schematic{
|
||||
Id: defaultSchematic,
|
||||
InitialSchematic: "",
|
||||
FullId: defaultSchematic,
|
||||
InAgentMode: true,
|
||||
InitialState: &specs.MachineStatusSpec_Schematic_InitialState{},
|
||||
InAgentMode: true,
|
||||
},
|
||||
},
|
||||
} {
|
||||
|
||||
@@ -73,15 +73,11 @@ func checkClusterReady(ctx context.Context, r controller.Reader, machineConfig *
|
||||
|
||||
func checkMachineStatus(ctx context.Context, r controller.Reader, machineStatus *omni.MachineStatus) error {
|
||||
if !machineStatus.TypedSpec().Value.Connected {
|
||||
return xerrors.NewTaggedf[qtransform.SkipReconcileTag]("'%s' machine is not connected", machineStatus.Metadata().ID())
|
||||
return xerrors.NewTaggedf[qtransform.SkipReconcileTag]("machine %q is not connected", machineStatus.Metadata().ID())
|
||||
}
|
||||
|
||||
if machineStatus.TypedSpec().Value.Schematic == nil {
|
||||
return xerrors.NewTagged[qtransform.SkipReconcileTag](fmt.Errorf("machine status '%s' does not have schematic information", machineStatus.Metadata().ID()))
|
||||
}
|
||||
|
||||
if machineStatus.TypedSpec().Value.Schematic.InAgentMode {
|
||||
return xerrors.NewTagged[qtransform.SkipReconcileTag](fmt.Errorf("machine status '%s' schematic is in agent mode", machineStatus.Metadata().ID()))
|
||||
if !machineStatus.TypedSpec().Value.SchematicReady() {
|
||||
return xerrors.NewTaggedf[qtransform.SkipReconcileTag]("machine %q schematic is not ready", machineStatus.Metadata().ID())
|
||||
}
|
||||
|
||||
// if the machine is managed by a static infra provider, we need to ensure that the infra machine is ready to use
|
||||
@@ -94,7 +90,7 @@ func checkMachineStatus(ctx context.Context, r controller.Reader, machineStatus
|
||||
return xerrors.NewTaggedf[qtransform.SkipReconcileTag]("machine is managed by a static infra provider but the infra machine status is not found: %w", err)
|
||||
}
|
||||
|
||||
return fmt.Errorf("failed to get infra machine status '%s': %w", machineStatus.Metadata().ID(), err)
|
||||
return fmt.Errorf("failed to get infra machine status %q: %w", machineStatus.Metadata().ID(), err)
|
||||
}
|
||||
|
||||
if !infraMachineStatus.TypedSpec().Value.ReadyToUse {
|
||||
@@ -134,7 +130,7 @@ func BuildReconciliationContext(ctx context.Context, r controller.Reader,
|
||||
machineSetNode, err := safe.ReaderGetByID[*omni.MachineSetNode](ctx, r, machineConfig.Metadata().ID())
|
||||
if err != nil {
|
||||
if state.IsNotFoundError(err) {
|
||||
return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("'%s' machine set node not found: %w", machineConfig.Metadata().ID(), err)
|
||||
return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("%q machine set node not found: %w", machineConfig.Metadata().ID(), err)
|
||||
}
|
||||
|
||||
return nil, err
|
||||
@@ -176,19 +172,19 @@ func BuildReconciliationContext(ctx context.Context, r controller.Reader,
|
||||
)
|
||||
if err != nil {
|
||||
if state.IsNotFoundError(err) {
|
||||
return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("'%s' machine status snapshot not found: %w", machineConfig.Metadata().ID(), err)
|
||||
return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("%q machine status snapshot not found: %w", machineConfig.Metadata().ID(), err)
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("failed to get machine status snapshot '%s': %w", machineConfig.Metadata().ID(), err)
|
||||
return nil, fmt.Errorf("failed to get machine status snapshot %q: %w", machineConfig.Metadata().ID(), err)
|
||||
}
|
||||
|
||||
rc.machineStatus, err = safe.ReaderGetByID[*omni.MachineStatus](ctx, r, machineConfig.Metadata().ID())
|
||||
if err != nil {
|
||||
if state.IsNotFoundError(err) {
|
||||
return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("'%s' machine status not found: %w", machineConfig.Metadata().ID(), err)
|
||||
return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("%q machine status not found: %w", machineConfig.Metadata().ID(), err)
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("failed to get machine status '%s': %w", machineConfig.Metadata().ID(), err)
|
||||
return nil, fmt.Errorf("failed to get machine status %q: %w", machineConfig.Metadata().ID(), err)
|
||||
}
|
||||
|
||||
if err = checkMachineStatus(ctx, r, rc.machineStatus); err != nil {
|
||||
@@ -198,10 +194,10 @@ func BuildReconciliationContext(ctx context.Context, r controller.Reader,
|
||||
genOptions, err := safe.ReaderGetByID[*omni.MachineConfigGenOptions](ctx, r, machineConfig.Metadata().ID())
|
||||
if err != nil {
|
||||
if state.IsNotFoundError(err) {
|
||||
return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("'%s' machine config gen options not found: %w", machineConfig.Metadata().ID(), err)
|
||||
return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("%q machine config gen options not found: %w", machineConfig.Metadata().ID(), err)
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("failed to get install image '%s': %w", machineConfig.Metadata().ID(), err)
|
||||
return nil, fmt.Errorf("failed to get install image %q: %w", machineConfig.Metadata().ID(), err)
|
||||
}
|
||||
|
||||
rc.installImage = genOptions.TypedSpec().Value.InstallImage
|
||||
|
||||
@@ -781,7 +781,7 @@ func (ctrl *ClusterMachineConfigStatusController) shouldResetGraceful(
|
||||
machineSetStatus.TypedSpec().Value.Phase != specs.MachineSetPhase_Destroying, nil
|
||||
}
|
||||
|
||||
func (h *ClusterMachineConfigStatusController) gracefulEtcdLeave(ctx context.Context, c *client.Client, id string) error {
|
||||
func (ctrl *ClusterMachineConfigStatusController) gracefulEtcdLeave(ctx context.Context, c *client.Client, id string) error {
|
||||
_, err := c.EtcdForfeitLeadership(ctx, &machineapi.EtcdForfeitLeadershipRequest{})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to forfeit leadership, node %q: %w", id, err)
|
||||
|
||||
@@ -69,8 +69,7 @@ func (ctrl *StatusController) transform(ctx context.Context, r controller.Reader
|
||||
|
||||
status.TypedSpec().Value.IsMaintenance = ms.TypedSpec().Value.Maintenance
|
||||
|
||||
schematicSpec := ms.TypedSpec().Value.Schematic
|
||||
if schematicSpec == nil {
|
||||
if !ms.TypedSpec().Value.SchematicReady() {
|
||||
status.TypedSpec().Value.Phase = specs.MachineUpgradeStatusSpec_Unknown
|
||||
status.TypedSpec().Value.Status = "schematic info is not available"
|
||||
status.TypedSpec().Value.Error = ""
|
||||
@@ -78,6 +77,8 @@ func (ctrl *StatusController) transform(ctx context.Context, r controller.Reader
|
||||
return nil
|
||||
}
|
||||
|
||||
schematicSpec := ms.TypedSpec().Value.Schematic
|
||||
|
||||
status.TypedSpec().Value.CurrentSchematicId = schematicSpec.FullId
|
||||
|
||||
if schematicSpec.Raw == "" {
|
||||
|
||||
@@ -150,6 +150,7 @@ func TestReconcile(t *testing.T) {
|
||||
{Key: 1, Value: "value1"},
|
||||
{Key: 2, Value: "value2"},
|
||||
},
|
||||
Id: "test-id",
|
||||
FullId: currentSchematicID,
|
||||
Raw: string(currentSchematicRaw),
|
||||
InitialState: &specs.MachineStatusSpec_Schematic_InitialState{
|
||||
|
||||
@@ -513,6 +513,7 @@ func (suite *OmniSuite) createClusterWithTalosVersion(clusterName string, contro
|
||||
machineStatus.TypedSpec().Value.ManagementAddress = suite.socketConnectionString
|
||||
machineStatus.TypedSpec().Value.Schematic = &specs.MachineStatusSpec_Schematic{
|
||||
Id: defaultSchematic,
|
||||
FullId: defaultSchematic,
|
||||
InitialState: &specs.MachineStatusSpec_Schematic_InitialState{},
|
||||
}
|
||||
machineStatus.TypedSpec().Value.InitialTalosVersion = cluster.TypedSpec().Value.TalosVersion
|
||||
|
||||
@@ -143,8 +143,8 @@ func (helper *schematicConfigurationHelper) reconcile(
|
||||
ms *omni.MachineStatus,
|
||||
schematicConfiguration *omni.SchematicConfiguration,
|
||||
) (*omni.MachineExtensionsStatus, error) {
|
||||
if ms.TypedSpec().Value.Schematic == nil {
|
||||
return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("schematic information is not yet available")
|
||||
if !ms.TypedSpec().Value.SchematicReady() {
|
||||
return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("schematic is not yet ready")
|
||||
}
|
||||
|
||||
securityState := ms.TypedSpec().Value.SecurityState
|
||||
@@ -368,8 +368,8 @@ func determineKernelArgs(ctx context.Context, machineStatus *omni.MachineStatus,
|
||||
}
|
||||
|
||||
if !updateSupported {
|
||||
if machineStatus.TypedSpec().Value.Schematic == nil {
|
||||
return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("machine schematic is not yet initialized")
|
||||
if !machineStatus.TypedSpec().Value.SchematicReady() {
|
||||
return nil, xerrors.NewTaggedf[qtransform.SkipReconcileTag]("machine schematic is not yet ready")
|
||||
}
|
||||
|
||||
// keep them unchanged - take the current args as-is
|
||||
@@ -437,7 +437,7 @@ func computeMachineExtensionsStatus(ms *omni.MachineStatus, customization *machi
|
||||
requestedExtensions set.Set[string]
|
||||
)
|
||||
|
||||
if ms.TypedSpec().Value.Schematic != nil {
|
||||
if ms.TypedSpec().Value.SchematicReady() {
|
||||
installedExtensions = xslices.ToSet(ms.TypedSpec().Value.Schematic.Extensions)
|
||||
}
|
||||
|
||||
|
||||
@@ -72,6 +72,8 @@ func (suite *SchematicConfigurationSuite) TestReconcile() {
|
||||
|
||||
machineStatus.TypedSpec().Value.TalosVersion = talosVersion
|
||||
machineStatus.TypedSpec().Value.Schematic = &specs.MachineStatusSpec_Schematic{
|
||||
Id: "test-id",
|
||||
FullId: "test-full-id",
|
||||
Extensions: []string{"siderolabs/hello-world-service"},
|
||||
InitialSchematic: expectedSchematic,
|
||||
InitialState: &specs.MachineStatusSpec_Schematic_InitialState{
|
||||
|
||||
@@ -555,8 +555,7 @@ func populateEmptySchematics(ctx context.Context, r controller.ReaderWriter, clu
|
||||
return err
|
||||
}
|
||||
|
||||
schematic := machineStatus.TypedSpec().Value.Schematic
|
||||
if schematic == nil {
|
||||
if !machineStatus.TypedSpec().Value.SchematicReady() {
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -566,7 +565,7 @@ func populateEmptySchematics(ctx context.Context, r controller.ReaderWriter, clu
|
||||
}
|
||||
|
||||
return safe.WriterModify(ctx, r, clusterMachineTalosVersion, func(res *omni.ClusterMachineTalosVersion) error {
|
||||
res.TypedSpec().Value.SchematicId = schematic.InitialSchematic
|
||||
res.TypedSpec().Value.SchematicId = machineStatus.TypedSpec().Value.Schematic.InitialSchematic
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
@@ -144,6 +144,7 @@ func init() {
|
||||
|
||||
res.TypedSpec().Value.Schematic = &specs.MachineStatusSpec_Schematic{
|
||||
Id: defaultSchematic,
|
||||
FullId: defaultSchematic,
|
||||
InitialState: &specs.MachineStatusSpec_Schematic_InitialState{},
|
||||
}
|
||||
|
||||
|
||||
@@ -357,7 +357,7 @@ func preRunHooks(t *testing.T, options *TestOptions) {
|
||||
|
||||
func postRunHooks(t *testing.T, options *TestOptions) {
|
||||
if t.Failed() {
|
||||
t.Logf("there are failed tests, save support bundle for all cluster")
|
||||
t.Logf("there are failed tests, save support bundle for all clusters")
|
||||
|
||||
saveAllSupportBundles(t, options.omniClient, options.OutputDir)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user