m/node: implement static network config
Allows using a static network configuration in Monogon OS.
This plumbs in support for the new static network configuration mode of
the network service into Monogon OS. It introduces a new NodeParameter
field as well as an ESP file to persistently hold this configuration.
The file is deliberately neither sealed nor encrypted, so that nodes with a
broken network configuration can still be recovered.
Change-Id: Ia398368a8d1c0eef4bca53bb279a97a144bdbd20
Reviewed-on: https://review.monogon.dev/c/monogon/+/1403
Tested-by: Jenkins CI
Reviewed-by: Serge Bazanski <serge@monogon.tech>
diff --git a/metropolis/node/core/BUILD.bazel b/metropolis/node/core/BUILD.bazel
index 142f090..49873d8 100644
--- a/metropolis/node/core/BUILD.bazel
+++ b/metropolis/node/core/BUILD.bazel
@@ -6,6 +6,7 @@
srcs = [
"main.go",
"mounts.go",
+ "nodeparams.go",
"panichandler.go",
"pstore.go",
] + select({
@@ -35,11 +36,13 @@
"//metropolis/pkg/supervisor",
"//metropolis/pkg/tpm",
"//metropolis/proto/api",
+ "@com_github_cenkalti_backoff_v4//:backoff",
"@com_github_containerd_containerd//:containerd",
"@com_github_containerd_containerd//namespaces",
"@org_golang_google_grpc//:go_default_library",
"@org_golang_google_grpc//codes",
"@org_golang_google_grpc//status",
+ "@org_golang_google_protobuf//proto",
"@org_golang_x_sys//unix",
],
)
diff --git a/metropolis/node/core/cluster/BUILD.bazel b/metropolis/node/core/cluster/BUILD.bazel
index be09f48..1a724c1 100644
--- a/metropolis/node/core/cluster/BUILD.bazel
+++ b/metropolis/node/core/cluster/BUILD.bazel
@@ -7,7 +7,6 @@
"cluster_bootstrap.go",
"cluster_join.go",
"cluster_register.go",
- "platform.go",
],
importpath = "source.monogon.dev/metropolis/node/core/cluster",
visibility = ["//metropolis/node/core:__subpackages__"],
diff --git a/metropolis/node/core/cluster/cluster.go b/metropolis/node/core/cluster/cluster.go
index 5230939..adfc632 100644
--- a/metropolis/node/core/cluster/cluster.go
+++ b/metropolis/node/core/cluster/cluster.go
@@ -27,17 +27,10 @@
import (
"context"
- "encoding/base64"
"errors"
"fmt"
- "io"
- "net/http"
- "os"
"strings"
- "github.com/cenkalti/backoff/v4"
- "google.golang.org/protobuf/proto"
-
"source.monogon.dev/metropolis/node/core/localstorage"
"source.monogon.dev/metropolis/node/core/network"
"source.monogon.dev/metropolis/node/core/roleserve"
@@ -50,6 +43,7 @@
storageRoot *localstorage.Root
networkService *network.Service
roleServer *roleserve.Service
+ nodeParams *apb.NodeParameters
oneway chan struct{}
}
@@ -57,11 +51,12 @@
// NewManager creates a new cluster Manager. The given localstorage Root must
// be places, but not yet started (and will be started as the Manager makes
// progress). The given network Service must already be running.
-func NewManager(storageRoot *localstorage.Root, networkService *network.Service, rs *roleserve.Service) *Manager {
+func NewManager(storageRoot *localstorage.Root, networkService *network.Service, rs *roleserve.Service, nodeParams *apb.NodeParameters) *Manager {
return &Manager{
storageRoot: storageRoot,
networkService: networkService,
roleServer: rs,
+ nodeParams: nodeParams,
oneway: make(chan struct{}),
}
}
@@ -96,12 +91,7 @@
supervisor.Logger(ctx).Info("No sealed configuration, looking for node parameters")
- params, err := m.nodeParams(ctx)
- if err != nil {
- return fmt.Errorf("no parameters available: %w", err)
- }
-
- switch inner := params.Cluster.(type) {
+ switch inner := m.nodeParams.Cluster.(type) {
case *apb.NodeParameters_ClusterBootstrap_:
err = m.bootstrap(ctx, inner.ClusterBootstrap)
case *apb.NodeParameters_ClusterRegister_:
@@ -117,124 +107,6 @@
return err
}
-func (m *Manager) nodeParamsFWCFG(ctx context.Context) (*apb.NodeParameters, error) {
- bytes, err := os.ReadFile("/sys/firmware/qemu_fw_cfg/by_name/dev.monogon.metropolis/parameters.pb/raw")
- if err != nil {
- return nil, fmt.Errorf("could not read firmware enrolment file: %w", err)
- }
-
- config := apb.NodeParameters{}
- err = proto.Unmarshal(bytes, &config)
- if err != nil {
- return nil, fmt.Errorf("could not unmarshal: %v", err)
- }
-
- return &config, nil
-}
-
-// nodeParamsGCPMetadata attempts to retrieve the node parameters from the
-// GCP metadata service. Returns nil if the metadata service is available,
-// but no node parameters are specified.
-func (m *Manager) nodeParamsGCPMetadata(ctx context.Context) (*apb.NodeParameters, error) {
- const metadataURL = "http://169.254.169.254/computeMetadata/v1/instance/attributes/metropolis-node-params"
- req, err := http.NewRequestWithContext(ctx, "GET", metadataURL, nil)
- if err != nil {
- return nil, fmt.Errorf("could not create request: %w", err)
- }
- req.Header.Set("Metadata-Flavor", "Google")
- resp, err := http.DefaultClient.Do(req)
- if err != nil {
- return nil, fmt.Errorf("HTTP request failed: %w", err)
- }
- defer resp.Body.Close()
- if resp.StatusCode != http.StatusOK {
- if resp.StatusCode == http.StatusNotFound {
- return nil, nil
- }
- return nil, fmt.Errorf("non-200 status code: %d", resp.StatusCode)
- }
- decoded, err := io.ReadAll(base64.NewDecoder(base64.StdEncoding, resp.Body))
- if err != nil {
- return nil, fmt.Errorf("cannot decode base64: %w", err)
- }
- config := apb.NodeParameters{}
- err = proto.Unmarshal(decoded, &config)
- if err != nil {
- return nil, fmt.Errorf("failed unmarshalling NodeParameters: %w", err)
- }
- return &config, nil
-}
-
-func (m *Manager) nodeParams(ctx context.Context) (*apb.NodeParameters, error) {
- boardName, err := getDMIBoardName()
- if err != nil {
- if errors.Is(err, os.ErrNotExist) {
- supervisor.Logger(ctx).Infof("Board name: UNKNOWN")
- } else {
- supervisor.Logger(ctx).Warningf("Could not get board name, cannot detect platform: %v", err)
- }
- } else {
- supervisor.Logger(ctx).Infof("Board name: %q", boardName)
- }
-
- // When running on GCP, attempt to retrieve the node parameters from the
- // metadata server first. Retry until we get a response, since we need to
- // wait for the network service to assign an IP address first.
- if isGCPInstance(boardName) {
- var params *apb.NodeParameters
- op := func() error {
- supervisor.Logger(ctx).Info("Running on GCP, attempting to retrieve node parameters from metadata server")
- params, err = m.nodeParamsGCPMetadata(ctx)
- return err
- }
- err := backoff.Retry(op, backoff.WithContext(backoff.NewExponentialBackOff(), ctx))
- if err != nil {
- supervisor.Logger(ctx).Errorf("Failed to retrieve node parameters: %v", err)
- }
- if params != nil {
- supervisor.Logger(ctx).Info("Retrieved parameters from GCP metadata server")
- return params, nil
- }
- supervisor.Logger(ctx).Infof("\"metropolis-node-params\" metadata not found")
- }
-
- // Retrieve node parameters from qemu's fwcfg interface or ESP.
- // TODO(q3k): probably abstract this away and implement per platform/build/...
- paramsFWCFG, err := m.nodeParamsFWCFG(ctx)
- if err != nil {
- if errors.Is(err, os.ErrNotExist) {
- supervisor.Logger(ctx).Infof("No qemu fwcfg params.")
- } else {
- supervisor.Logger(ctx).Warningf("Could not retrieve node parameters from qemu fwcfg: %v", err)
- }
- paramsFWCFG = nil
- } else {
- supervisor.Logger(ctx).Infof("Retrieved node parameters from qemu fwcfg")
- }
- paramsESP, err := m.storageRoot.ESP.Metropolis.NodeParameters.Unmarshal()
- if err != nil {
- if errors.Is(err, os.ErrNotExist) {
- supervisor.Logger(ctx).Infof("No ESP node parameters.")
- } else {
- supervisor.Logger(ctx).Warningf("Could not retrieve node parameters from ESP: %v", err)
- }
- paramsESP = nil
- } else {
- supervisor.Logger(ctx).Infof("Retrieved node parameters from ESP")
- }
- if paramsFWCFG == nil && paramsESP == nil {
- return nil, fmt.Errorf("could not find node parameters in ESP or qemu fwcfg")
- }
- if paramsFWCFG != nil && paramsESP != nil {
- supervisor.Logger(ctx).Warningf("Node parameters found both in both ESP and qemu fwcfg, using the latter")
- return paramsFWCFG, nil
- } else if paramsFWCFG != nil {
- return paramsFWCFG, nil
- } else {
- return paramsESP, nil
- }
-}
-
// logClusterDirectory verbosely logs the whole Cluster Directory passed to it.
func logClusterDirectory(ctx context.Context, cd *cpb.ClusterDirectory) {
for _, node := range cd.Nodes {
diff --git a/metropolis/node/core/cluster/platform.go b/metropolis/node/core/cluster/platform.go
deleted file mode 100644
index b6b501e..0000000
--- a/metropolis/node/core/cluster/platform.go
+++ /dev/null
@@ -1,19 +0,0 @@
-package cluster
-
-import (
- "fmt"
- "os"
- "strings"
-)
-
-func getDMIBoardName() (string, error) {
- b, err := os.ReadFile("/sys/devices/virtual/dmi/id/board_name")
- if err != nil {
- return "", fmt.Errorf("could not read board name: %w", err)
- }
- return strings.TrimRight(string(b), "\n"), nil
-}
-
-func isGCPInstance(boardName string) bool {
- return boardName == "Google Compute Engine"
-}
diff --git a/metropolis/node/core/localstorage/BUILD.bazel b/metropolis/node/core/localstorage/BUILD.bazel
index 31475de..3d4b352 100644
--- a/metropolis/node/core/localstorage/BUILD.bazel
+++ b/metropolis/node/core/localstorage/BUILD.bazel
@@ -18,6 +18,7 @@
"//metropolis/proto/api",
"//metropolis/proto/common",
"//metropolis/proto/private",
+ "//net/proto",
"@org_golang_google_protobuf//proto",
"@org_golang_x_sys//unix",
],
diff --git a/metropolis/node/core/localstorage/storage_esp.go b/metropolis/node/core/localstorage/storage_esp.go
index ba77f1a..05b1f1c 100644
--- a/metropolis/node/core/localstorage/storage_esp.go
+++ b/metropolis/node/core/localstorage/storage_esp.go
@@ -28,6 +28,7 @@
apb "source.monogon.dev/metropolis/proto/api"
cpb "source.monogon.dev/metropolis/proto/common"
ppb "source.monogon.dev/metropolis/proto/private"
+ npb "source.monogon.dev/net/proto"
)
// ESPDirectory is the EFI System Partition. It is a cleartext partition
@@ -43,9 +44,10 @@
// bootstrap-related data.
type ESPMetropolisDirectory struct {
declarative.Directory
- SealedConfiguration ESPSealedConfiguration `file:"sealed_configuration.pb"`
- NodeParameters ESPNodeParameters `file:"parameters.pb"`
- ClusterDirectory ESPClusterDirectory `file:"cluster_directory.pb"`
+ SealedConfiguration ESPSealedConfiguration `file:"sealed_configuration.pb"`
+ NodeParameters ESPNodeParameters `file:"parameters.pb"`
+ ClusterDirectory ESPClusterDirectory `file:"cluster_directory.pb"`
+ NetworkConfiguration ESPNetworkConfiguration `file:"network_configuration.pb"`
}
// ESPSealedConfiguration is a TPM sealed serialized
@@ -69,14 +71,22 @@
declarative.File
}
+// ESPNetworkConfiguration is a serialized net.Net protobuf. If present, it
+// disables automatic network configuration and uses the given configuration
+// to enable network connectivity.
+type ESPNetworkConfiguration struct {
+ declarative.File
+}
+
var (
- ErrNoSealed = errors.New("no sealed configuration exists")
- ErrSealedUnavailable = errors.New("sealed configuration temporary unavailable")
- ErrSealedCorrupted = errors.New("sealed configuration corrupted")
- ErrNoParameters = errors.New("no parameters found")
- ErrParametersCorrupted = errors.New("parameters corrupted")
- ErrNoDirectory = errors.New("no cluster directory found")
- ErrDirectoryCorrupted = errors.New("cluster directory corrupted")
+ ErrNoSealed = errors.New("no sealed configuration exists")
+ ErrSealedUnavailable = errors.New("sealed configuration temporary unavailable")
+ ErrSealedCorrupted = errors.New("sealed configuration corrupted")
+ ErrNoParameters = errors.New("no parameters found")
+ ErrParametersCorrupted = errors.New("parameters corrupted")
+ ErrNoDirectory = errors.New("no cluster directory found")
+ ErrDirectoryCorrupted = errors.New("cluster directory corrupted")
+ ErrNetworkConfigCorrupted = errors.New("network configuration corrupted")
)
func (e *ESPNodeParameters) Unmarshal() (*apb.NodeParameters, error) {
@@ -114,6 +124,42 @@
return &dir, nil
}
+// Unmarshal reads the static network configuration file from the ESP and
+// parses it as a Net message. It returns (nil, nil) if the file does not
+// exist. Any other read failure, as well as a parse failure, is returned as
+// an error wrapping ErrNetworkConfigCorrupted.
+func (e *ESPNetworkConfiguration) Unmarshal() (*npb.Net, error) {
+ bytes, err := e.Read()
+ if err != nil {
+ // errors.Is also matches a not-exist error that has been wrapped.
+ if errors.Is(err, os.ErrNotExist) {
+ return nil, nil
+ }
+ return nil, fmt.Errorf("%w: when reading: %v", ErrNetworkConfigCorrupted, err)
+ }
+
+ netConf := npb.Net{}
+ err = proto.Unmarshal(bytes, &netConf)
+ if err != nil {
+ return nil, fmt.Errorf("%w: when unmarshaling: %v", ErrNetworkConfigCorrupted, err)
+ }
+ return &netConf, nil
+}
+
+// Marshal serializes n and writes it to the static network configuration
+// file on the ESP. It returns an error if either the serialization or the
+// write fails.
+func (e *ESPNetworkConfiguration) Marshal(n *npb.Net) error {
+ netConfRaw, err := proto.Marshal(n)
+ if err != nil {
+ return fmt.Errorf("error marshaling Net: %w", err)
+ }
+ if err := e.Write(netConfRaw, 0666); err != nil {
+ return fmt.Errorf("error writing static network config to ESP: %w", err)
+ }
+ return nil
+}
+
func (e *ESPSealedConfiguration) SealSecureBoot(c *ppb.SealedConfiguration) error {
bytes, err := proto.Marshal(c)
if err != nil {
diff --git a/metropolis/node/core/main.go b/metropolis/node/core/main.go
index bd7f475..a9bdc98 100644
--- a/metropolis/node/core/main.go
+++ b/metropolis/node/core/main.go
@@ -137,8 +137,34 @@
if err := root.Start(ctx); err != nil {
return fmt.Errorf("cannot start root FS: %w", err)
}
+ // NOTE(review): getNodeParams runs before the network service is started,
+ // yet its GCP path fetches parameters over HTTP from the metadata server.
+ // Confirm that this ordering is intended.
+ nodeParams, err := getNodeParams(ctx, root)
+ if err != nil {
+ return fmt.Errorf("cannot get node parameters: %w", err)
+ }
+ // A network configuration supplied via node parameters takes precedence
+ // and is written back to the ESP. A failed write-back is only logged.
+ if nodeParams.NetworkConfig != nil {
+ networkSvc.StaticConfig = nodeParams.NetworkConfig
+ if err := root.ESP.Metropolis.NetworkConfiguration.Marshal(nodeParams.NetworkConfig); err != nil {
+ logger.Errorf("Error writing back network_config from NodeParameters: %v", err)
+ }
+ }
+ // Otherwise fall back to the configuration stored on the ESP, if any.
+ if networkSvc.StaticConfig == nil {
+ staticConfig, err := root.ESP.Metropolis.NetworkConfiguration.Unmarshal()
+ if err == nil {
+ networkSvc.StaticConfig = staticConfig
+ } else {
+ logger.Errorf("Unable to load static config, proceeding without it: %v", err)
+ }
+ }
+ // The network service is started regardless of where (or whether) a static
+ // configuration was found.
if err := supervisor.Run(ctx, "network", networkSvc.Run); err != nil {
return fmt.Errorf("when starting network: %w", err)
}
if err := supervisor.Run(ctx, "time", timeSvc.Run); err != nil {
return fmt.Errorf("when starting time: %w", err)
@@ -166,7 +192,7 @@
// Start cluster manager. This kicks off cluster membership machinery,
// which will either start a new cluster, enroll into one or join one.
- m := cluster.NewManager(root, networkSvc, rs)
+ m := cluster.NewManager(root, networkSvc, rs, nodeParams)
return m.Run(ctx)
}
diff --git a/metropolis/node/core/nodeparams.go b/metropolis/node/core/nodeparams.go
new file mode 100644
index 0000000..993dd64
--- /dev/null
+++ b/metropolis/node/core/nodeparams.go
@@ -0,0 +1,165 @@
+package main
+
+import (
+ "context"
+ "encoding/base64"
+ "errors"
+ "fmt"
+ "io"
+ "net/http"
+ "os"
+ "strings"
+
+ "github.com/cenkalti/backoff/v4"
+ "google.golang.org/protobuf/proto"
+
+ "source.monogon.dev/metropolis/node/core/localstorage"
+ "source.monogon.dev/metropolis/pkg/supervisor"
+ apb "source.monogon.dev/metropolis/proto/api"
+)
+
+// nodeParamsFWCFG reads node parameters from qemu's fw_cfg sysfs interface.
+// Both read and unmarshal failures are returned wrapped, so callers can
+// detect a missing file with errors.Is(err, os.ErrNotExist).
+func nodeParamsFWCFG(ctx context.Context) (*apb.NodeParameters, error) {
+ raw, err := os.ReadFile("/sys/firmware/qemu_fw_cfg/by_name/dev.monogon.metropolis/parameters.pb/raw")
+ if err != nil {
+ return nil, fmt.Errorf("could not read firmware enrolment file: %w", err)
+ }
+
+ config := apb.NodeParameters{}
+ err = proto.Unmarshal(raw, &config)
+ if err != nil {
+ return nil, fmt.Errorf("could not unmarshal: %w", err)
+ }
+
+ return &config, nil
+}
+
+// nodeParamsGCPMetadata attempts to retrieve the node parameters from the
+// GCP metadata service. It returns (nil, nil) if the metadata service
+// responds with 404, ie. it is reachable but no node parameters are set. The
+// response body is decoded as a base64-encoded NodeParameters message.
+func nodeParamsGCPMetadata(ctx context.Context) (*apb.NodeParameters, error) {
+ const metadataURL = "http://169.254.169.254/computeMetadata/v1/instance/attributes/metropolis-node-params"
+ req, err := http.NewRequestWithContext(ctx, "GET", metadataURL, nil)
+ if err != nil {
+ return nil, fmt.Errorf("could not create request: %w", err)
+ }
+ req.Header.Set("Metadata-Flavor", "Google")
+ resp, err := http.DefaultClient.Do(req)
+ if err != nil {
+ return nil, fmt.Errorf("HTTP request failed: %w", err)
+ }
+ defer resp.Body.Close()
+ if resp.StatusCode != http.StatusOK {
+ if resp.StatusCode == http.StatusNotFound {
+ return nil, nil
+ }
+ return nil, fmt.Errorf("non-200 status code: %d", resp.StatusCode)
+ }
+ decoded, err := io.ReadAll(base64.NewDecoder(base64.StdEncoding, resp.Body))
+ if err != nil {
+ return nil, fmt.Errorf("cannot decode base64: %w", err)
+ }
+ config := apb.NodeParameters{}
+ err = proto.Unmarshal(decoded, &config)
+ if err != nil {
+ return nil, fmt.Errorf("failed unmarshalling NodeParameters: %w", err)
+ }
+ return &config, nil
+}
+
+// getDMIBoardName returns the DMI board name as exposed by sysfs, with
+// trailing newlines removed.
+func getDMIBoardName() (string, error) {
+ b, err := os.ReadFile("/sys/devices/virtual/dmi/id/board_name")
+ if err != nil {
+ return "", fmt.Errorf("could not read board name: %w", err)
+ }
+ return strings.TrimRight(string(b), "\n"), nil
+}
+
+// isGCPInstance returns whether the given DMI board name identifies a Google
+// Compute Engine instance.
+func isGCPInstance(boardName string) bool {
+ return boardName == "Google Compute Engine"
+}
+
+// getNodeParams retrieves the node parameters for this node. On GCP (as
+// detected from the DMI board name) the metadata server is tried first.
+// Otherwise, or if that yields nothing, parameters are looked up in qemu's
+// fw_cfg interface and on the ESP, with fw_cfg winning if both are present.
+// An error is returned only if no source provides parameters.
+func getNodeParams(ctx context.Context, storage *localstorage.Root) (*apb.NodeParameters, error) {
+ boardName, err := getDMIBoardName()
+ if err != nil {
+ if errors.Is(err, os.ErrNotExist) {
+ supervisor.Logger(ctx).Infof("Board name: UNKNOWN")
+ } else {
+ supervisor.Logger(ctx).Warningf("Could not get board name, cannot detect platform: %v", err)
+ }
+ } else {
+ supervisor.Logger(ctx).Infof("Board name: %q", boardName)
+ }
+
+ // When running on GCP, attempt to retrieve the node parameters from the
+ // metadata server first, retrying with exponential backoff.
+ // NOTE(review): reaching the metadata server needs working networking;
+ // confirm the network service is running by the time this is called.
+ if isGCPInstance(boardName) {
+ var params *apb.NodeParameters
+ op := func() error {
+ supervisor.Logger(ctx).Info("Running on GCP, attempting to retrieve node parameters from metadata server")
+ var opErr error
+ params, opErr = nodeParamsGCPMetadata(ctx)
+ return opErr
+ }
+ if err := backoff.Retry(op, backoff.WithContext(backoff.NewExponentialBackOff(), ctx)); err != nil {
+ supervisor.Logger(ctx).Errorf("Failed to retrieve node parameters: %v", err)
+ } else if params != nil {
+ supervisor.Logger(ctx).Info("Retrieved parameters from GCP metadata server")
+ return params, nil
+ } else {
+ supervisor.Logger(ctx).Infof("\"metropolis-node-params\" metadata not found")
+ }
+ }
+
+ // Retrieve node parameters from qemu's fwcfg interface or ESP.
+ // TODO(q3k): probably abstract this away and implement per platform/build/...
+ paramsFWCFG, err := nodeParamsFWCFG(ctx)
+ if err != nil {
+ if errors.Is(err, os.ErrNotExist) {
+ supervisor.Logger(ctx).Infof("No qemu fwcfg params.")
+ } else {
+ supervisor.Logger(ctx).Warningf("Could not retrieve node parameters from qemu fwcfg: %v", err)
+ }
+ paramsFWCFG = nil
+ } else {
+ supervisor.Logger(ctx).Infof("Retrieved node parameters from qemu fwcfg")
+ }
+ paramsESP, err := storage.ESP.Metropolis.NodeParameters.Unmarshal()
+ if err != nil {
+ if errors.Is(err, os.ErrNotExist) {
+ supervisor.Logger(ctx).Infof("No ESP node parameters.")
+ } else {
+ supervisor.Logger(ctx).Warningf("Could not retrieve node parameters from ESP: %v", err)
+ }
+ paramsESP = nil
+ } else {
+ supervisor.Logger(ctx).Infof("Retrieved node parameters from ESP")
+ }
+
+ // Prefer fwcfg over the ESP when both provide parameters.
+ switch {
+ case paramsFWCFG != nil && paramsESP != nil:
+ supervisor.Logger(ctx).Warningf("Node parameters found in both ESP and qemu fwcfg, using the latter")
+ return paramsFWCFG, nil
+ case paramsFWCFG != nil:
+ return paramsFWCFG, nil
+ case paramsESP != nil:
+ return paramsESP, nil
+ default:
+ return nil, fmt.Errorf("could not find node parameters in ESP or qemu fwcfg")
+ }
+}