m/node: implement static network config

Allows using a static network configuration in Monogon OS.

This plumbs support for the network service's new static network
configuration mode into Monogon OS. It introduces a new NodeParameters
field as well as an ESP file to persistently hold this configuration.
The file is deliberately neither sealed nor encrypted, so that nodes
with a broken network configuration can still be recovered.

Change-Id: Ia398368a8d1c0eef4bca53bb279a97a144bdbd20
Reviewed-on: https://review.monogon.dev/c/monogon/+/1403
Tested-by: Jenkins CI
Reviewed-by: Serge Bazanski <serge@monogon.tech>
diff --git a/metropolis/node/core/cluster/cluster.go b/metropolis/node/core/cluster/cluster.go
index 5230939..adfc632 100644
--- a/metropolis/node/core/cluster/cluster.go
+++ b/metropolis/node/core/cluster/cluster.go
@@ -27,17 +27,10 @@
 
 import (
 	"context"
-	"encoding/base64"
 	"errors"
 	"fmt"
-	"io"
-	"net/http"
-	"os"
 	"strings"
 
-	"github.com/cenkalti/backoff/v4"
-	"google.golang.org/protobuf/proto"
-
 	"source.monogon.dev/metropolis/node/core/localstorage"
 	"source.monogon.dev/metropolis/node/core/network"
 	"source.monogon.dev/metropolis/node/core/roleserve"
@@ -50,6 +43,7 @@
 	storageRoot    *localstorage.Root
 	networkService *network.Service
 	roleServer     *roleserve.Service
+	nodeParams     *apb.NodeParameters
 
 	oneway chan struct{}
 }
@@ -57,11 +51,12 @@
 // NewManager creates a new cluster Manager. The given localstorage Root must
 // be places, but not yet started (and will be started as the Manager makes
 // progress). The given network Service must already be running.
-func NewManager(storageRoot *localstorage.Root, networkService *network.Service, rs *roleserve.Service) *Manager {
+func NewManager(storageRoot *localstorage.Root, networkService *network.Service, rs *roleserve.Service, nodeParams *apb.NodeParameters) *Manager {
 	return &Manager{
 		storageRoot:    storageRoot,
 		networkService: networkService,
 		roleServer:     rs,
+		nodeParams:     nodeParams,
 		oneway:         make(chan struct{}),
 	}
 }
@@ -96,12 +91,7 @@
 
 	supervisor.Logger(ctx).Info("No sealed configuration, looking for node parameters")
 
-	params, err := m.nodeParams(ctx)
-	if err != nil {
-		return fmt.Errorf("no parameters available: %w", err)
-	}
-
-	switch inner := params.Cluster.(type) {
+	switch inner := m.nodeParams.Cluster.(type) {
 	case *apb.NodeParameters_ClusterBootstrap_:
 		err = m.bootstrap(ctx, inner.ClusterBootstrap)
 	case *apb.NodeParameters_ClusterRegister_:
@@ -117,124 +107,6 @@
 	return err
 }
 
-func (m *Manager) nodeParamsFWCFG(ctx context.Context) (*apb.NodeParameters, error) {
-	bytes, err := os.ReadFile("/sys/firmware/qemu_fw_cfg/by_name/dev.monogon.metropolis/parameters.pb/raw")
-	if err != nil {
-		return nil, fmt.Errorf("could not read firmware enrolment file: %w", err)
-	}
-
-	config := apb.NodeParameters{}
-	err = proto.Unmarshal(bytes, &config)
-	if err != nil {
-		return nil, fmt.Errorf("could not unmarshal: %v", err)
-	}
-
-	return &config, nil
-}
-
-// nodeParamsGCPMetadata attempts to retrieve the node parameters from the
-// GCP metadata service. Returns nil if the metadata service is available,
-// but no node parameters are specified.
-func (m *Manager) nodeParamsGCPMetadata(ctx context.Context) (*apb.NodeParameters, error) {
-	const metadataURL = "http://169.254.169.254/computeMetadata/v1/instance/attributes/metropolis-node-params"
-	req, err := http.NewRequestWithContext(ctx, "GET", metadataURL, nil)
-	if err != nil {
-		return nil, fmt.Errorf("could not create request: %w", err)
-	}
-	req.Header.Set("Metadata-Flavor", "Google")
-	resp, err := http.DefaultClient.Do(req)
-	if err != nil {
-		return nil, fmt.Errorf("HTTP request failed: %w", err)
-	}
-	defer resp.Body.Close()
-	if resp.StatusCode != http.StatusOK {
-		if resp.StatusCode == http.StatusNotFound {
-			return nil, nil
-		}
-		return nil, fmt.Errorf("non-200 status code: %d", resp.StatusCode)
-	}
-	decoded, err := io.ReadAll(base64.NewDecoder(base64.StdEncoding, resp.Body))
-	if err != nil {
-		return nil, fmt.Errorf("cannot decode base64: %w", err)
-	}
-	config := apb.NodeParameters{}
-	err = proto.Unmarshal(decoded, &config)
-	if err != nil {
-		return nil, fmt.Errorf("failed unmarshalling NodeParameters: %w", err)
-	}
-	return &config, nil
-}
-
-func (m *Manager) nodeParams(ctx context.Context) (*apb.NodeParameters, error) {
-	boardName, err := getDMIBoardName()
-	if err != nil {
-		if errors.Is(err, os.ErrNotExist) {
-			supervisor.Logger(ctx).Infof("Board name: UNKNOWN")
-		} else {
-			supervisor.Logger(ctx).Warningf("Could not get board name, cannot detect platform: %v", err)
-		}
-	} else {
-		supervisor.Logger(ctx).Infof("Board name: %q", boardName)
-	}
-
-	// When running on GCP, attempt to retrieve the node parameters from the
-	// metadata server first. Retry until we get a response, since we need to
-	// wait for the network service to assign an IP address first.
-	if isGCPInstance(boardName) {
-		var params *apb.NodeParameters
-		op := func() error {
-			supervisor.Logger(ctx).Info("Running on GCP, attempting to retrieve node parameters from metadata server")
-			params, err = m.nodeParamsGCPMetadata(ctx)
-			return err
-		}
-		err := backoff.Retry(op, backoff.WithContext(backoff.NewExponentialBackOff(), ctx))
-		if err != nil {
-			supervisor.Logger(ctx).Errorf("Failed to retrieve node parameters: %v", err)
-		}
-		if params != nil {
-			supervisor.Logger(ctx).Info("Retrieved parameters from GCP metadata server")
-			return params, nil
-		}
-		supervisor.Logger(ctx).Infof("\"metropolis-node-params\" metadata not found")
-	}
-
-	// Retrieve node parameters from qemu's fwcfg interface or ESP.
-	// TODO(q3k): probably abstract this away and implement per platform/build/...
-	paramsFWCFG, err := m.nodeParamsFWCFG(ctx)
-	if err != nil {
-		if errors.Is(err, os.ErrNotExist) {
-			supervisor.Logger(ctx).Infof("No qemu fwcfg params.")
-		} else {
-			supervisor.Logger(ctx).Warningf("Could not retrieve node parameters from qemu fwcfg: %v", err)
-		}
-		paramsFWCFG = nil
-	} else {
-		supervisor.Logger(ctx).Infof("Retrieved node parameters from qemu fwcfg")
-	}
-	paramsESP, err := m.storageRoot.ESP.Metropolis.NodeParameters.Unmarshal()
-	if err != nil {
-		if errors.Is(err, os.ErrNotExist) {
-			supervisor.Logger(ctx).Infof("No ESP node parameters.")
-		} else {
-			supervisor.Logger(ctx).Warningf("Could not retrieve node parameters from ESP: %v", err)
-		}
-		paramsESP = nil
-	} else {
-		supervisor.Logger(ctx).Infof("Retrieved node parameters from ESP")
-	}
-	if paramsFWCFG == nil && paramsESP == nil {
-		return nil, fmt.Errorf("could not find node parameters in ESP or qemu fwcfg")
-	}
-	if paramsFWCFG != nil && paramsESP != nil {
-		supervisor.Logger(ctx).Warningf("Node parameters found both in both ESP and qemu fwcfg, using the latter")
-		return paramsFWCFG, nil
-	} else if paramsFWCFG != nil {
-		return paramsFWCFG, nil
-	} else {
-		return paramsESP, nil
-	}
-}
-
 // logClusterDirectory verbosely logs the whole Cluster Directory passed to it.
 func logClusterDirectory(ctx context.Context, cd *cpb.ClusterDirectory) {
 	for _, node := range cd.Nodes {