m/node/core: fix nodeparams dependent on network

The GCP nodeparams gathering strategy depends on network availability.
When static network configuration was introduced, it was added to
NodeParameters, which meant that the parameters had to be available
before the network could be initialized. This dependency loop stalls
bootup on GCP forever.

Fix it by splitting up NodeParameters gathering into a local and a
non-local phase. In setups where metadata is gathered via the network,
automated network configuration is generally available, which breaks
this dependency loop. Thus we can start networking after the local phase
has finished and run the non-local (i.e. networked) phase later.

Change-Id: I661b9b474f67f2289f427327efa4c3eaa19393e7
Fixes: https://github.com/monogon-dev/monogon/issues/353
Reviewed-on: https://review.monogon.dev/c/monogon/+/3439
Reviewed-by: Leopold Schabel <leo@monogon.tech>
Tested-by: Jenkins CI
diff --git a/metropolis/node/core/main.go b/metropolis/node/core/main.go
index f737bd6..2a667d6 100644
--- a/metropolis/node/core/main.go
+++ b/metropolis/node/core/main.go
@@ -151,13 +151,13 @@
 		if err := root.Start(ctx, updateSvc); err != nil {
 			return fmt.Errorf("cannot start root FS: %w", err)
 		}
-		nodeParams, err := getNodeParams(ctx, root)
+		localNodeParams, err := getLocalNodeParams(ctx, root)
 		if err != nil {
-			return fmt.Errorf("cannot get node parameters: %w", err)
+			return fmt.Errorf("cannot get local node parameters: %w", err)
 		}
-		if nodeParams.NetworkConfig != nil {
-			networkSvc.StaticConfig = nodeParams.NetworkConfig
-			if err := root.ESP.Metropolis.NetworkConfiguration.Marshal(nodeParams.NetworkConfig); err != nil {
+		if localNodeParams.NetworkConfig != nil {
+			networkSvc.StaticConfig = localNodeParams.NetworkConfig
+			if err := root.ESP.Metropolis.NetworkConfiguration.Marshal(localNodeParams.NetworkConfig); err != nil {
 				logger.Errorf("Error writing back network_config from NodeParameters: %v", err)
 			}
 		}
@@ -234,6 +234,11 @@
 			logger.Errorf("Failed to configure printk logging: %v", err)
 		}
 
+		nodeParams, err := getNodeParams(ctx, root)
+		if err != nil {
+			return fmt.Errorf("cannot get node parameters: %w", err)
+		}
+
 		// Start cluster manager. This kicks off cluster membership machinery,
 		// which will either start a new cluster, enroll into one or join one.
 		m := cluster.NewManager(root, networkSvc, rs, updateSvc, nodeParams, haveTPM)
diff --git a/metropolis/node/core/nodeparams.go b/metropolis/node/core/nodeparams.go
index bc3c5f0..868d070 100644
--- a/metropolis/node/core/nodeparams.go
+++ b/metropolis/node/core/nodeparams.go
@@ -79,39 +79,7 @@
 	return boardName == "Google Compute Engine"
 }
 
-func getNodeParams(ctx context.Context, storage *localstorage.Root) (*apb.NodeParameters, error) {
-	boardName, err := getDMIBoardName()
-	if err != nil {
-		if errors.Is(err, os.ErrNotExist) {
-			supervisor.Logger(ctx).Infof("Board name: UNKNOWN")
-		} else {
-			supervisor.Logger(ctx).Warningf("Could not get board name, cannot detect platform: %v", err)
-		}
-	} else {
-		supervisor.Logger(ctx).Infof("Board name: %q", boardName)
-	}
-
-	// When running on GCP, attempt to retrieve the node parameters from the
-	// metadata server first. Retry until we get a response, since we need to
-	// wait for the network service to assign an IP address first.
-	if isGCPInstance(boardName) {
-		var params *apb.NodeParameters
-		op := func() error {
-			supervisor.Logger(ctx).Info("Running on GCP, attempting to retrieve node parameters from metadata server")
-			params, err = nodeParamsGCPMetadata(ctx)
-			return err
-		}
-		err := backoff.Retry(op, backoff.WithContext(backoff.NewExponentialBackOff(), ctx))
-		if err != nil {
-			supervisor.Logger(ctx).Errorf("Failed to retrieve node parameters: %v", err)
-		}
-		if params != nil {
-			supervisor.Logger(ctx).Info("Retrieved parameters from GCP metadata server")
-			return params, nil
-		}
-		supervisor.Logger(ctx).Infof("\"metropolis-node-params\" metadata not found")
-	}
-
+func getLocalNodeParams(ctx context.Context, storage *localstorage.Root) (*apb.NodeParameters, error) {
 	// Retrieve node parameters from qemu's fwcfg interface or ESP.
 	// TODO(q3k): probably abstract this away and implement per platform/build/...
 	paramsFWCFG, err := nodeParamsFWCFG(ctx)
@@ -148,3 +116,39 @@
 		return paramsESP, nil
 	}
 }
+
+func getNodeParams(ctx context.Context, storage *localstorage.Root) (*apb.NodeParameters, error) {
+	boardName, err := getDMIBoardName()
+	if err != nil {
+		if errors.Is(err, os.ErrNotExist) {
+			supervisor.Logger(ctx).Infof("Board name: UNKNOWN")
+		} else {
+			supervisor.Logger(ctx).Warningf("Could not get board name, cannot detect platform: %v", err)
+		}
+	} else {
+		supervisor.Logger(ctx).Infof("Board name: %q", boardName)
+	}
+
+	// When running on GCP, attempt to retrieve the node parameters from the
+	// metadata server first. Retry until we get a response, since we need to
+	// wait for the network service to assign an IP address first.
+	if isGCPInstance(boardName) {
+		var params *apb.NodeParameters
+		op := func() error {
+			supervisor.Logger(ctx).Info("Running on GCP, attempting to retrieve node parameters from metadata server")
+			params, err = nodeParamsGCPMetadata(ctx)
+			return err
+		}
+		err := backoff.Retry(op, backoff.WithContext(backoff.NewExponentialBackOff(), ctx))
+		if err != nil {
+			supervisor.Logger(ctx).Errorf("Failed to retrieve node parameters: %v", err)
+		}
+		if params != nil {
+			supervisor.Logger(ctx).Info("Retrieved parameters from GCP metadata server")
+			return params, nil
+		}
+		supervisor.Logger(ctx).Infof("\"metropolis-node-params\" metadata not found")
+	}
+
+	return getLocalNodeParams(ctx, storage)
+}