m/node/core: fix nodeparams dependent on network
The GCP nodeparams gathering strategy depends on network availability.
When static network configuration was introduced, it was added to
NodeParameters, which meant that the parameters had to be available
before the network could be initialized. This dependency loop stalls
bootup on GCP forever.
Fix it by splitting up NodeParameters gathering into a local and a
non-local phase. In setups where metadata is gathered via the network,
automated network configuration is generally available, which breaks
this dependency loop. Thus we can start networking after the local phase
has finished and run the non-local (i.e. networked) phase later.
Change-Id: I661b9b474f67f2289f427327efa4c3eaa19393e7
Fixes: https://github.com/monogon-dev/monogon/issues/353
Reviewed-on: https://review.monogon.dev/c/monogon/+/3439
Reviewed-by: Leopold Schabel <leo@monogon.tech>
Tested-by: Jenkins CI
diff --git a/metropolis/node/core/nodeparams.go b/metropolis/node/core/nodeparams.go
index bc3c5f0..868d070 100644
--- a/metropolis/node/core/nodeparams.go
+++ b/metropolis/node/core/nodeparams.go
@@ -79,39 +79,7 @@
return boardName == "Google Compute Engine"
}
-func getNodeParams(ctx context.Context, storage *localstorage.Root) (*apb.NodeParameters, error) {
- boardName, err := getDMIBoardName()
- if err != nil {
- if errors.Is(err, os.ErrNotExist) {
- supervisor.Logger(ctx).Infof("Board name: UNKNOWN")
- } else {
- supervisor.Logger(ctx).Warningf("Could not get board name, cannot detect platform: %v", err)
- }
- } else {
- supervisor.Logger(ctx).Infof("Board name: %q", boardName)
- }
-
- // When running on GCP, attempt to retrieve the node parameters from the
- // metadata server first. Retry until we get a response, since we need to
- // wait for the network service to assign an IP address first.
- if isGCPInstance(boardName) {
- var params *apb.NodeParameters
- op := func() error {
- supervisor.Logger(ctx).Info("Running on GCP, attempting to retrieve node parameters from metadata server")
- params, err = nodeParamsGCPMetadata(ctx)
- return err
- }
- err := backoff.Retry(op, backoff.WithContext(backoff.NewExponentialBackOff(), ctx))
- if err != nil {
- supervisor.Logger(ctx).Errorf("Failed to retrieve node parameters: %v", err)
- }
- if params != nil {
- supervisor.Logger(ctx).Info("Retrieved parameters from GCP metadata server")
- return params, nil
- }
- supervisor.Logger(ctx).Infof("\"metropolis-node-params\" metadata not found")
- }
-
+func getLocalNodeParams(ctx context.Context, storage *localstorage.Root) (*apb.NodeParameters, error) {
// Retrieve node parameters from qemu's fwcfg interface or ESP.
// TODO(q3k): probably abstract this away and implement per platform/build/...
paramsFWCFG, err := nodeParamsFWCFG(ctx)
@@ -148,3 +116,39 @@
return paramsESP, nil
}
}
+
+func getNodeParams(ctx context.Context, storage *localstorage.Root) (*apb.NodeParameters, error) {
+ boardName, err := getDMIBoardName()
+ if err != nil {
+ if errors.Is(err, os.ErrNotExist) {
+ supervisor.Logger(ctx).Infof("Board name: UNKNOWN")
+ } else {
+ supervisor.Logger(ctx).Warningf("Could not get board name, cannot detect platform: %v", err)
+ }
+ } else {
+ supervisor.Logger(ctx).Infof("Board name: %q", boardName)
+ }
+
+ // When running on GCP, attempt to retrieve the node parameters from the
+ // metadata server first. Retry until we get a response, since we need to
+ // wait for the network service to assign an IP address first.
+ if isGCPInstance(boardName) {
+ var params *apb.NodeParameters
+ op := func() error {
+ supervisor.Logger(ctx).Info("Running on GCP, attempting to retrieve node parameters from metadata server")
+ params, err = nodeParamsGCPMetadata(ctx)
+ return err
+ }
+ err := backoff.Retry(op, backoff.WithContext(backoff.NewExponentialBackOff(), ctx))
+ if err != nil {
+ supervisor.Logger(ctx).Errorf("Failed to retrieve node parameters: %v", err)
+ }
+ if params != nil {
+ supervisor.Logger(ctx).Info("Retrieved parameters from GCP metadata server")
+ return params, nil
+ }
+ supervisor.Logger(ctx).Infof("\"metropolis-node-params\" metadata not found")
+ }
+
+ return getLocalNodeParams(ctx, storage)
+}