m/n/c/cluster: retrieve node parameters from GCP metadata

This allows configuration via GCP instance metadata.

Change-Id: I56609019cef998aa779c5a602232767b920a9721
Reviewed-on: https://review.monogon.dev/c/monogon/+/462
Reviewed-by: Sergiusz Bazanski <serge@monogon.tech>
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
diff --git a/metropolis/node/core/cluster/BUILD.bazel b/metropolis/node/core/cluster/BUILD.bazel
index 7ba7a9a..1d55e8d 100644
--- a/metropolis/node/core/cluster/BUILD.bazel
+++ b/metropolis/node/core/cluster/BUILD.bazel
@@ -5,6 +5,7 @@
     srcs = [
         "cluster.go",
         "cluster_bootstrap.go",
+        "platform.go",
         "status.go",
         "watcher.go",
     ],
@@ -24,6 +25,7 @@
         "//metropolis/proto/api:go_default_library",
         "//metropolis/proto/common:go_default_library",
         "//metropolis/proto/private:go_default_library",
+        "@com_github_cenkalti_backoff_v4//:go_default_library",
         "@org_golang_google_protobuf//proto:go_default_library",
     ],
 )
diff --git a/metropolis/node/core/cluster/cluster.go b/metropolis/node/core/cluster/cluster.go
index 60d7e15..3ff1ad4 100644
--- a/metropolis/node/core/cluster/cluster.go
+++ b/metropolis/node/core/cluster/cluster.go
@@ -27,11 +27,15 @@
 
 import (
 	"context"
+	"encoding/base64"
 	"errors"
 	"fmt"
+	"io"
+	"net/http"
 	"os"
 	"sync"
 
+	"github.com/cenkalti/backoff/v4"
 	"google.golang.org/protobuf/proto"
 
 	"source.monogon.dev/metropolis/node/core/consensus"
@@ -143,7 +147,67 @@
 	return &config, nil
 }
 
+// nodeParamsGCPMetadata fetches the base64-encoded, serialized NodeParameters
+// from the GCP metadata service. Returns (nil, nil) if the service answers
+// 404, ie. the metropolis-node-params instance attribute is not set.
+func (m *Manager) nodeParamsGCPMetadata(ctx context.Context) (*apb.NodeParameters, error) {
+	const metadataURL = "http://169.254.169.254/computeMetadata/v1/instance/attributes/metropolis-node-params"
+	req, err := http.NewRequestWithContext(ctx, "GET", metadataURL, nil)
+	if err != nil {
+		return nil, fmt.Errorf("could not create request: %w", err)
+	}
+	req.Header.Set("Metadata-Flavor", "Google")
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("HTTP request failed: %w", err)
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		if resp.StatusCode == http.StatusNotFound {
+			return nil, nil // attribute absent: reported as "no parameters", not as an error
+		}
+		return nil, fmt.Errorf("non-200 status code: %d", resp.StatusCode)
+	}
+	decoded, err := io.ReadAll(base64.NewDecoder(base64.StdEncoding, resp.Body)) // base64-decode the body while reading it
+	if err != nil {
+		return nil, fmt.Errorf("cannot decode base64: %w", err)
+	}
+	config := apb.NodeParameters{}
+	err = proto.Unmarshal(decoded, &config)
+	if err != nil {
+		return nil, fmt.Errorf("failed unmarshalling NodeParameters: %w", err)
+	}
+	return &config, nil
+}
+
 func (m *Manager) nodeParams(ctx context.Context) (*apb.NodeParameters, error) {
+	boardName, err := getDMIBoardName()
+	if err != nil {
+		supervisor.Logger(ctx).Warningf("Could not get board name, cannot detect platform: %v", err)
+	}
+	supervisor.Logger(ctx).Infof("Board name: %q", boardName)
+
+	// When running on GCP, attempt to retrieve the node parameters from the
+	// metadata server first. Retry until we get a response, since we need to
+	// wait for the network service to assign an IP address first.
+	if isGCPInstance(boardName) {
+		var params *apb.NodeParameters
+		op := func() error {
+			supervisor.Logger(ctx).Info("Running on GCP, attempting to retrieve node parameters from metadata server")
+			params, err = m.nodeParamsGCPMetadata(ctx)
+			return err
+		}
+		err := backoff.Retry(op, backoff.WithContext(backoff.NewExponentialBackOff(), ctx))
+		if err != nil {
+			supervisor.Logger(ctx).Errorf("Failed to retrieve node parameters: %v", err)
+		}
+		if params != nil {
+			supervisor.Logger(ctx).Info("Retrieved parameters from GCP metadata server")
+			return params, nil
+		}
+		supervisor.Logger(ctx).Infof("\"metropolis-node-params\" metadata not found")
+	}
+
 	// Retrieve node parameters from qemu's fwcfg interface or ESP.
 	// TODO(q3k): probably abstract this away and implement per platform/build/...
 	paramsFWCFG, err := m.nodeParamsFWCFG(ctx)
diff --git a/metropolis/node/core/cluster/platform.go b/metropolis/node/core/cluster/platform.go
new file mode 100644
index 0000000..b6b501e
--- /dev/null
+++ b/metropolis/node/core/cluster/platform.go
@@ -0,0 +1,19 @@
+package cluster
+
+import (
+	"fmt"
+	"os"
+	"strings"
+)
+
+func getDMIBoardName() (string, error) { // returns the contents of the sysfs DMI board_name file
+	b, err := os.ReadFile("/sys/devices/virtual/dmi/id/board_name")
+	if err != nil {
+		return "", fmt.Errorf("could not read board name: %w", err)
+	}
+	return strings.TrimRight(string(b), "\n"), nil // drop any trailing newlines from the file contents
+}
+
+func isGCPInstance(boardName string) bool {
+	return boardName == "Google Compute Engine" // exact match; presumably the DMI board name GCE VMs report - verify
+}