c/agent: implement

Implement the currently-required agent functionality, i.e. running with
both autoconfigured and static network configuration, interacting with
the BMaaS API and installing Monogon OS.

The early-stage setup is similar to that of Monogon OS itself, but after
setting up the root supervisor this instead calls into the agent runnable,
which then performs the rest of the work.
In the process I made both logtree and supervisor public, as they are
very generic and I see no reason to keep them scoped so tightly.
Maybe we should move them to go/ at some point.

This currently calls into osimage without the optimization the regular
installer performs. This is intentional, as I have code which will
replace osimage with a high-performance version, obviating the need to
manually make this fast here.

This also comes with an end-to-end test
which exercises the whole flow, installing TestOS and checking if it
launches.

Change-Id: Iab3f89598a30072ea565ec2db3b198c8df7999ef
Reviewed-on: https://review.monogon.dev/c/monogon/+/1405
Reviewed-by: Serge Bazanski <serge@monogon.tech>
Tested-by: Jenkins CI
diff --git a/cloud/agent/BUILD.bazel b/cloud/agent/BUILD.bazel
index 775dd12..d890ed2 100644
--- a/cloud/agent/BUILD.bazel
+++ b/cloud/agent/BUILD.bazel
@@ -5,19 +5,31 @@
 go_library(
     name = "agent_lib",
     srcs = [
+        "agent.go",
         "hwreport.go",
+        "install.go",
         "main.go",
     ],
     importpath = "source.monogon.dev/cloud/agent",
     visibility = ["//visibility:private"],
     deps = [
         "//cloud/agent/api",
+        "//cloud/bmaas/server/api",
+        "//metropolis/node/build/mkimage/osimage",
+        "//metropolis/node/core/network",
+        "//metropolis/pkg/efivarfs",
+        "//metropolis/pkg/logtree",
         "//metropolis/pkg/nvme",
+        "//metropolis/pkg/pki",
         "//metropolis/pkg/scsi",
         "//metropolis/pkg/smbios",
+        "//metropolis/pkg/supervisor",
+        "@com_github_cenkalti_backoff_v4//:backoff",
         "@com_github_mdlayher_ethtool//:ethtool",
         "@com_github_vishvananda_netlink//:netlink",
-        "@org_golang_google_protobuf//encoding/prototext",
+        "@org_golang_google_grpc//:go_default_library",
+        "@org_golang_google_grpc//credentials",
+        "@org_golang_google_protobuf//proto",
         "@org_golang_x_sys//unix",
     ],
 )
@@ -50,6 +62,8 @@
     name = "initramfs",
     files = {
         ":agent": "/init",
+        "@com_github_coredns_coredns//:coredns": "/kubernetes/bin/coredns",
+        "//metropolis/node/core/network/dns:resolv.conf": "/etc/resolv.conf",
         "@cacerts//file": "/etc/ssl/cert.pem",
     },
     fsspecs = [
diff --git a/cloud/agent/agent.go b/cloud/agent/agent.go
new file mode 100644
index 0000000..131a38e
--- /dev/null
+++ b/cloud/agent/agent.go
@@ -0,0 +1,172 @@
+package main
+
+import (
+	"context"
+	"crypto/ed25519"
+	"crypto/rand"
+	"crypto/tls"
+	"crypto/x509"
+	"errors"
+	"fmt"
+	"math/big"
+	"os"
+	"time"
+
+	"github.com/cenkalti/backoff/v4"
+	"golang.org/x/sys/unix"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/credentials"
+	"google.golang.org/protobuf/proto"
+
+	apb "source.monogon.dev/cloud/agent/api"
+	bpb "source.monogon.dev/cloud/bmaas/server/api"
+	"source.monogon.dev/metropolis/node/core/network"
+	"source.monogon.dev/metropolis/pkg/pki"
+	"source.monogon.dev/metropolis/pkg/supervisor"
+)
+
+// newEphemeralCert returns a self-signed client certificate for private, or an
+// error if the key is malformed. This is similar to rpc.NewEphemeralCredentials,
+// but that only deals with Metropolis-style certificate verification.
+func newEphemeralCert(private ed25519.PrivateKey) (*tls.Certificate, error) {
+	if len(private) != ed25519.PrivateKeySize { // crypto/ed25519 panics on wrong-sized keys
+		return nil, fmt.Errorf("invalid ED25519 private key length %d", len(private))
+	}
+	template := x509.Certificate{
+		SerialNumber:          big.NewInt(1),
+		NotBefore:             time.Now(),
+		NotAfter:              pki.UnknownNotAfter,
+		KeyUsage:              x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature,
+		ExtKeyUsage:           []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth},
+		BasicConstraintsValid: true,
+	}
+	certificateBytes, err := x509.CreateCertificate(rand.Reader, &template, &template, private.Public(), private)
+	if err != nil {
+		return nil, fmt.Errorf("when generating self-signed certificate: %w", err)
+	}
+	return &tls.Certificate{Certificate: [][]byte{certificateBytes}, PrivateKey: private}, nil
+}
+
+// agentRunnable is the main runnable for the agent: it reads the AgentInit
+// spec, starts networking and then heartbeats to the BMaaS API, installing an
+// OS when requested and rebooting once the installation report was delivered.
+func agentRunnable(ctx context.Context) error {
+	l := supervisor.Logger(ctx)
+	// Mount this late so we don't just crash when not booted with EFI.
+	isEFIBoot := false
+	if err := mkdirAndMount("/sys/firmware/efi/efivars", "efivarfs", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV); err == nil {
+		isEFIBoot = true
+	}
+	agentInitRaw, err := os.ReadFile("/init.pb")
+	if err != nil {
+		return fmt.Errorf("Unable to read spec file from takeover: %w", err)
+	}
+
+	var agentInit apb.AgentInit
+	if err := proto.Unmarshal(agentInitRaw, &agentInit); err != nil {
+		return fmt.Errorf("unable to parse spec file from takeover: %w", err)
+	}
+	l.Info("Monogon BMaaS Agent started")
+	if agentInit.TakeoverInit == nil {
+		return errors.New("AgentInit takeover_init field is unset, this is not allowed")
+	}
+
+	networkSvc := network.New(agentInit.NetworkConfig)
+	networkSvc.DHCPVendorClassID = "dev.monogon.cloud.agent.v1"
+	if err := supervisor.Run(ctx, "networking", networkSvc.Run); err != nil {
+		return fmt.Errorf("could not start networking: %w", err)
+	}
+	l.Info("Started networking")
+
+	ephemeralCert, err := newEphemeralCert(ed25519.PrivateKey(agentInit.PrivateKey))
+	if err != nil {
+		return fmt.Errorf("could not generate ephemeral credentials: %w", err)
+	}
+	var rootCAs *x509.CertPool
+	if len(agentInit.TakeoverInit.CaCertificate) != 0 {
+		caCert, err := x509.ParseCertificate(agentInit.TakeoverInit.CaCertificate)
+		if err != nil {
+			return fmt.Errorf("unable to parse supplied ca_certificate, is it in DER format? %w", err)
+		}
+		rootCAs = x509.NewCertPool()
+		rootCAs.AddCert(caCert)
+	}
+
+	conn, err := grpc.Dial(agentInit.TakeoverInit.BmaasEndpoint, grpc.WithTransportCredentials(credentials.NewTLS(&tls.Config{
+		Certificates: []tls.Certificate{*ephemeralCert},
+		RootCAs:      rootCAs,
+	})))
+	if err != nil {
+		return fmt.Errorf("error dialing BMaaS gRPC endpoint: %w", err)
+	}
+	c := bpb.NewAgentCallbackClient(conn)
+
+	supervisor.Signal(ctx, supervisor.SignalHealthy)
+
+	report, warnings := gatherHWReport()
+	var warningStrings []string
+	for _, w := range warnings {
+		l.Warningf("Hardware Report Warning: %v", w)
+		warningStrings = append(warningStrings, w.Error())
+	}
+
+	var hwReportSent bool
+	var installationReport *bpb.OSInstallationReport
+	var installationGeneration int64
+	b := backoff.NewExponentialBackOff()
+	// Never give up: past MaxElapsedTime NextBackOff returns backoff.Stop (-1),
+	// which would turn the sleep below into a no-op and the loop into a spin.
+	b.MaxElapsedTime = 0
+	// Main heartbeat loop
+	for {
+		req := bpb.AgentHeartbeatRequest{MachineId: agentInit.TakeoverInit.MachineId, InstallationReport: installationReport}
+		if !hwReportSent {
+			req.HardwareReport = &bpb.AgentHardwareReport{
+				Report:  report,
+				Warning: warningStrings,
+			}
+		}
+		res, err := c.Heartbeat(context.Background(), &req)
+		if err != nil {
+			l.Infof("Heartbeat failed: %v", err)
+			time.Sleep(b.NextBackOff())
+			continue
+		}
+		b.Reset()
+		hwReportSent = true
+		if installationReport != nil {
+			l.Infof("Installation report sent successfully, rebooting")
+			// Close connection and wait 1s to make sure that the RST
+			// can be sent. Important for QEMU/slirp where not doing this
+			// triggers bugs in the connection state management, but also
+			// nice for reducing the number of stale connections in the API
+			// server.
+			conn.Close()
+			time.Sleep(1 * time.Second)
+			unix.Sync()
+			if err := unix.Reboot(unix.LINUX_REBOOT_CMD_RESTART); err != nil {
+				l.Errorf("Reboot failed: %v", err)
+			}
+		}
+		instReq := res.InstallationRequest
+		if instReq == nil || instReq.Generation == installationGeneration {
+			// Nothing new to install, or this generation has already been
+			// attempted: wait before polling again instead of spinning.
+			time.Sleep(30 * time.Second)
+			continue
+		}
+		installationGeneration = instReq.Generation
+		installationReport = &bpb.OSInstallationReport{Generation: instReq.Generation}
+		if err := install(instReq, l, isEFIBoot); err != nil {
+			l.Errorf("Installation failed: %v", err)
+			installationReport.Result = &bpb.OSInstallationReport_Error_{
+				Error: &bpb.OSInstallationReport_Error{Error: err.Error()},
+			}
+		} else {
+			l.Info("Installation succeeded")
+			installationReport.Result = &bpb.OSInstallationReport_Success_{
+				Success: &bpb.OSInstallationReport_Success{},
+			}
+		}
+	}
+}
diff --git a/cloud/agent/api/takeover.proto b/cloud/agent/api/takeover.proto
index 788c5a3..a983a73 100644
--- a/cloud/agent/api/takeover.proto
+++ b/cloud/agent/api/takeover.proto
@@ -11,6 +11,9 @@
   // bmaas_endpoint is an address of the BMaaS service the agent should call
   // back to.
   string bmaas_endpoint = 2;
+  // Optional CA certificate to be used instead of a public CA root store.
+  // Formatted as raw ASN.1 DER.
+  bytes ca_certificate = 3;
 }
 
 message TakeoverSuccess {
diff --git a/cloud/agent/e2e/BUILD.bazel b/cloud/agent/e2e/BUILD.bazel
new file mode 100644
index 0000000..2ed2ad5
--- /dev/null
+++ b/cloud/agent/e2e/BUILD.bazel
@@ -0,0 +1,25 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_test")
+
+go_test(
+    name = "e2e_test",
+    srcs = ["main_test.go"],
+    data = [
+        "//cloud/agent:initramfs",
+        "//metropolis/installer/test/testos:testos_bundle",
+        "//third_party/edk2:firmware",
+        "//third_party/linux",
+    ],
+    deps = [
+        "//cloud/agent/api",
+        "//cloud/bmaas/server/api",
+        "//metropolis/cli/pkg/datafile",
+        "//metropolis/pkg/pki",
+        "//metropolis/proto/api",
+        "@com_github_cavaliergopher_cpio//:cpio",
+        "@com_github_pierrec_lz4_v4//:lz4",
+        "@org_golang_google_grpc//:go_default_library",
+        "@org_golang_google_grpc//credentials",
+        "@org_golang_google_protobuf//proto",
+        "@org_golang_x_sys//unix",
+    ],
+)
diff --git a/cloud/agent/e2e/main_test.go b/cloud/agent/e2e/main_test.go
new file mode 100644
index 0000000..100553e
--- /dev/null
+++ b/cloud/agent/e2e/main_test.go
@@ -0,0 +1,287 @@
+package e2e
+
+import (
+	"bufio"
+	"context"
+	"crypto/ed25519"
+	"crypto/rand"
+	"crypto/tls"
+	"crypto/x509"
+	"crypto/x509/pkix"
+	"fmt"
+	"io"
+	"math/big"
+	"net"
+	"net/http"
+	"net/url"
+	"os"
+	"os/exec"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/cavaliergopher/cpio"
+	"github.com/pierrec/lz4/v4"
+	"golang.org/x/sys/unix"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/credentials"
+	"google.golang.org/protobuf/proto"
+
+	apb "source.monogon.dev/cloud/agent/api"
+	bpb "source.monogon.dev/cloud/bmaas/server/api"
+	"source.monogon.dev/metropolis/cli/pkg/datafile"
+	"source.monogon.dev/metropolis/pkg/pki"
+	mpb "source.monogon.dev/metropolis/proto/api"
+)
+
+type fakeServer struct {
+	hardwareReport      *bpb.AgentHardwareReport   // last hardware report received by Heartbeat
+	installationRequest *bpb.OSInstallationRequest // request returned in Heartbeat responses, if set
+	installationReport  *bpb.OSInstallationReport  // last installation report received by Heartbeat
+}
+
+func (f *fakeServer) Heartbeat(ctx context.Context, req *bpb.AgentHeartbeatRequest) (*bpb.AgentHeartbeatResponse, error) {
+	var res bpb.AgentHeartbeatResponse
+	if req.HardwareReport != nil {
+		f.hardwareReport = req.HardwareReport
+	}
+	if req.InstallationReport != nil {
+		f.installationReport = req.InstallationReport
+	}
+	if f.installationRequest != nil {
+		res.InstallationRequest = f.installationRequest
+	}
+	return &res, nil
+}
+
+const GiB = 1024 * 1024 * 1024
+
+// TestMetropolisInstallE2E exercises the agent communicating against a test cloud
+// API server. This server requests the installation of the Metropolis 'TestOS',
+// which we then validate by looking for a string it outputs on boot.
+func TestMetropolisInstallE2E(t *testing.T) {
+	var f fakeServer
+
+	// runfile resolves a Bazel runfile path and fails the test if that fails.
+	runfile := func(path string) string {
+		t.Helper()
+		resolved, err := datafile.ResolveRunfile(path)
+		if err != nil {
+			t.Fatal(err)
+		}
+		return resolved
+	}
+
+	// Addresses inside fake QEMU userspace networking
+	grpcAddr := net.TCPAddr{IP: net.IPv4(10, 42, 0, 5), Port: 3000}
+	blobAddr := net.TCPAddr{IP: net.IPv4(10, 42, 0, 6), Port: 80}
+
+	f.installationRequest = &bpb.OSInstallationRequest{
+		Generation: 5,
+		Type: &bpb.OSInstallationRequest_Metropolis{Metropolis: &bpb.MetropolisInstallationRequest{
+			BundleUrl:      (&url.URL{Scheme: "http", Host: blobAddr.String(), Path: "/bundle.bin"}).String(),
+			NodeParameters: &mpb.NodeParameters{},
+			RootDevice:     "vda",
+		}},
+	}
+
+	caPubKey, caPrivKey, err := ed25519.GenerateKey(rand.Reader)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	caCertTmpl := x509.Certificate{
+		SerialNumber: big.NewInt(1),
+		Subject: pkix.Name{
+			CommonName: "Agent E2E Test CA",
+		},
+		NotBefore:             time.Now(),
+		NotAfter:              pki.UnknownNotAfter,
+		IsCA:                  true,
+		KeyUsage:              x509.KeyUsageCertSign | x509.KeyUsageCRLSign | x509.KeyUsageDigitalSignature,
+		BasicConstraintsValid: true,
+	}
+	caCertRaw, err := x509.CreateCertificate(rand.Reader, &caCertTmpl, &caCertTmpl, caPubKey, caPrivKey)
+	if err != nil {
+		t.Fatal(err)
+	}
+	caCert, err := x509.ParseCertificate(caCertRaw)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	serverPubKey, serverPrivKey, err := ed25519.GenerateKey(rand.Reader)
+	if err != nil {
+		t.Fatal(err)
+	}
+	serverCertTmpl := x509.Certificate{
+		SerialNumber:          big.NewInt(1),
+		Subject:               pkix.Name{},
+		NotBefore:             time.Now(),
+		NotAfter:              pki.UnknownNotAfter,
+		KeyUsage:              x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment,
+		ExtKeyUsage:           []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth},
+		IPAddresses:           []net.IP{grpcAddr.IP},
+		BasicConstraintsValid: true,
+	}
+	serverCert, err := x509.CreateCertificate(rand.Reader, &serverCertTmpl, caCert, serverPubKey, caPrivKey)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	s := grpc.NewServer(grpc.Creds(credentials.NewServerTLSFromCert(&tls.Certificate{
+		Certificate: [][]byte{serverCert},
+		PrivateKey:  serverPrivKey,
+	})))
+	bpb.RegisterAgentCallbackServer(s, &f)
+	grpcLis, err := net.Listen("tcp", "127.0.0.1:0")
+	if err != nil {
+		t.Fatal(err)
+	}
+	go s.Serve(grpcLis)
+	grpcListenAddr := grpcLis.Addr().(*net.TCPAddr)
+
+	m := http.NewServeMux()
+	bundleFilePath := runfile("metropolis/installer/test/testos/testos_bundle.zip")
+	m.HandleFunc("/bundle.bin", func(w http.ResponseWriter, req *http.Request) {
+		http.ServeFile(w, req, bundleFilePath)
+	})
+	blobLis, err := net.Listen("tcp", "127.0.0.1:0")
+	if err != nil {
+		t.Fatal(err)
+	}
+	blobListenAddr := blobLis.Addr().(*net.TCPAddr)
+	go http.Serve(blobLis, m)
+
+	_, privateKey, err := ed25519.GenerateKey(rand.Reader)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	init := apb.AgentInit{
+		TakeoverInit: &apb.TakeoverInit{
+			MachineId:     "testbox1",
+			BmaasEndpoint: grpcAddr.String(),
+			CaCertificate: caCertRaw,
+		},
+		PrivateKey: privateKey,
+	}
+
+	rootDisk, err := os.CreateTemp("", "rootdisk")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.Remove(rootDisk.Name())
+	// Create a 5GiB sparse root disk
+	if err := unix.Ftruncate(int(rootDisk.Fd()), 5*GiB); err != nil {
+		t.Fatalf("ftruncate failed: %v", err)
+	}
+
+	ovmfVarsPath := runfile("external/edk2/OVMF_VARS.fd")
+	ovmfCodePath := runfile("external/edk2/OVMF_CODE.fd")
+	kernelPath := runfile("third_party/linux/bzImage")
+	initramfsOrigPath := runfile("cloud/agent/initramfs.cpio.lz4")
+	initramfsOrigFile, err := os.Open(initramfsOrigPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer initramfsOrigFile.Close()
+
+	initramfsFile, err := os.CreateTemp("", "agent-initramfs")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.Remove(initramfsFile.Name())
+	if _, err := initramfsFile.ReadFrom(initramfsOrigFile); err != nil {
+		t.Fatal(err)
+	}
+
+	// Append AgentInit spec to initramfs
+	agentInitRaw, err := proto.Marshal(&init)
+	if err != nil {
+		t.Fatal(err)
+	}
+	compressedOut := lz4.NewWriter(initramfsFile)
+	compressedOut.Apply(lz4.LegacyOption(true))
+	cpioW := cpio.NewWriter(compressedOut)
+	if err := cpioW.WriteHeader(&cpio.Header{Name: "/init.pb", Size: int64(len(agentInitRaw)), Mode: cpio.TypeReg | 0o644}); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := cpioW.Write(agentInitRaw); err != nil {
+		t.Fatal(err)
+	}
+	if err := cpioW.Close(); err != nil {
+		t.Fatal(err)
+	}
+	if err := compressedOut.Close(); err != nil {
+		t.Fatal(err)
+	}
+
+	grpcGuestFwd := fmt.Sprintf("guestfwd=tcp:%s-tcp:127.0.0.1:%d", grpcAddr.String(), grpcListenAddr.Port)
+	blobGuestFwd := fmt.Sprintf("guestfwd=tcp:%s-tcp:127.0.0.1:%d", blobAddr.String(), blobListenAddr.Port)
+
+	ovmfVars, err := os.CreateTemp("", "agent-ovmf-vars")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.Remove(ovmfVars.Name())
+	ovmfVarsTmpl, err := os.Open(ovmfVarsPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer ovmfVarsTmpl.Close()
+	if _, err := io.Copy(ovmfVars, ovmfVarsTmpl); err != nil {
+		t.Fatal(err)
+	}
+
+	qemuArgs := []string{
+		"-machine", "q35", "-accel", "kvm", "-nographic", "-nodefaults", "-m", "1024",
+		"-cpu", "host", "-smp", "sockets=1,cpus=1,cores=2,threads=2,maxcpus=4",
+		"-drive", "if=pflash,format=raw,readonly=on,file=" + ovmfCodePath,
+		"-drive", "if=pflash,format=raw,file=" + ovmfVars.Name(),
+		"-drive", "if=virtio,format=raw,cache=unsafe,file=" + rootDisk.Name(),
+		"-netdev", fmt.Sprintf("user,id=net0,net=10.42.0.0/24,dhcpstart=10.42.0.10,%s,%s", grpcGuestFwd, blobGuestFwd),
+		"-device", "virtio-net-pci,netdev=net0,mac=22:d5:8e:76:1d:07",
+		"-device", "virtio-rng-pci",
+		"-serial", "stdio",
+		"-no-reboot",
+	}
+	stage1Args := append(qemuArgs,
+		"-kernel", kernelPath, "-initrd", initramfsFile.Name(), "-append", "console=ttyS0 quiet")
+	qemuCmdAgent := exec.Command("qemu-system-x86_64", stage1Args...)
+	qemuCmdAgent.Stdout = os.Stdout
+	qemuCmdAgent.Stderr = os.Stderr
+	if err := qemuCmdAgent.Run(); err != nil {
+		t.Fatalf("agent QEMU run failed: %v", err)
+	}
+	qemuCmdLaunch := exec.Command("qemu-system-x86_64", qemuArgs...)
+	stdoutPipe, err := qemuCmdLaunch.StdoutPipe()
+	if err != nil {
+		t.Fatal(err)
+	}
+	testosStarted := make(chan struct{})
+	go func() {
+		s := bufio.NewScanner(stdoutPipe)
+		for s.Scan() {
+			if strings.HasPrefix(s.Text(), "[") {
+				continue
+			}
+			t.Log("vm: " + s.Text())
+			if strings.Contains(s.Text(), "_TESTOS_LAUNCH_SUCCESS_") {
+				close(testosStarted) // unlike a send, never blocks once the test gave up waiting
+				break
+			}
+		}
+		qemuCmdLaunch.Wait()
+	}()
+	if err := qemuCmdLaunch.Start(); err != nil {
+		t.Fatal(err)
+	}
+	defer qemuCmdLaunch.Process.Kill()
+	select {
+	case <-testosStarted:
+		// Done, test passed
+	case <-time.After(10 * time.Second):
+		t.Fatal("Waiting for TestOS launch timed out")
+	}
+}
diff --git a/cloud/agent/hwreport.go b/cloud/agent/hwreport.go
index 8797000..3b82d27 100644
--- a/cloud/agent/hwreport.go
+++ b/cloud/agent/hwreport.go
@@ -184,9 +184,7 @@
 	return
 }
 
-var (
-	FRUUnavailable = [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
-)
+var FRUUnavailable = [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
 
 func (c *hwReportContext) gatherNVMe(bd *api.BlockDevice, bde os.DirEntry) error {
 	bd.Protocol = api.BlockDevice_NVME
@@ -207,7 +205,7 @@
 	if healthInfo, err := nvmeDev.GetHealthInfo(); err == nil {
 		bd.AvailableSpareRatio = &healthInfo.AvailableSpare
 		bd.CriticalWarning = healthInfo.HasCriticalWarning()
-		var mediaErrors = int64(healthInfo.MediaAndDataIntegrityErrors)
+		mediaErrors := int64(healthInfo.MediaAndDataIntegrityErrors)
 		bd.MediaErrors = &mediaErrors
 		bd.UsageRatio = &healthInfo.LifeUsed
 	}
@@ -399,7 +397,9 @@
 }
 
 func gatherHWReport() (*api.Node, []error) {
-	var hwReportCtx hwReportContext
+	hwReportCtx := hwReportContext{
+		node: &api.Node{},
+	}
 
 	hwReportCtx.gatherCPU()
 	hwReportCtx.gatherSMBIOS()
diff --git a/cloud/agent/install.go b/cloud/agent/install.go
new file mode 100644
index 0000000..17ec098
--- /dev/null
+++ b/cloud/agent/install.go
@@ -0,0 +1,124 @@
+package main
+
+import (
+	"archive/zip"
+	"bytes"
+	"errors"
+	"fmt"
+	"net/http"
+	"path/filepath"
+
+	"github.com/cenkalti/backoff/v4"
+	"google.golang.org/protobuf/proto"
+
+	bpb "source.monogon.dev/cloud/bmaas/server/api"
+	"source.monogon.dev/metropolis/node/build/mkimage/osimage"
+	"source.monogon.dev/metropolis/pkg/efivarfs"
+	"source.monogon.dev/metropolis/pkg/logtree"
+)
+
+// install dispatches OSInstallationRequests to the appropriate installer
+// method
+func install(req *bpb.OSInstallationRequest, l logtree.LeveledLogger, isEFIBoot bool) error {
+	switch reqT := req.Type.(type) {
+	case *bpb.OSInstallationRequest_Metropolis:
+		return installMetropolis(reqT.Metropolis, l, isEFIBoot)
+	default:
+		return errors.New("unknown installation request type")
+	}
+}
+
+// installMetropolis installs Monogon OS onto /dev/<req.RootDevice>. It downloads
+// the bundle from req.BundleUrl (retrying transient failures), writes the OS
+// image contained in it and points the EFI boot order at the new boot entry.
+// It refuses to run on machines which were not booted via EFI.
+func installMetropolis(req *bpb.MetropolisInstallationRequest, l logtree.LeveledLogger, isEFIBoot bool) error {
+	if !isEFIBoot {
+		return errors.New("Monogon OS can only be installed on EFI-booted machines, this one is not")
+	}
+	// Download into a buffer as ZIP files cannot efficiently be read from
+	// HTTP in Go as the ReaderAt has no way of indicating continuous sections,
+	// thus a ton of small range requests would need to be used, causing
+	// a huge latency penalty as well as costing a lot of money on typical
+	// object storages. This should go away when we switch to a better bundle
+	// format which can be streamed.
+	var bundleRaw bytes.Buffer
+	b := backoff.NewExponentialBackOff()
+	err := backoff.Retry(func() error {
+		bundleRes, err := http.Get(req.BundleUrl)
+		if err != nil {
+			l.Warningf("Metropolis bundle request failed: %v", err)
+			return fmt.Errorf("HTTP request failed: %w", err)
+		}
+		defer bundleRes.Body.Close()
+		switch bundleRes.StatusCode {
+		case http.StatusTooEarly, http.StatusTooManyRequests,
+			http.StatusInternalServerError, http.StatusBadGateway,
+			http.StatusServiceUnavailable, http.StatusGatewayTimeout:
+			l.Warningf("Metropolis bundle request HTTP %d error, retrying", bundleRes.StatusCode)
+			return fmt.Errorf("HTTP error %d", bundleRes.StatusCode)
+		default:
+			// Non-standard code range (520-599 inclusive) used for proxy-related
+			// issues by various vendors. Treat as non-permanent error.
+			if bundleRes.StatusCode >= 520 && bundleRes.StatusCode <= 599 {
+				l.Warningf("Metropolis bundle request HTTP %d error, retrying", bundleRes.StatusCode)
+				return fmt.Errorf("HTTP error %d", bundleRes.StatusCode)
+			}
+			if bundleRes.StatusCode != 200 {
+				l.Errorf("Metropolis bundle request permanent HTTP %d error, aborting", bundleRes.StatusCode)
+				return backoff.Permanent(fmt.Errorf("HTTP error %d", bundleRes.StatusCode))
+			}
+		}
+		if _, err := bundleRaw.ReadFrom(bundleRes.Body); err != nil {
+			l.Warningf("Metropolis bundle download failed, retrying: %v", err)
+			bundleRaw.Reset()
+			return err
+		}
+		return nil
+	}, b)
+	if err != nil {
+		return fmt.Errorf("error downloading Metropolis bundle: %w", err)
+	}
+	l.Info("Metropolis Bundle downloaded")
+	bundle, err := zip.NewReader(bytes.NewReader(bundleRaw.Bytes()), int64(bundleRaw.Len()))
+	if err != nil {
+		return fmt.Errorf("failed to open node bundle: %w", err)
+	}
+	efiPayload, err := bundle.Open("kernel_efi.efi")
+	if err != nil {
+		return fmt.Errorf("invalid bundle: %w", err)
+	}
+	defer efiPayload.Close()
+	systemImage, err := bundle.Open("verity_rootfs.img")
+	if err != nil {
+		return fmt.Errorf("invalid bundle: %w", err)
+	}
+	defer systemImage.Close()
+
+	nodeParamsRaw, err := proto.Marshal(req.NodeParameters)
+	if err != nil {
+		return fmt.Errorf("failed marshaling: %w", err)
+	}
+
+	installParams := osimage.Params{
+		PartitionSize:  osimage.PartitionSizeInfo{ESP: 128, System: 4096, Data: 128},
+		SystemImage:    systemImage,
+		EFIPayload:     efiPayload,
+		NodeParameters: bytes.NewReader(nodeParamsRaw),
+		OutputPath:     filepath.Join("/dev", req.RootDevice),
+	}
+
+	be, err := osimage.Create(&installParams)
+	if err != nil {
+		return fmt.Errorf("error writing OS image: %w", err)
+	}
+	bootEntryIdx, err := efivarfs.CreateBootEntry(be)
+	if err != nil {
+		return fmt.Errorf("error creating EFI boot entry: %w", err)
+	}
+	if err := efivarfs.SetBootOrder(&efivarfs.BootOrder{uint16(bootEntryIdx)}); err != nil {
+		return fmt.Errorf("error setting EFI boot order: %w", err)
+	}
+	l.Info("Metropolis installation completed")
+	return nil
+}
diff --git a/cloud/agent/main.go b/cloud/agent/main.go
index 704b252..e859488 100644
--- a/cloud/agent/main.go
+++ b/cloud/agent/main.go
@@ -1,15 +1,76 @@
 package main
 
 import (
+	"context"
 	"fmt"
+	"io"
+	"os"
 
-	"google.golang.org/protobuf/encoding/prototext"
+	"golang.org/x/sys/unix"
+
+	"source.monogon.dev/metropolis/pkg/logtree"
+	"source.monogon.dev/metropolis/pkg/supervisor"
 )
 
 func main() {
-	fmt.Println("Monogon BMaaS Agent started")
-	report, errs := gatherHWReport()
-	// Just print the report for now
-	fmt.Println(prototext.Format(report))
-	fmt.Println("Encountered errors:", errs)
+	setupMounts()
+
+	// Set up logger for the Agent. Currently logs everything to /dev/tty0 and
+	// /dev/ttyS0.
+	consoles := []string{"/dev/tty0", "/dev/ttyS0"}
+	lt := logtree.New()
+	for _, p := range consoles {
+		f, err := os.OpenFile(p, os.O_WRONLY, 0)
+		if err != nil {
+			continue
+		}
+		reader, err := lt.Read("", logtree.WithChildren(), logtree.WithStream())
+		if err != nil {
+			panic(fmt.Errorf("could not set up root log reader: %v", err))
+		}
+		go func(path string, f io.Writer) {
+			for {
+				p := <-reader.Stream
+				fmt.Fprintf(f, "%s\n", p.String())
+			}
+		}(p, f)
+	}
+
+	sCtx := context.Background()
+	supervisor.New(sCtx, agentRunnable, supervisor.WithExistingLogtree(lt))
+	select {}
+}
+
+// mkdirAndMount creates dir including parents and mounts filesystem fs on it.
+func mkdirAndMount(dir, fs string, flags uintptr) error {
+	if mkdirErr := os.MkdirAll(dir, 0o755); mkdirErr != nil {
+		return fmt.Errorf("could not make %s: %w", dir, mkdirErr)
+	} else if mountErr := unix.Mount(fs, dir, fs, flags, ""); mountErr != nil {
+		return fmt.Errorf("could not mount %s on %s: %w", fs, dir, mountErr)
+	}
+	return nil
+}
+
+// setupMounts sets up basic mounts like sysfs, procfs, devtmpfs and cgroups.
+// This should be called early during init as a lot of processes depend on this
+// being available.
+func setupMounts() error {
+	// Set up target filesystems.
+	for _, el := range []struct {
+		dir   string
+		fs    string
+		flags uintptr
+	}{
+		{"/sys", "sysfs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
+		{"/sys/kernel/tracing", "tracefs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
+		{"/sys/fs/pstore", "pstore", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
+		{"/proc", "proc", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
+		{"/dev", "devtmpfs", unix.MS_NOEXEC | unix.MS_NOSUID},
+		{"/dev/pts", "devpts", unix.MS_NOEXEC | unix.MS_NOSUID},
+	} {
+		if err := mkdirAndMount(el.dir, el.fs, el.flags); err != nil {
+			return err
+		}
+	}
+	return nil
 }