metropolis/cli/metroctl: implement install ssh
This implements another way of installing metropolis via ssh. It does
this by uploading the files to the target machine and then doing a kexec
into the install environment. If it fails at any point it will print the
error and reboot.
Change-Id: I1ac6538896709c386b053a84903fa04940c1f012
Reviewed-on: https://review.monogon.dev/c/monogon/+/2079
Tested-by: Jenkins CI
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
diff --git a/metropolis/cli/metroctl/BUILD.bazel b/metropolis/cli/metroctl/BUILD.bazel
index 6e75400..50dc1eb 100644
--- a/metropolis/cli/metroctl/BUILD.bazel
+++ b/metropolis/cli/metroctl/BUILD.bazel
@@ -19,6 +19,7 @@
srcs = [
"cmd_certs.go",
"cmd_install.go",
+ "cmd_install_ssh.go",
"cmd_install_usb.go",
"cmd_k8s_configure.go",
"cmd_k8scredplugin.go",
@@ -37,6 +38,7 @@
deps = [
"//go/clitable",
"//go/logging",
+ "//go/net/ssh",
"//metropolis/cli/flagdefs",
"//metropolis/cli/metroctl/core",
"//metropolis/node",
@@ -51,13 +53,19 @@
"//osbase/logtree/proto",
"//version",
"@com_github_adrg_xdg//:xdg",
+ "@com_github_schollz_progressbar_v3//:progressbar",
"@com_github_spf13_cobra//:cobra",
"@io_bazel_rules_go//go/runfiles:go_default_library",
"@io_k8s_apimachinery//pkg/apis/meta/v1:meta",
"@io_k8s_client_go//pkg/apis/clientauthentication/v1:clientauthentication",
"@org_golang_google_grpc//:grpc",
+ "@org_golang_google_protobuf//proto",
+ "@org_golang_x_crypto//ssh",
+ "@org_golang_x_crypto//ssh/agent",
+ "@org_golang_x_crypto//ssh/terminal",
"@org_golang_x_net//proxy",
"@org_golang_x_sync//semaphore",
+ "@org_golang_x_term//:term",
],
)
@@ -66,6 +74,7 @@
data = select({
":buildkind_lite": [],
"//conditions:default": [
+ "//metropolis/cli/takeover",
"//metropolis/installer:kernel",
"//metropolis/node:bundle",
],
diff --git a/metropolis/cli/metroctl/cmd_install_ssh.go b/metropolis/cli/metroctl/cmd_install_ssh.go
new file mode 100644
index 0000000..15ced4a
--- /dev/null
+++ b/metropolis/cli/metroctl/cmd_install_ssh.go
@@ -0,0 +1,202 @@
+package main
+
+import (
+ "context"
+ _ "embed"
+ "fmt"
+ "log"
+ "net"
+ "net/netip"
+ "os"
+ "os/signal"
+ "strings"
+ "syscall"
+ "time"
+
+ "github.com/schollz/progressbar/v3"
+ "github.com/spf13/cobra"
+ xssh "golang.org/x/crypto/ssh"
+ "golang.org/x/crypto/ssh/agent"
+ "golang.org/x/crypto/ssh/terminal"
+ "golang.org/x/term"
+ "google.golang.org/protobuf/proto"
+
+ "source.monogon.dev/go/net/ssh"
+ "source.monogon.dev/osbase/fat32"
+)
+
+var sshCmd = &cobra.Command{
+ Use: "ssh --disk=<disk> <target>",
+ Short: "Installs Metropolis on a Linux system accessible via SSH.",
+ Example: "metroctl install --bundle=metropolis-v0.1.zip --takeover=takeover ssh --disk=nvme0n1 root@ssh-enabled-server.example",
+ Args: cobra.ExactArgs(1), // One positional argument: the target
+ RunE: doSSH,
+}
+
// parseAddrOptionalPort splits addr into a host and an optional port.
//
// Accepted forms are:
//   - "host" or "1.2.3.4" (no port; the returned port is empty),
//   - "host:port" or "1.2.3.4:port",
//   - "[v6addr]:port",
//   - "v6addr" and "[v6addr]" (no port; the returned port is empty).
//
// IPv6 addresses are returned in their canonical textual form without
// brackets. For the single-colon form the port is returned verbatim and is
// not validated here. An error is returned for an empty input or for a
// multi-colon input that is not a valid IPv6 address.
func parseAddrOptionalPort(addr string) (string, string, error) {
	if addr == "" {
		return "", "", fmt.Errorf("address is empty")
	}

	switch strings.Count(addr, ":") {
	case 0:
		// IPv4 or DNS name without port.
		return addr, "", nil
	case 1:
		// IPv4 or DNS name with port.
		host, port, _ := strings.Cut(addr, ":")
		return host, port, nil
	}

	// More than one colon: this can only be an IPv6 literal.

	// IPv6 with port, i.e. "[addr]:port".
	if addrPort, err := netip.ParseAddrPort(addr); err == nil {
		return addrPort.Addr().String(), fmt.Sprintf("%d", addrPort.Port()), nil
	}

	// IPv6 without port. Also accept the bracketed form "[addr]", which
	// netip.ParseAddr itself rejects, so that brackets are allowed regardless
	// of whether a port follows.
	bare := addr
	if strings.HasPrefix(bare, "[") && strings.HasSuffix(bare, "]") {
		bare = bare[1 : len(bare)-1]
	}
	if ip, err := netip.ParseAddr(bare); err == nil {
		return ip.String(), "", nil
	}

	return "", "", fmt.Errorf("failed to parse address: %q", addr)
}
+
+func parseSSHAddr(s string) (string, string, error) {
+ user, rawAddr, ok := strings.Cut(s, "@")
+ if !ok {
+ return "", "", fmt.Errorf("SSH user is mandatory")
+ }
+
+ addr, port, err := parseAddrOptionalPort(rawAddr)
+ if err != nil {
+ return "", "", err
+ }
+ if port == "" {
+ port = "22"
+ }
+
+ return user, net.JoinHostPort(addr, port), nil
+}
+
+func doSSH(cmd *cobra.Command, args []string) error {
+ user, address, err := parseSSHAddr(args[0])
+ if err != nil {
+ return err
+ }
+
+ diskName, err := cmd.Flags().GetString("disk")
+ if err != nil {
+ return err
+ }
+
+ if len(diskName) == 0 {
+ return fmt.Errorf("flag disk is required")
+ }
+
+ var authMethods []xssh.AuthMethod
+ if aconn, err := net.Dial("unix", os.Getenv("SSH_AUTH_SOCK")); err == nil {
+ defer aconn.Close()
+ a := agent.NewClient(aconn)
+ authMethods = append(authMethods, xssh.PublicKeysCallback(a.Signers))
+ } else {
+ log.Printf("error while establishing ssh agent connection: %v", err)
+ log.Println("ssh agent authentication will not be available.")
+ }
+
+ if term.IsTerminal(int(os.Stdin.Fd())) {
+ authMethods = append(authMethods,
+ xssh.PasswordCallback(func() (string, error) {
+ fmt.Printf("%s@%s's password: ", user, address)
+ b, err := terminal.ReadPassword(syscall.Stdin)
+ if err != nil {
+ return "", err
+ }
+ fmt.Println()
+ return string(b), nil
+ }),
+ xssh.KeyboardInteractive(func(name, instruction string, questions []string, echos []bool) ([]string, error) {
+ answers := make([]string, 0, len(questions))
+ for i, q := range questions {
+ fmt.Print(q)
+ if echos[i] {
+ if _, err := fmt.Scan(&questions[i]); err != nil {
+ return nil, err
+ }
+ } else {
+ b, err := terminal.ReadPassword(syscall.Stdin)
+ if err != nil {
+ return nil, err
+ }
+ fmt.Println()
+ answers = append(answers, string(b))
+ }
+ }
+ return answers, nil
+ }),
+ )
+ } else {
+ log.Println("stdin is not interactive. password authentication will not be available.")
+ }
+
+ cl := ssh.DirectClient{
+ Username: user,
+ AuthMethods: authMethods,
+ }
+
+ ctx, _ := signal.NotifyContext(context.Background(), os.Interrupt)
+ conn, err := cl.Dial(ctx, address, 5*time.Second)
+ if err != nil {
+ return fmt.Errorf("error while establishing ssh connection: %v", err)
+ }
+
+ params := makeNodeParams()
+ rawParams, err := proto.Marshal(params)
+ if err != nil {
+ return fmt.Errorf("error while marshaling node params: %v", err)
+ }
+
+ const takeoverTargetPath = "/root/takeover"
+ const bundleTargetPath = "/root/bundle.zip"
+ bundle := external("bundle", "_main/metropolis/node/bundle.zip", bundlePath)
+ takeover := external("takeover", "_main/metropolis/cli/takeover/takeover_bin_/takeover_bin", bundlePath)
+
+ barUploader := func(r fat32.SizedReader, targetPath string) {
+ bar := progressbar.DefaultBytes(
+ r.Size(),
+ targetPath,
+ )
+ defer bar.Close()
+
+ proxyReader := progressbar.NewReader(r, bar)
+ defer proxyReader.Close()
+
+ if err := conn.Upload(ctx, targetPath, &proxyReader); err != nil {
+ log.Fatalf("error while uploading %q: %v", targetPath, err)
+ }
+ }
+
+ log.Println("Uploading required binaries to target host.")
+ barUploader(takeover, takeoverTargetPath)
+ barUploader(bundle, bundleTargetPath)
+
+ // Start the agent and wait for the agent's output to arrive.
+ log.Printf("Starting the takeover executable at path %q.", takeoverTargetPath)
+ _, stderr, err := conn.Execute(ctx, fmt.Sprintf("%s -disk %s", takeoverTargetPath, diskName), rawParams)
+ stderrStr := strings.TrimSpace(string(stderr))
+ if stderrStr != "" {
+ log.Printf("Agent stderr: %q", stderrStr)
+ }
+ if err != nil {
+ return fmt.Errorf("while starting the takeover executable: %v", err)
+ }
+
+ return nil
+}
+
+func init() {
+ sshCmd.Flags().String("disk", "", "Which disk Metropolis should be installed to")
+ sshCmd.Flags().String("takeover", "", "Path to the Metropolis takeover binary")
+
+ installCmd.AddCommand(sshCmd)
+}
diff --git a/metropolis/cli/takeover/BUILD.bazel b/metropolis/cli/takeover/BUILD.bazel
new file mode 100644
index 0000000..b3af473
--- /dev/null
+++ b/metropolis/cli/takeover/BUILD.bazel
@@ -0,0 +1,64 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
+load("//build/static_binary_tarball:def.bzl", "static_binary_tarball")
+load("//osbase/build:def.bzl", "node_initramfs")
+load("//osbase/build:def.bzl", "platform_transition_binary")
+
+node_initramfs(
+ name = "initramfs",
+ files = {},
+ fsspecs = [
+ "//osbase/build:earlydev.fsspec",
+ "//third_party:firmware",
+ ],
+ visibility = ["//metropolis/cli/takeover:__subpackages__"],
+)
+
+go_library(
+ name = "takeover_lib",
+ srcs = [
+ "boot.go",
+ "install.go",
+ "main.go",
+ "takeover.go",
+ ],
+ embedsrcs = [
+ "//third_party/linux", #keep
+ "//third_party:ucode", #keep
+ ":initramfs", #keep
+ "//metropolis/node/core/abloader", #keep
+ ],
+ importpath = "source.monogon.dev/metropolis/cli/takeover",
+ visibility = ["//visibility:private"],
+ deps = [
+ "//go/logging",
+ "//metropolis/node/core/devmgr",
+ "//metropolis/proto/api",
+ "//osbase/blockdev",
+ "//osbase/bootparam",
+ "//osbase/bringup",
+ "//osbase/build/mkimage/osimage",
+ "//osbase/efivarfs",
+ "//osbase/kexec",
+ "//osbase/net/dump",
+ "//osbase/net/proto",
+ "//osbase/supervisor",
+ "@com_github_cavaliergopher_cpio//:cpio",
+ "@com_github_klauspost_compress//zstd",
+ "@org_golang_google_protobuf//proto",
+ "@org_golang_x_sys//unix",
+ ],
+)
+
+go_binary(
+ name = "takeover_bin",
+ embed = [":takeover_lib"],
+ pure = "on",
+ visibility = ["//visibility:public"],
+)
+
+platform_transition_binary(
+ name = "takeover",
+ binary = ":takeover_bin",
+ target_platform = "//build/platforms:linux_amd64_static",
+ visibility = ["//visibility:public"],
+)
diff --git a/metropolis/cli/takeover/boot.go b/metropolis/cli/takeover/boot.go
new file mode 100644
index 0000000..eb71694
--- /dev/null
+++ b/metropolis/cli/takeover/boot.go
@@ -0,0 +1,47 @@
+package main
+
+import (
+ "context"
+ "os"
+ "path/filepath"
+ "time"
+
+ "golang.org/x/sys/unix"
+
+ "source.monogon.dev/metropolis/node/core/devmgr"
+ "source.monogon.dev/osbase/supervisor"
+)
+
+// Main runnable for the installer.
+func takeoverRunnable(ctx context.Context) error {
+ l := supervisor.Logger(ctx)
+
+ devmgrSvc := devmgr.New()
+ supervisor.Run(ctx, "devmgr", devmgrSvc.Run)
+ supervisor.Signal(ctx, supervisor.SignalHealthy)
+
+ for {
+ devicePath := filepath.Join("/dev", os.Getenv(EnvInstallTarget))
+ l.Infof("Waiting for device: %s", devicePath)
+ _, err := os.Stat(devicePath)
+ if os.IsNotExist(err) {
+ time.Sleep(1 * time.Second)
+ continue
+ } else if err != nil {
+ return err
+ }
+ break
+ }
+
+ if err := installMetropolis(l); err != nil {
+ l.Errorf("Installation failed: %v", err)
+ } else {
+ l.Info("Installation succeeded")
+ }
+
+ time.Sleep(1 * time.Second)
+ unix.Sync()
+ unix.Reboot(unix.LINUX_REBOOT_CMD_RESTART)
+
+ return nil
+}
diff --git a/metropolis/cli/takeover/e2e/BUILD.bazel b/metropolis/cli/takeover/e2e/BUILD.bazel
new file mode 100644
index 0000000..81ff44a
--- /dev/null
+++ b/metropolis/cli/takeover/e2e/BUILD.bazel
@@ -0,0 +1,32 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_test")
+
+go_test(
+ name = "e2e_test",
+ srcs = ["main_test.go"],
+ data = [
+ "//metropolis/cli/takeover",
+ "//metropolis/installer/test/testos:testos_bundle",
+ "//third_party/edk2:OVMF_CODE.fd",
+ "//third_party/edk2:OVMF_VARS.fd",
+ "@debian_11_cloudimage//file",
+ ],
+ x_defs = {
+ "xBundleFilePath": "$(rlocationpath //metropolis/installer/test/testos:testos_bundle )",
+ "xOvmfVarsPath": "$(rlocationpath //third_party/edk2:OVMF_VARS.fd )",
+ "xOvmfCodePath": "$(rlocationpath //third_party/edk2:OVMF_CODE.fd )",
+ "xCloudImagePath": "$(rlocationpath @debian_11_cloudimage//file )",
+ # TODO(tim): Hardcoded because of https://github.com/monogon-dev/monogon/issues/316
+ "xTakeoverPath": "_main/metropolis/cli/takeover/takeover/takeover_bin",
+ },
+ deps = [
+ "//go/net/ssh",
+ "//metropolis/proto/api",
+ "//metropolis/test/launch",
+ "//osbase/fat32",
+ "//osbase/freeport",
+ "@io_bazel_rules_go//go/runfiles:go_default_library",
+ "@org_golang_google_protobuf//proto",
+ "@org_golang_x_crypto//ssh",
+ "@org_golang_x_sys//unix",
+ ],
+)
diff --git a/metropolis/cli/takeover/e2e/main_test.go b/metropolis/cli/takeover/e2e/main_test.go
new file mode 100644
index 0000000..32d7fb8
--- /dev/null
+++ b/metropolis/cli/takeover/e2e/main_test.go
@@ -0,0 +1,229 @@
+package e2e
+
+import (
+ "bufio"
+ "context"
+ "crypto/ed25519"
+ "crypto/rand"
+ "encoding/json"
+ "fmt"
+ "net"
+ "os"
+ "os/exec"
+ "os/signal"
+ "strings"
+ "testing"
+ "time"
+
+ "github.com/bazelbuild/rules_go/go/runfiles"
+ xssh "golang.org/x/crypto/ssh"
+ "golang.org/x/sys/unix"
+ "google.golang.org/protobuf/proto"
+
+ "source.monogon.dev/metropolis/proto/api"
+
+ "source.monogon.dev/go/net/ssh"
+ "source.monogon.dev/metropolis/test/launch"
+ "source.monogon.dev/osbase/fat32"
+ "source.monogon.dev/osbase/freeport"
+)
+
// Runfile locations of the test's data dependencies. Bazel fills these in at
// link time with the canonical path of the corresponding file; init then
// resolves each of them to a real path via the rules_go runfiles package.
var (
	xCloudImagePath string
	xOvmfVarsPath   string
	xOvmfCodePath   string
	xTakeoverPath   string
	xBundleFilePath string
)
+
+func init() {
+ var err error
+ for _, path := range []*string{
+ &xCloudImagePath, &xOvmfVarsPath, &xOvmfCodePath,
+ &xTakeoverPath, &xBundleFilePath,
+ } {
+ *path, err = runfiles.Rlocation(*path)
+ if err != nil {
+ panic(err)
+ }
+ }
+}
+
// GiB is the number of bytes in one gibibyte (2^30).
const GiB = 1 << 30
+
+func TestE2E(t *testing.T) {
+ pubKey, privKey, err := ed25519.GenerateKey(rand.Reader)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ sshPubKey, err := xssh.NewPublicKey(pubKey)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ sshPrivkey, err := xssh.NewSignerFromKey(privKey)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // CloudConfig doesn't really have a rigid spec, so just put things into it
+ cloudConfig := make(map[string]any)
+ cloudConfig["ssh_authorized_keys"] = []string{
+ strings.TrimSuffix(string(xssh.MarshalAuthorizedKey(sshPubKey)), "\n"),
+ }
+
+ userData, err := json.Marshal(cloudConfig)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ rootInode := fat32.Inode{
+ Attrs: fat32.AttrDirectory,
+ Children: []*fat32.Inode{
+ {
+ Name: "user-data",
+ Content: strings.NewReader("#cloud-config\n" + string(userData)),
+ },
+ {
+ Name: "meta-data",
+ Content: strings.NewReader(""),
+ },
+ },
+ }
+ cloudInitDataFile, err := os.CreateTemp("", "cidata*.img")
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer os.Remove(cloudInitDataFile.Name())
+ if err := fat32.WriteFS(cloudInitDataFile, rootInode, fat32.Options{Label: "cidata"}); err != nil {
+ t.Fatal(err)
+ }
+
+ rootDisk, err := os.CreateTemp("", "rootdisk")
+ if err != nil {
+ t.Fatal(err)
+ }
+ // Create a 10GiB sparse root disk
+ if err := unix.Ftruncate(int(rootDisk.Fd()), 10*GiB); err != nil {
+ t.Fatalf("ftruncate failed: %v", err)
+ }
+
+ defer os.Remove(rootDisk.Name())
+
+ sshPort, sshPortCloser, err := freeport.AllocateTCPPort()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ qemuArgs := []string{
+ "-machine", "q35", "-accel", "kvm", "-nographic", "-nodefaults", "-m", "1024",
+ "-cpu", "host", "-smp", "sockets=1,cpus=1,cores=2,threads=2,maxcpus=4",
+ "-drive", "if=pflash,format=raw,readonly=on,file=" + xOvmfCodePath,
+ "-drive", "if=pflash,format=raw,snapshot=on,file=" + xOvmfVarsPath,
+ "-drive", "if=none,format=raw,cache=unsafe,id=root,file=" + rootDisk.Name(),
+ "-drive", "if=none,format=qcow2,snapshot=on,id=cloud,cache=unsafe,file=" + xCloudImagePath,
+ "-device", "virtio-blk-pci,drive=root,bootindex=1",
+ "-device", "virtio-blk-pci,drive=cloud,bootindex=2",
+ "-drive", "if=virtio,format=raw,snapshot=on,file=" + cloudInitDataFile.Name(),
+ "-netdev", fmt.Sprintf("user,id=net0,net=10.42.0.0/24,dhcpstart=10.42.0.10,hostfwd=tcp::%d-:22", sshPort),
+ "-device", "virtio-net-pci,netdev=net0,mac=22:d5:8e:76:1d:07",
+ "-device", "virtio-rng-pci",
+ "-serial", "stdio",
+ }
+ qemuCmd := exec.Command("qemu-system-x86_64", qemuArgs...)
+ stdoutPipe, err := qemuCmd.StdoutPipe()
+ if err != nil {
+ t.Fatal(err)
+ }
+ installSucceed := make(chan struct{})
+ go func() {
+ s := bufio.NewScanner(stdoutPipe)
+ for s.Scan() {
+ t.Log("kernel: " + s.Text())
+ if strings.Contains(s.Text(), "_TESTOS_LAUNCH_SUCCESS_") {
+ installSucceed <- struct{}{}
+ break
+ }
+ }
+ qemuCmd.Wait()
+ }()
+ qemuCmd.Stderr = os.Stderr
+ sshPortCloser.Close()
+ if err := qemuCmd.Start(); err != nil {
+ t.Fatal(err)
+ }
+ defer qemuCmd.Process.Kill()
+
+ cl := ssh.DirectClient{
+ Username: "debian",
+ AuthMethods: []xssh.AuthMethod{xssh.PublicKeys(sshPrivkey)},
+ }
+
+ ctx, _ := signal.NotifyContext(context.Background(), os.Interrupt)
+
+ var conn ssh.Connection
+ for {
+ conn, err = cl.Dial(ctx, net.JoinHostPort("localhost", fmt.Sprintf("%d", sshPort)), 5*time.Second)
+ if err != nil {
+ t.Logf("error connecting via SSH, retrying: %v", err)
+ time.Sleep(1 * time.Second)
+ continue
+ }
+ break
+ }
+
+ takeover, err := os.Open(xTakeoverPath)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ const takeoverTargetPath = "/tmp/takeover"
+ if err := conn.Upload(ctx, takeoverTargetPath, takeover); err != nil {
+ t.Fatalf("error while uploading takeover: %v", err)
+ }
+
+ bundleFile, err := os.Open(xBundleFilePath)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ const bundleTargetPath = "/tmp/bundle.zip"
+ if err := conn.Upload(ctx, bundleTargetPath, bundleFile); err != nil {
+ t.Fatalf("error while uploading bundle: %v", err)
+ }
+
+ params := &api.NodeParameters{
+ Cluster: &api.NodeParameters_ClusterBootstrap_{
+ ClusterBootstrap: &api.NodeParameters_ClusterBootstrap{
+ OwnerPublicKey: launch.InsecurePublicKey,
+ },
+ },
+ NetworkConfig: nil,
+ }
+ rawParams, err := proto.Marshal(params)
+ if err != nil {
+ t.Fatalf("error while marshaling node params: %v", err)
+ }
+
+ // Start the agent and wait for the agent's output to arrive.
+ t.Logf("Starting the takeover executable at path %q.", takeoverTargetPath)
+ _, stderr, err := conn.Execute(ctx, fmt.Sprintf("sudo %s -disk %s", takeoverTargetPath, "vda"), rawParams)
+ stderrStr := strings.TrimSpace(string(stderr))
+ if stderrStr != "" {
+ t.Logf("Agent stderr: %q", stderrStr)
+ }
+ if err != nil {
+ t.Fatalf("while starting the takeover executable: %v", err)
+ }
+
+ select {
+ case <-installSucceed:
+ // Done, test passed
+ case <-time.After(30 * time.Second):
+ t.Fatal("Waiting for installation timed out")
+ }
+}
diff --git a/metropolis/cli/takeover/install.go b/metropolis/cli/takeover/install.go
new file mode 100644
index 0000000..50679d0
--- /dev/null
+++ b/metropolis/cli/takeover/install.go
@@ -0,0 +1,115 @@
+package main
+
+import (
+ "archive/zip"
+ "bytes"
+ _ "embed"
+ "fmt"
+ "io/fs"
+ "os"
+ "path/filepath"
+
+ "source.monogon.dev/go/logging"
+ "source.monogon.dev/osbase/blockdev"
+ "source.monogon.dev/osbase/build/mkimage/osimage"
+ "source.monogon.dev/osbase/efivarfs"
+)
+
+//go:embed metropolis/node/core/abloader/abloader_bin.efi
+var abloader []byte
+
// FileSizedReader adapts an fs.File into a reader that can also report its
// total size. Size panics if Stat() fails, so this type should only wrap
// files whose Stat() cannot fail.
type FileSizedReader struct {
	fs.File
}

// Size returns the size reported by the wrapped file's Stat(). It panics if
// Stat() returns an error.
func (f FileSizedReader) Size() int64 {
	info, err := f.Stat()
	if err == nil {
		return info.Size()
	}
	panic(err)
}
+
+// EnvInstallTarget environment variable which tells the takeover binary where
+// to install to
+const EnvInstallTarget = "TAKEOVER_INSTALL_TARGET"
+
+func installMetropolis(l logging.Leveled) error {
+ // Validate we are running via EFI.
+ if _, err := os.Stat("/sys/firmware/efi"); os.IsNotExist(err) {
+ //nolint:ST1005
+ return fmt.Errorf("Monogon OS can only be installed on EFI-booted machines, this one is not")
+ }
+
+ metropolisSpecRaw, err := os.ReadFile("/params.pb")
+ if err != nil {
+ return err
+ }
+
+ bundleRaw, err := os.Open("/bundle.zip")
+ if err != nil {
+ return err
+ }
+
+ bundleStat, err := bundleRaw.Stat()
+ if err != nil {
+ return err
+ }
+
+ bundle, err := zip.NewReader(bundleRaw, bundleStat.Size())
+ if err != nil {
+ return fmt.Errorf("failed to open node bundle: %w", err)
+ }
+
+ installParams, err := setupOSImageParams(bundle, metropolisSpecRaw, os.Getenv(EnvInstallTarget))
+ if err != nil {
+ return err
+ }
+
+ be, err := osimage.Write(installParams)
+ if err != nil {
+ return fmt.Errorf("failed to apply installation: %w", err)
+ }
+ bootEntryIdx, err := efivarfs.AddBootEntry(be)
+ if err != nil {
+ return fmt.Errorf("error creating EFI boot entry: %w", err)
+ }
+ if err := efivarfs.SetBootOrder(efivarfs.BootOrder{uint16(bootEntryIdx)}); err != nil {
+ return fmt.Errorf("error setting EFI boot order: %w", err)
+ }
+ l.Info("Metropolis installation completed")
+ return nil
+}
+
+func setupOSImageParams(bundle *zip.Reader, metropolisSpecRaw []byte, installTarget string) (*osimage.Params, error) {
+ rootDev, err := blockdev.Open(filepath.Join("/dev", installTarget))
+ if err != nil {
+ return nil, fmt.Errorf("failed to open root device: %w", err)
+ }
+
+ efiPayload, err := bundle.Open("kernel_efi.efi")
+ if err != nil {
+ return nil, fmt.Errorf("invalid bundle: %w", err)
+ }
+
+ systemImage, err := bundle.Open("verity_rootfs.img")
+ if err != nil {
+ return nil, fmt.Errorf("invalid bundle: %w", err)
+ }
+
+ return &osimage.Params{
+ PartitionSize: osimage.PartitionSizeInfo{
+ ESP: 384,
+ System: 4096,
+ Data: 128,
+ },
+ SystemImage: systemImage,
+ EFIPayload: FileSizedReader{efiPayload},
+ ABLoader: bytes.NewReader(abloader),
+ NodeParameters: bytes.NewReader(metropolisSpecRaw),
+ Output: rootDev,
+ }, nil
+}
diff --git a/metropolis/cli/takeover/main.go b/metropolis/cli/takeover/main.go
new file mode 100644
index 0000000..a609a63
--- /dev/null
+++ b/metropolis/cli/takeover/main.go
@@ -0,0 +1,105 @@
+package main
+
+import (
+ "flag"
+ "io"
+ "log"
+ "os"
+ "os/exec"
+ "strings"
+ "time"
+
+ "golang.org/x/sys/unix"
+
+ "source.monogon.dev/osbase/bringup"
+)
+
+// Environment variable which tells the takeover binary to run the correct stage
+const launchModeEnv = "TAKEOVER_LAUNCH_MODE"
+
+const (
+ launchModeTakeover = ""
+ launchModeDetached = "DETACHED"
+ launchModeInit = "INIT"
+)
+
+func main() {
+ switch m := os.Getenv(launchModeEnv); m {
+ case launchModeTakeover:
+ launchTakeover()
+ case launchModeDetached:
+ launchDetached()
+ case launchModeInit:
+ launchInit()
+ default:
+ panic("unknown launch mode: " + m)
+ }
+}
+
+func launchTakeover() {
+ disk := flag.String("disk", "", "disk to install to without /dev/")
+ flag.Parse()
+ if disk == nil || *disk == "" {
+ log.Fatal("missing target disk")
+ }
+
+ nodeParamsRaw, err := io.ReadAll(os.Stdin)
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ // try removing /dev/ just to be safe
+ diskName := strings.ReplaceAll(*disk, "/dev/", "")
+ warns, err := setupTakeover(nodeParamsRaw, diskName)
+ if err != nil {
+ log.Fatal(err)
+ }
+
+ if len(warns) != 0 {
+ for _, s := range warns {
+ os.Stdout.WriteString(s)
+ }
+ }
+
+ // Close stdout, we're done responding
+ os.Stdout.Close()
+
+ // Start second stage which waits for 5 seconds while performing
+ // final cleanup.
+ detachedCmd := exec.Command("/proc/self/exe")
+ detachedCmd.Env = []string{launchModeEnv + "=" + launchModeDetached}
+ if err := detachedCmd.Start(); err != nil {
+ log.Fatalf("failed to launch final stage: %v", err)
+ }
+ // Release the second stage so that the first stage can cleanly terminate.
+ if err := detachedCmd.Process.Release(); err != nil {
+ log.Fatalf("error releasing final stage process: %v", err)
+ }
+}
+
+// launchDetached executes the second stage
+func launchDetached() {
+ // Wait 5 seconds for data to be sent, connections to be closed and
+ // syncs to be executed
+ time.Sleep(5 * time.Second)
+ // Perform kexec, this will not return unless it fails
+ err := unix.Reboot(unix.LINUX_REBOOT_CMD_KEXEC)
+ msg := "takeover: reboot succeeded, but we're still runing??"
+ if err != nil {
+ msg = err.Error()
+ }
+ // We have no standard output/error anymore, if this fails it's
+ // just borked. Attempt to dump the error into kmesg for manual
+ // debugging.
+ kmsg, err := os.OpenFile("/dev/kmsg", os.O_WRONLY, 0)
+ if err != nil {
+ os.Exit(2)
+ }
+ kmsg.WriteString(msg)
+ kmsg.Close()
+ os.Exit(1)
+}
+
+func launchInit() {
+ bringup.Runnable(takeoverRunnable).Run()
+}
diff --git a/metropolis/cli/takeover/takeover.go b/metropolis/cli/takeover/takeover.go
new file mode 100644
index 0000000..327d3c1
--- /dev/null
+++ b/metropolis/cli/takeover/takeover.go
@@ -0,0 +1,221 @@
+package main
+
+import (
+ "archive/zip"
+ "bytes"
+ _ "embed"
+ "fmt"
+ "io"
+ "os"
+ "path/filepath"
+
+ "github.com/cavaliergopher/cpio"
+ "github.com/klauspost/compress/zstd"
+ "golang.org/x/sys/unix"
+ "google.golang.org/protobuf/proto"
+
+ apb "source.monogon.dev/metropolis/proto/api"
+ netapi "source.monogon.dev/osbase/net/proto"
+
+ "source.monogon.dev/osbase/bootparam"
+ "source.monogon.dev/osbase/build/mkimage/osimage"
+ "source.monogon.dev/osbase/kexec"
+ netdump "source.monogon.dev/osbase/net/dump"
+)
+
+//go:embed third_party/linux/bzImage
+var kernel []byte
+
+//go:embed third_party/ucode.cpio
+var ucode []byte
+
+//go:embed initramfs.cpio.zst
+var initramfs []byte
+
+// newMemfile creates a new file which is not located on a specific filesystem,
+// but is instead backed by anonymous memory.
+func newMemfile(name string, flags int) (*os.File, error) {
+ fd, err := unix.MemfdCreate(name, flags)
+ if err != nil {
+ return nil, fmt.Errorf("memfd_create failed: %w", err)
+ }
+ return os.NewFile(uintptr(fd), name), nil
+}
+
+func setupTakeover(nodeParamsRaw []byte, target string) ([]string, error) {
+ // Validate we are running via EFI.
+ if _, err := os.Stat("/sys/firmware/efi"); os.IsNotExist(err) {
+ //nolint:ST1005
+ return nil, fmt.Errorf("Monogon OS can only be installed on EFI-booted machines, this one is not")
+ }
+
+ currPath, err := os.Executable()
+ if err != nil {
+ return nil, err
+ }
+
+ bundleRaw, err := os.Open(filepath.Join(filepath.Dir(currPath), "bundle.zip"))
+ if err != nil {
+ return nil, err
+ }
+
+ bundleStat, err := bundleRaw.Stat()
+ if err != nil {
+ return nil, err
+ }
+
+ bundle, err := zip.NewReader(bundleRaw, bundleStat.Size())
+ if err != nil {
+ return nil, fmt.Errorf("failed to open node bundle: %w", err)
+ }
+
+ // Dump the current network configuration
+ netconf, warnings, err := netdump.Dump()
+ if err != nil {
+ return nil, fmt.Errorf("failed to dump network configuration: %w", err)
+ }
+
+ if len(netconf.Nameserver) == 0 {
+ netconf.Nameserver = []*netapi.Nameserver{{
+ Ip: "8.8.8.8",
+ }, {
+ Ip: "1.1.1.1",
+ }}
+ }
+
+ var params apb.NodeParameters
+ if err := proto.Unmarshal(nodeParamsRaw, ¶ms); err != nil {
+ return nil, fmt.Errorf("failed to unmarshal node parameters: %w", err)
+ }
+
+ // Override the NodeParameters.NetworkConfig with the current NetworkConfig
+ // if it's missing.
+ if params.NetworkConfig == nil {
+ params.NetworkConfig = netconf
+ }
+
+ // Marshal NodeParameters again.
+ nodeParamsRaw, err = proto.Marshal(¶ms)
+ if err != nil {
+ return nil, fmt.Errorf("failed marshaling: %w", err)
+ }
+
+ oParams, err := setupOSImageParams(bundle, nodeParamsRaw, target)
+ if err != nil {
+ return nil, err
+ }
+
+ // Validate that this installation will not fail because of disk issues
+ if _, err := osimage.Plan(oParams); err != nil {
+ return nil, fmt.Errorf("failed to plan installation: %w", err)
+ }
+
+ // Load data from embedded files into memfiles as the kexec load syscall
+ // requires file descriptors.
+ kernelFile, err := newMemfile("kernel", 0)
+ if err != nil {
+ return nil, fmt.Errorf("failed to create kernel memfile: %w", err)
+ }
+ initramfsFile, err := newMemfile("initramfs", 0)
+ if err != nil {
+ return nil, fmt.Errorf("failed to create initramfs memfile: %w", err)
+ }
+ if _, err := kernelFile.ReadFrom(bytes.NewReader(kernel)); err != nil {
+ return nil, fmt.Errorf("failed to read kernel into memory-backed file: %w", err)
+ }
+ if _, err := initramfsFile.ReadFrom(bytes.NewReader(ucode)); err != nil {
+ return nil, fmt.Errorf("failed to read ucode into memory-backed file: %w", err)
+ }
+ if _, err := initramfsFile.ReadFrom(bytes.NewReader(initramfs)); err != nil {
+ return nil, fmt.Errorf("failed to read initramfs into memory-backed file: %w", err)
+ }
+
+ // Append this executable, the bundle and node params to initramfs
+ compressedW, err := zstd.NewWriter(initramfsFile, zstd.WithEncoderLevel(1))
+ if err != nil {
+ return nil, fmt.Errorf("while creating zstd writer: %w", err)
+ }
+ {
+ self, err := os.Open("/proc/self/exe")
+ if err != nil {
+ return nil, err
+ }
+ selfStat, err := self.Stat()
+ if err != nil {
+ return nil, err
+ }
+
+ cpioW := cpio.NewWriter(compressedW)
+ cpioW.WriteHeader(&cpio.Header{
+ Name: "/init",
+ Size: selfStat.Size(),
+ Mode: cpio.TypeReg | 0o755,
+ })
+ io.Copy(cpioW, self)
+ cpioW.Close()
+ }
+ {
+ cpioW := cpio.NewWriter(compressedW)
+ cpioW.WriteHeader(&cpio.Header{
+ Name: "/bundle.zip",
+ Size: bundleStat.Size(),
+ Mode: cpio.TypeReg | 0o644,
+ })
+ bundleRaw.Seek(0, io.SeekStart)
+ io.Copy(cpioW, bundleRaw)
+ cpioW.Close()
+ }
+ {
+ cpioW := cpio.NewWriter(compressedW)
+ cpioW.WriteHeader(&cpio.Header{
+ Name: "/params.pb",
+ Size: int64(len(nodeParamsRaw)),
+ Mode: cpio.TypeReg | 0o644,
+ })
+ cpioW.Write(nodeParamsRaw)
+ cpioW.Close()
+ }
+ compressedW.Close()
+
+ initParams := bootparam.Params{
+ bootparam.Param{Param: "quiet"},
+ bootparam.Param{Param: launchModeEnv, Value: launchModeInit},
+ bootparam.Param{Param: EnvInstallTarget, Value: target},
+ bootparam.Param{Param: "init", Value: "/init"},
+ }
+
+ var customConsoles bool
+ cmdline, err := os.ReadFile("/proc/cmdline")
+ if err != nil {
+ warnings = append(warnings, fmt.Errorf("unable to read current kernel command line: %w", err))
+ } else {
+ params, _, err := bootparam.Unmarshal(string(cmdline))
+ // If the existing command line is well-formed, add all existing console
+ // parameters to the console for the agent
+ if err == nil {
+ for _, p := range params {
+ if p.Param == "console" {
+ initParams = append(initParams, p)
+ customConsoles = true
+ }
+ }
+ }
+ }
+ if !customConsoles {
+ // Add the "default" console on x86
+ initParams = append(initParams, bootparam.Param{Param: "console", Value: "ttyS0,115200"})
+ }
+ agentCmdline, err := bootparam.Marshal(initParams, "")
+ if err != nil {
+ return nil, fmt.Errorf("failed to marshal bootparams: %w", err)
+ }
+ // Stage agent payload into kernel memory
+ if err := kexec.FileLoad(kernelFile, initramfsFile, agentCmdline); err != nil {
+ return nil, fmt.Errorf("failed to load kexec payload: %w", err)
+ }
+ var warningsStrs []string
+ for _, w := range warnings {
+ warningsStrs = append(warningsStrs, w.Error())
+ }
+ return warningsStrs, nil
+}