cloud: move takeover to agent/takeover
The takeover package is tightly coupled with the agent, so let's move it
there.
Change-Id: I38ae69d4f4e7a4f6a04b0fefb5f127ebc71f5961
Reviewed-on: https://review.monogon.dev/c/monogon/+/2790
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
Tested-by: Jenkins CI
diff --git a/cloud/agent/takeover/takeover.go b/cloud/agent/takeover/takeover.go
new file mode 100644
index 0000000..d313174
--- /dev/null
+++ b/cloud/agent/takeover/takeover.go
@@ -0,0 +1,241 @@
+// takeover is a self-contained executable which when executed loads the BMaaS
+// agent via kexec. It is intended to be called over SSH, given a binary
+// TakeoverInit message over standard input and (if all preparation work
+// completed successfully) will respond with a TakeoverResponse on standard
+// output. At that point the new kernel and agent initramfs are fully staged
+// by the current kernel.
+// The second stage, which is also part of this binary and is selected by an
+// environment variable, is then started in detached mode, after which the
+// main takeover binary called over SSH terminates.
+// The second stage waits for 5 seconds for the main binary to exit, the SSH
+// session to be torn down and various other things before issuing the final
+// non-returning syscall which jumps into the new kernel.
+
+package main
+
+import (
+ "bytes"
+ "crypto/ed25519"
+ "crypto/rand"
+ _ "embed"
+ "errors"
+ "fmt"
+ "io"
+ "log"
+ "os"
+ "os/exec"
+ "time"
+
+ "github.com/cavaliergopher/cpio"
+ "github.com/klauspost/compress/zstd"
+ "golang.org/x/sys/unix"
+ "google.golang.org/protobuf/proto"
+
+ "source.monogon.dev/cloud/agent/api"
+ "source.monogon.dev/metropolis/pkg/bootparam"
+ "source.monogon.dev/metropolis/pkg/kexec"
+ netdump "source.monogon.dev/net/dump"
+ netapi "source.monogon.dev/net/proto"
+)
+
+//go:embed third_party/linux/bzImage
+var kernel []byte // kernel image; setupTakeover copies it into the "kernel" memfile
+
+//go:embed ucode.cpio
+var ucode []byte // cpio archive (presumably CPU microcode) written first into the "initramfs" memfile
+
+//go:embed initramfs.cpio.zst
+var initramfs []byte // agent initramfs; written into the "initramfs" memfile right after ucode
+
+// newMemfile returns an *os.File backed by anonymous memory instead of a
+// filesystem; name and flags are passed unchanged to memfd_create.
+func newMemfile(name string, flags int) (*os.File, error) {
+	memfd, createErr := unix.MemfdCreate(name, flags)
+	if createErr == nil {
+		return os.NewFile(uintptr(memfd), name), nil
+	}
+	return nil, fmt.Errorf("memfd_create failed: %w", createErr)
+}
+
+// setupTakeover reads a TakeoverInit message from stdin, copies the embedded
+// kernel, ucode and initramfs into memfiles, appends a freshly generated
+// AgentInit message (as /init.pb) to the initramfs and stages both via kexec.
+// On success it returns the message to report back, including any warnings.
+func setupTakeover() (*api.TakeoverSuccess, error) {
+	// Read init specification from stdin.
+	initRaw, err := io.ReadAll(os.Stdin)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read TakeoverInit message from stdin: %w", err)
+	}
+	var takeoverInit api.TakeoverInit
+	if err := proto.Unmarshal(initRaw, &takeoverInit); err != nil {
+		return nil, fmt.Errorf("failed to parse TakeoverInit message from stdin: %w", err)
+	}
+
+	// Sanity check for empty TakeoverInit messages
+	if takeoverInit.BmaasEndpoint == "" {
+		return nil, errors.New("BMaaS endpoint is empty, check that a proper TakeoverInit message has been provided")
+	}
+
+	// Load data from embedded files into memfiles as the kexec load syscall
+	// requires file descriptors.
+	kernelFile, err := newMemfile("kernel", 0)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create kernel memfile: %w", err)
+	}
+	initramfsFile, err := newMemfile("initramfs", 0)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create initramfs memfile: %w", err)
+	}
+	if _, err := kernelFile.ReadFrom(bytes.NewReader(kernel)); err != nil {
+		return nil, fmt.Errorf("failed to read kernel into memory-backed file: %w", err)
+	}
+	if _, err := initramfsFile.ReadFrom(bytes.NewReader(ucode)); err != nil {
+		return nil, fmt.Errorf("failed to read ucode into memory-backed file: %w", err)
+	}
+	if _, err := initramfsFile.ReadFrom(bytes.NewReader(initramfs)); err != nil {
+		return nil, fmt.Errorf("failed to read initramfs into memory-backed file: %w", err)
+	}
+
+	// Dump the current network configuration
+	netconf, warnings, err := netdump.Dump()
+	if err != nil {
+		return nil, fmt.Errorf("failed to dump network configuration: %w", err)
+	}
+
+	// Fall back to public resolvers if the dump contains no nameservers.
+	if len(netconf.Nameserver) == 0 {
+		netconf.Nameserver = []*netapi.Nameserver{{Ip: "8.8.8.8"}, {Ip: "1.1.1.1"}}
+	}
+
+	// Generate agent private key
+	pubKey, privKey, err := ed25519.GenerateKey(rand.Reader)
+	if err != nil {
+		return nil, fmt.Errorf("unable to generate Ed25519 key: %w", err)
+	}
+	agentInitRaw, err := proto.Marshal(&api.AgentInit{
+		TakeoverInit:  &takeoverInit,
+		PrivateKey:    privKey,
+		NetworkConfig: netconf,
+	})
+	if err != nil {
+		return nil, fmt.Errorf("unable to marshal AgentInit message: %w", err)
+	}
+
+	// Append AgentInit spec to initramfs
+	compressedW, err := zstd.NewWriter(initramfsFile, zstd.WithEncoderLevel(1))
+	if err != nil {
+		return nil, fmt.Errorf("while creating zstd writer: %w", err)
+	}
+	cpioW := cpio.NewWriter(compressedW)
+	if err := cpioW.WriteHeader(&cpio.Header{
+		Name: "/init.pb",
+		Size: int64(len(agentInitRaw)),
+		Mode: cpio.TypeReg | 0o644,
+	}); err != nil {
+		return nil, fmt.Errorf("while writing cpio header for /init.pb: %w", err)
+	}
+	if _, err := cpioW.Write(agentInitRaw); err != nil {
+		return nil, fmt.Errorf("while writing /init.pb to initramfs: %w", err)
+	}
+	// A failed Close can leave the appended archive incomplete, so check both.
+	if err := cpioW.Close(); err != nil {
+		return nil, fmt.Errorf("while closing cpio writer: %w", err)
+	}
+	if err := compressedW.Close(); err != nil {
+		return nil, fmt.Errorf("while closing zstd writer: %w", err)
+	}
+
+	agentParams := bootparam.Params{{Param: "quiet"}, {Param: "init", Value: "/init"}}
+	var customConsoles bool
+	cmdline, err := os.ReadFile("/proc/cmdline")
+	if err != nil {
+		warnings = append(warnings, fmt.Errorf("unable to read current kernel command line: %w", err))
+	} else if params, _, err := bootparam.Unmarshal(string(cmdline)); err == nil {
+		// If the existing command line is well-formed, add all existing console
+		// parameters to the console for the agent
+		for _, p := range params {
+			if p.Param == "console" {
+				agentParams = append(agentParams, p)
+				customConsoles = true
+			}
+		}
+	}
+	if !customConsoles {
+		// Add the "default" console on x86
+		agentParams = append(agentParams, bootparam.Param{Param: "console", Value: "ttyS0,115200"})
+	}
+	agentCmdline, err := bootparam.Marshal(agentParams, "")
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal agent kernel command line: %w", err)
+	}
+	// Stage agent payload into kernel memory
+	if err := kexec.FileLoad(kernelFile, initramfsFile, agentCmdline); err != nil {
+		return nil, fmt.Errorf("failed to load kexec payload: %w", err)
+	}
+	var warningsStrs []string
+	for _, w := range warnings {
+		warningsStrs = append(warningsStrs, w.Error())
+	}
+	return &api.TakeoverSuccess{InitMessage: &takeoverInit, Key: pubKey, Warning: warningsStrs}, nil
+}
+
+// detachedLaunchEnv is the env variable that, when "1", runs the second stage.
+const detachedLaunchEnv = "TAKEOVER_DETACHED_LAUNCH"
+
+// main runs the second stage (sleep, then kexec) if detachedLaunchEnv is "1".
+// Otherwise it calls setupTakeover, writes the TakeoverResponse to stdout and,
+// if staging succeeded, starts the second stage as a released child process.
+func main() {
+	if os.Getenv(detachedLaunchEnv) == "1" {
+		// Wait 5 seconds for data to be sent, connections to close and syncs to run
+		time.Sleep(5 * time.Second)
+		// Perform kexec, this will not return unless it fails
+		err := unix.Reboot(unix.LINUX_REBOOT_CMD_KEXEC)
+		msg := "takeover: reboot succeeded, but we're still running??"
+		if err != nil {
+			msg = err.Error()
+		}
+		// We have no standard output/error anymore, so attempt to dump the error
+		// into kmsg for manual debugging. If that fails too, it's just borked.
+		kmsg, err := os.OpenFile("/dev/kmsg", os.O_WRONLY, 0)
+		if err != nil {
+			os.Exit(2)
+		}
+		kmsg.WriteString(msg)
+		kmsg.Close()
+		os.Exit(1)
+	}
+
+	var takeoverResp api.TakeoverResponse
+	res, setupErr := setupTakeover()
+	if setupErr != nil {
+		takeoverResp.Result = &api.TakeoverResponse_Error{Error: &api.TakeoverError{Message: setupErr.Error()}}
+	} else {
+		takeoverResp.Result = &api.TakeoverResponse_Success{Success: res}
+	}
+	// Respond to stdout
+	takeoverRespRaw, err := proto.Marshal(&takeoverResp)
+	if err != nil {
+		log.Fatalf("failed to marshal response: %v", err)
+	}
+	if _, err := os.Stdout.Write(takeoverRespRaw); err != nil {
+		log.Fatalf("failed to write response to stdout: %v", err)
+	}
+	// Close stdout, we're done responding
+	os.Stdout.Close()
+	if setupErr != nil {
+		return // nothing was staged by this run, so do not attempt to kexec
+	}
+
+	// Start second stage, which waits 5 seconds and then performs the kexec.
+	detachedCmd := exec.Command("/proc/self/exe")
+	detachedCmd.Env = []string{detachedLaunchEnv + "=1"}
+	if err := detachedCmd.Start(); err != nil {
+		log.Fatalf("failed to launch final stage: %v", err)
+	}
+	// Release the second stage so that the first stage can cleanly terminate.
+	if err := detachedCmd.Process.Release(); err != nil {
+		log.Fatalf("error releasing final stage process: %v", err)
+	}
+}