blob: 9de39f5209952e08487725fa2ded0517eb29f08a [file] [log] [blame]
// takeover is a self-contained executable which, when executed, loads the
// BMaaS agent via kexec. It is intended to be called over SSH, given a binary
// TakeoverInit message over standard input, and (if all preparation work
// completed successfully) will respond with a TakeoverResponse on standard
// output. At that point the new kernel and agent initramfs are fully staged
// by the current kernel.
// The second stage, which is also part of this binary and is selected by an
// environment variable, is then executed in detached mode, and the main
// takeover binary called over SSH terminates.
// The second stage waits for 5 seconds for the main binary to exit, the SSH
// session to be torn down and various other things to settle before issuing
// the final non-returning syscall which jumps into the new kernel.
package main
import (
"bytes"
"crypto/ed25519"
"crypto/rand"
_ "embed"
"errors"
"fmt"
"io"
"log"
"os"
"os/exec"
"time"
"github.com/cavaliergopher/cpio"
"github.com/pierrec/lz4/v4"
"golang.org/x/sys/unix"
"google.golang.org/protobuf/proto"
"source.monogon.dev/cloud/agent/api"
"source.monogon.dev/metropolis/pkg/bootparam"
"source.monogon.dev/metropolis/pkg/kexec"
netdump "source.monogon.dev/net/dump"
netapi "source.monogon.dev/net/proto"
)
// kernel holds the contents of third_party/linux/bzImage, embedded at build
// time. setupTakeover copies it into a memfile which is handed to kexec as
// the kernel to load.
//
//go:embed third_party/linux/bzImage
var kernel []byte
// ucode holds the contents of ucode.cpio, embedded at build time.
// setupTakeover writes it to the initramfs memfile first, ahead of the agent
// initramfs.
//
//go:embed ucode.cpio
var ucode []byte
// initramfs holds the contents of cloud/agent/initramfs.cpio.lz4, embedded at
// build time (presumably an LZ4-compressed cpio archive, going by the file
// name). setupTakeover writes it to the initramfs memfile after ucode.
//
//go:embed cloud/agent/initramfs.cpio.lz4
var initramfs []byte
// newMemfile returns an *os.File wrapping a fresh memfd, i.e. a file which
// lives in anonymous memory instead of on any mounted filesystem. The name
// and flags are passed through to memfd_create unchanged; name is also used
// as the name of the returned os.File.
func newMemfile(name string, flags int) (*os.File, error) {
	memfd, createErr := unix.MemfdCreate(name, flags)
	if createErr == nil {
		return os.NewFile(uintptr(memfd), name), nil
	}
	return nil, fmt.Errorf("memfd_create failed: %w", createErr)
}
// setupTakeover performs all preparation work for the takeover and stages the
// agent kernel and initramfs for kexec. It does not perform the kexec itself.
//
// In order, it:
//   - reads and validates the TakeoverInit message from standard input,
//   - copies the embedded kernel, microcode and agent initramfs into
//     memory-backed files,
//   - dumps the current network configuration,
//   - generates a fresh Ed25519 key pair for the agent,
//   - appends an AgentInit message as /init.pb to the initramfs,
//   - builds the agent kernel command line, carrying over any console
//     parameters of the running kernel, and
//   - loads everything via kexec.FileLoad.
//
// On success it returns a TakeoverSuccess containing the received init
// message, the public half of the generated key and any non-fatal warnings
// collected along the way. On failure it returns a nil result and an error
// describing the step which failed.
func setupTakeover() (*api.TakeoverSuccess, error) {
	// Read init specification from stdin.
	initRaw, err := io.ReadAll(os.Stdin)
	if err != nil {
		return nil, fmt.Errorf("failed to read TakeoverInit message from stdin: %w", err)
	}
	var takeoverInit api.TakeoverInit
	if err := proto.Unmarshal(initRaw, &takeoverInit); err != nil {
		return nil, fmt.Errorf("failed to parse TakeoverInit message from stdin: %w", err)
	}
	// Sanity check for empty TakeoverInit messages
	if takeoverInit.BmaasEndpoint == "" {
		return nil, errors.New("BMaaS endpoint is empty, check that a proper TakeoverInit message has been provided")
	}

	// Load data from embedded files into memfiles as the kexec load syscall
	// requires file descriptors. The descriptors are only needed until the
	// payload has been loaded, so they are closed when this function returns.
	kernelFile, err := newMemfile("kernel", 0)
	if err != nil {
		return nil, fmt.Errorf("failed to create kernel memfile: %w", err)
	}
	defer kernelFile.Close()
	initramfsFile, err := newMemfile("initramfs", 0)
	if err != nil {
		return nil, fmt.Errorf("failed to create initramfs memfile: %w", err)
	}
	defer initramfsFile.Close()
	if _, err := kernelFile.ReadFrom(bytes.NewReader(kernel)); err != nil {
		return nil, fmt.Errorf("failed to read kernel into memory-backed file: %w", err)
	}
	// The initramfs memfile is a concatenation: microcode first, then the
	// agent initramfs, then (further below) the AgentInit archive.
	if _, err := initramfsFile.ReadFrom(bytes.NewReader(ucode)); err != nil {
		return nil, fmt.Errorf("failed to read ucode into memory-backed file: %w", err)
	}
	if _, err := initramfsFile.ReadFrom(bytes.NewReader(initramfs)); err != nil {
		return nil, fmt.Errorf("failed to read initramfs into memory-backed file: %w", err)
	}

	// Dump the current network configuration
	netconf, warnings, err := netdump.Dump()
	if err != nil {
		return nil, fmt.Errorf("failed to dump network configuration: %w", err)
	}
	// If the dump did not yield any nameservers, fall back to two hard-coded
	// ones so that the agent can still resolve names.
	if len(netconf.Nameserver) == 0 {
		netconf.Nameserver = []*netapi.Nameserver{{
			Ip: "8.8.8.8",
		}, {
			Ip: "1.1.1.1",
		}}
	}

	// Generate agent private key
	pubKey, privKey, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		return nil, fmt.Errorf("unable to generate Ed25519 key: %w", err)
	}

	agentInit := api.AgentInit{
		TakeoverInit:  &takeoverInit,
		PrivateKey:    privKey,
		NetworkConfig: netconf,
	}
	agentInitRaw, err := proto.Marshal(&agentInit)
	if err != nil {
		return nil, fmt.Errorf("unable to marshal AgentInit message: %w", err)
	}

	// Append AgentInit spec to initramfs as an additional LZ4-compressed cpio
	// archive containing a single file, /init.pb. Every step is checked as a
	// silently truncated archive would leave the agent without its
	// configuration.
	compressedOut := lz4.NewWriter(initramfsFile)
	// NOTE(review): legacy framing is presumably required by the kernel's
	// initramfs decompressor - confirm.
	if err := compressedOut.Apply(lz4.LegacyOption(true)); err != nil {
		return nil, fmt.Errorf("failed to configure LZ4 writer for AgentInit: %w", err)
	}
	cpioW := cpio.NewWriter(compressedOut)
	if err := cpioW.WriteHeader(&cpio.Header{
		Name: "/init.pb",
		Size: int64(len(agentInitRaw)),
		Mode: cpio.TypeReg | 0o644,
	}); err != nil {
		return nil, fmt.Errorf("failed to write cpio header for AgentInit: %w", err)
	}
	if _, err := cpioW.Write(agentInitRaw); err != nil {
		return nil, fmt.Errorf("failed to write AgentInit into initramfs: %w", err)
	}
	if err := cpioW.Close(); err != nil {
		return nil, fmt.Errorf("failed to finish AgentInit cpio archive: %w", err)
	}
	if err := compressedOut.Close(); err != nil {
		return nil, fmt.Errorf("failed to finish AgentInit LZ4 stream: %w", err)
	}

	agentParams := bootparam.Params{
		bootparam.Param{Param: "quiet"},
		bootparam.Param{Param: "init", Value: "/init"},
	}
	var customConsoles bool
	cmdline, err := os.ReadFile("/proc/cmdline")
	if err != nil {
		// Not fatal: the agent just gets the default console below.
		warnings = append(warnings, fmt.Errorf("unable to read current kernel command line: %w", err))
	} else {
		params, _, err := bootparam.Unmarshal(string(cmdline))
		// If the existing command line is well-formed, add all existing console
		// parameters to the console for the agent. A malformed command line is
		// deliberately ignored.
		if err == nil {
			for _, p := range params {
				if p.Param == "console" {
					agentParams = append(agentParams, p)
					customConsoles = true
				}
			}
		}
	}
	if !customConsoles {
		// Add the "default" console on x86
		agentParams = append(agentParams, bootparam.Param{Param: "console", Value: "ttyS0,115200"})
	}
	agentCmdline, err := bootparam.Marshal(agentParams, "")
	if err != nil {
		return nil, fmt.Errorf("failed to marshal agent kernel command line: %w", err)
	}

	// Stage agent payload into kernel memory
	if err := kexec.FileLoad(kernelFile, initramfsFile, agentCmdline); err != nil {
		return nil, fmt.Errorf("failed to load kexec payload: %w", err)
	}

	// Warnings are errors internally but are reported as plain strings.
	var warningsStrs []string
	for _, w := range warnings {
		warningsStrs = append(warningsStrs, w.Error())
	}
	return &api.TakeoverSuccess{
		InitMessage: &takeoverInit,
		Key:         pubKey,
		Warning:     warningsStrs,
	}, nil
}
// detachedLaunchEnv is the name of the environment variable which tells the
// takeover binary to run the second stage. main runs the second stage when
// this variable is set to "1", and sets it to that value in the environment
// of the detached copy of itself which it launches.
const detachedLaunchEnv = "TAKEOVER_DETACHED_LAUNCH"
// main is the entry point for both stages of the takeover.
//
// When detachedLaunchEnv is set to "1" it runs the second stage: it sleeps
// for 5 seconds and then issues the kexec reboot. That call only returns on
// failure, in which case a message is written to /dev/kmsg on a best-effort
// basis and the process exits with a non-zero status.
//
// Otherwise it runs the first stage: it calls setupTakeover, writes the
// resulting TakeoverResponse (success or error) to standard output and closes
// it. Only if setup succeeded does it then launch the second stage as a
// detached process and return.
func main() {
	// Check if the second stage should be executed
	if os.Getenv(detachedLaunchEnv) == "1" {
		// Wait 5 seconds for data to be sent, connections to be closed and
		// syncs to be executed
		time.Sleep(5 * time.Second)
		// Perform kexec, this will not return unless it fails
		err := unix.Reboot(unix.LINUX_REBOOT_CMD_KEXEC)
		msg := "takeover: reboot succeeded, but we're still running??"
		if err != nil {
			msg = fmt.Sprintf("takeover: kexec reboot failed: %v", err)
		}
		// We have no standard output/error anymore, if this fails it's
		// just borked. Attempt to dump the error into kmesg for manual
		// debugging.
		kmsg, err := os.OpenFile("/dev/kmsg", os.O_WRONLY, 0)
		if err != nil {
			os.Exit(2)
		}
		// Best effort: there is nowhere left to report a failed write to.
		kmsg.WriteString(msg)
		kmsg.Close()
		os.Exit(1)
	}

	var takeoverResp api.TakeoverResponse
	// The setup error gets its own variable so that it is still available
	// after the response has been marshaled and written.
	res, setupErr := setupTakeover()
	if setupErr != nil {
		takeoverResp.Result = &api.TakeoverResponse_Error{Error: &api.TakeoverError{
			Message: setupErr.Error(),
		}}
	} else {
		takeoverResp.Result = &api.TakeoverResponse_Success{Success: res}
	}
	// Respond to stdout
	takeoverRespRaw, err := proto.Marshal(&takeoverResp)
	if err != nil {
		log.Fatalf("failed to marshal response: %v", err)
	}
	if _, err := os.Stdout.Write(takeoverRespRaw); err != nil {
		log.Fatalf("failed to write response to stdout: %v", err)
	}
	// Close stdout, we're done responding
	os.Stdout.Close()

	// If setup failed, no payload has been staged by us, so the second stage
	// must not be started: its kexec would either fail or jump into whatever
	// image happened to be loaded before. The error has already been
	// reported to the caller in the response.
	if setupErr != nil {
		return
	}

	// Start second stage which waits for 5 seconds while performing
	// final cleanup.
	detachedCmd := exec.Command("/proc/self/exe")
	detachedCmd.Env = []string{detachedLaunchEnv + "=1"}
	if err := detachedCmd.Start(); err != nil {
		log.Fatalf("failed to launch final stage: %v", err)
	}
	// Release the second stage so that the first stage can cleanly terminate.
	if err := detachedCmd.Process.Release(); err != nil {
		log.Fatalf("error releasing final stage process: %v", err)
	}
}