Lorenz Brun | 7922d41 | 2023-02-21 20:47:39 +0100 | [diff] [blame] | 1 | // takeover is a self-contained executable which when executed loads the BMaaS |
| 2 | // agent via kexec. It is intended to be called over SSH, given a binary |
| 3 | // TakeoverInit message over standard input and (if all preparation work |
| 4 | // completed successfully) will respond with a TakeoverResponse on standard |
| 5 | // output. At that point the new kernel and agent initramfs are fully staged |
| 6 | // by the current kernel. |
| 7 | // The second stage which is also part of this binary, selected by an |
| 8 | // environment variable, is then executed in detached mode and the main |
| 9 | // takeover binary called over SSH terminates. |
| 10 | // The second stage waits for 5 seconds for the main binary to exit, the SSH |
| 11 | // session to be torn down and various other things before issuing the final |
| 12 | // non-returning syscall which jumps into the new kernel. |
| 13 | |
| 14 | package main |
| 15 | |
| 16 | import ( |
| 17 | "bytes" |
| 18 | "crypto/ed25519" |
| 19 | "crypto/rand" |
| 20 | _ "embed" |
| 21 | "errors" |
| 22 | "fmt" |
| 23 | "io" |
| 24 | "log" |
| 25 | "os" |
| 26 | "os/exec" |
| 27 | "time" |
| 28 | |
| 29 | "github.com/cavaliergopher/cpio" |
Lorenz Brun | 62f1d36 | 2023-11-14 16:18:24 +0100 | [diff] [blame] | 30 | "github.com/klauspost/compress/zstd" |
Lorenz Brun | 7922d41 | 2023-02-21 20:47:39 +0100 | [diff] [blame] | 31 | "golang.org/x/sys/unix" |
| 32 | "google.golang.org/protobuf/proto" |
| 33 | |
| 34 | "source.monogon.dev/cloud/agent/api" |
| 35 | "source.monogon.dev/metropolis/pkg/bootparam" |
| 36 | "source.monogon.dev/metropolis/pkg/kexec" |
| 37 | netdump "source.monogon.dev/net/dump" |
Lorenz Brun | d0be371 | 2023-04-11 13:22:25 +0200 | [diff] [blame] | 38 | netapi "source.monogon.dev/net/proto" |
Lorenz Brun | 7922d41 | 2023-02-21 20:47:39 +0100 | [diff] [blame] | 39 | ) |
| 40 | |
| 41 | //go:embed third_party/linux/bzImage |
| 42 | var kernel []byte |
| 43 | |
| 44 | //go:embed ucode.cpio |
| 45 | var ucode []byte |
| 46 | |
Lorenz Brun | 62f1d36 | 2023-11-14 16:18:24 +0100 | [diff] [blame] | 47 | //go:embed cloud/agent/initramfs.cpio.zst |
Lorenz Brun | 7922d41 | 2023-02-21 20:47:39 +0100 | [diff] [blame] | 48 | var initramfs []byte |
| 49 | |
| 50 | // newMemfile creates a new file which is not located on a specific filesystem, |
| 51 | // but is instead backed by anonymous memory. |
| 52 | func newMemfile(name string, flags int) (*os.File, error) { |
| 53 | fd, err := unix.MemfdCreate(name, flags) |
| 54 | if err != nil { |
| 55 | return nil, fmt.Errorf("memfd_create failed: %w", err) |
| 56 | } |
| 57 | return os.NewFile(uintptr(fd), name), nil |
| 58 | } |
| 59 | |
| 60 | func setupTakeover() (*api.TakeoverSuccess, error) { |
| 61 | // Read init specification from stdin. |
| 62 | initRaw, err := io.ReadAll(os.Stdin) |
| 63 | if err != nil { |
| 64 | return nil, fmt.Errorf("failed to read TakeoverInit message from stdin: %w", err) |
| 65 | } |
| 66 | var takeoverInit api.TakeoverInit |
| 67 | if err := proto.Unmarshal(initRaw, &takeoverInit); err != nil { |
| 68 | return nil, fmt.Errorf("failed to parse TakeoverInit messag from stdin: %w", err) |
| 69 | } |
| 70 | |
| 71 | // Sanity check for empty TakeoverInit messages |
| 72 | if takeoverInit.BmaasEndpoint == "" { |
| 73 | return nil, errors.New("BMaaS endpoint is empty, check that a proper TakeoverInit message has been provided") |
| 74 | } |
| 75 | |
| 76 | // Load data from embedded files into memfiles as the kexec load syscall |
| 77 | // requires file descriptors. |
| 78 | kernelFile, err := newMemfile("kernel", 0) |
| 79 | if err != nil { |
| 80 | return nil, fmt.Errorf("failed to create kernel memfile: %w", err) |
| 81 | } |
| 82 | initramfsFile, err := newMemfile("initramfs", 0) |
| 83 | if err != nil { |
| 84 | return nil, fmt.Errorf("failed to create initramfs memfile: %w", err) |
| 85 | } |
| 86 | if _, err := kernelFile.ReadFrom(bytes.NewReader(kernel)); err != nil { |
| 87 | return nil, fmt.Errorf("failed to read kernel into memory-backed file: %w", err) |
| 88 | } |
| 89 | if _, err := initramfsFile.ReadFrom(bytes.NewReader(ucode)); err != nil { |
| 90 | return nil, fmt.Errorf("failed to read ucode into memory-backed file: %w", err) |
| 91 | } |
| 92 | if _, err := initramfsFile.ReadFrom(bytes.NewReader(initramfs)); err != nil { |
| 93 | return nil, fmt.Errorf("failed to read initramfs into memory-backed file: %w", err) |
| 94 | } |
| 95 | |
| 96 | // Dump the current network configuration |
| 97 | netconf, warnings, err := netdump.Dump() |
| 98 | if err != nil { |
| 99 | return nil, fmt.Errorf("failed to dump network configuration: %w", err) |
| 100 | } |
| 101 | |
Lorenz Brun | d0be371 | 2023-04-11 13:22:25 +0200 | [diff] [blame] | 102 | if len(netconf.Nameserver) == 0 { |
| 103 | netconf.Nameserver = []*netapi.Nameserver{{ |
| 104 | Ip: "8.8.8.8", |
| 105 | }, { |
| 106 | Ip: "1.1.1.1", |
| 107 | }} |
| 108 | } |
| 109 | |
Lorenz Brun | 7922d41 | 2023-02-21 20:47:39 +0100 | [diff] [blame] | 110 | // Generate agent private key |
| 111 | pubKey, privKey, err := ed25519.GenerateKey(rand.Reader) |
| 112 | if err != nil { |
| 113 | return nil, fmt.Errorf("unable to generate Ed25519 key: %w", err) |
| 114 | } |
| 115 | |
| 116 | agentInit := api.AgentInit{ |
| 117 | TakeoverInit: &takeoverInit, |
| 118 | PrivateKey: privKey, |
| 119 | NetworkConfig: netconf, |
| 120 | } |
| 121 | agentInitRaw, err := proto.Marshal(&agentInit) |
| 122 | if err != nil { |
| 123 | return nil, fmt.Errorf("unable to marshal AgentInit message: %v", err) |
| 124 | } |
| 125 | |
| 126 | // Append AgentInit spec to initramfs |
Lorenz Brun | 62f1d36 | 2023-11-14 16:18:24 +0100 | [diff] [blame] | 127 | compressedW, err := zstd.NewWriter(initramfsFile, zstd.WithEncoderLevel(1)) |
| 128 | if err != nil { |
| 129 | return nil, fmt.Errorf("while creating zstd writer: %w", err) |
| 130 | } |
| 131 | cpioW := cpio.NewWriter(compressedW) |
Lorenz Brun | 7922d41 | 2023-02-21 20:47:39 +0100 | [diff] [blame] | 132 | cpioW.WriteHeader(&cpio.Header{ |
| 133 | Name: "/init.pb", |
| 134 | Size: int64(len(agentInitRaw)), |
| 135 | Mode: cpio.TypeReg | 0o644, |
| 136 | }) |
| 137 | cpioW.Write(agentInitRaw) |
| 138 | cpioW.Close() |
Lorenz Brun | 62f1d36 | 2023-11-14 16:18:24 +0100 | [diff] [blame] | 139 | compressedW.Close() |
Lorenz Brun | 7922d41 | 2023-02-21 20:47:39 +0100 | [diff] [blame] | 140 | |
| 141 | agentParams := bootparam.Params{ |
| 142 | bootparam.Param{Param: "quiet"}, |
| 143 | bootparam.Param{Param: "init", Value: "/init"}, |
Lorenz Brun | 7922d41 | 2023-02-21 20:47:39 +0100 | [diff] [blame] | 144 | } |
| 145 | |
Lorenz Brun | 5d503b3 | 2023-04-11 13:20:23 +0200 | [diff] [blame] | 146 | var customConsoles bool |
Lorenz Brun | 7922d41 | 2023-02-21 20:47:39 +0100 | [diff] [blame] | 147 | cmdline, err := os.ReadFile("/proc/cmdline") |
| 148 | if err != nil { |
| 149 | warnings = append(warnings, fmt.Errorf("unable to read current kernel command line: %w", err)) |
| 150 | } else { |
| 151 | params, _, err := bootparam.Unmarshal(string(cmdline)) |
| 152 | // If the existing command line is well-formed, add all existing console |
| 153 | // parameters to the console for the agent |
| 154 | if err == nil { |
| 155 | for _, p := range params { |
| 156 | if p.Param == "console" { |
| 157 | agentParams = append(agentParams, p) |
Lorenz Brun | 5d503b3 | 2023-04-11 13:20:23 +0200 | [diff] [blame] | 158 | customConsoles = true |
Lorenz Brun | 7922d41 | 2023-02-21 20:47:39 +0100 | [diff] [blame] | 159 | } |
| 160 | } |
| 161 | } |
| 162 | } |
Lorenz Brun | 5d503b3 | 2023-04-11 13:20:23 +0200 | [diff] [blame] | 163 | if !customConsoles { |
| 164 | // Add the "default" console on x86 |
| 165 | agentParams = append(agentParams, bootparam.Param{Param: "console", Value: "ttyS0,115200"}) |
| 166 | } |
Lorenz Brun | 7922d41 | 2023-02-21 20:47:39 +0100 | [diff] [blame] | 167 | agentCmdline, err := bootparam.Marshal(agentParams, "") |
| 168 | // Stage agent payload into kernel memory |
| 169 | if err := kexec.FileLoad(kernelFile, initramfsFile, agentCmdline); err != nil { |
| 170 | return nil, fmt.Errorf("failed to load kexec payload: %w", err) |
| 171 | } |
| 172 | var warningsStrs []string |
| 173 | for _, w := range warnings { |
| 174 | warningsStrs = append(warningsStrs, w.Error()) |
| 175 | } |
| 176 | return &api.TakeoverSuccess{ |
| 177 | InitMessage: &takeoverInit, |
| 178 | Key: pubKey, |
| 179 | Warning: warningsStrs, |
| 180 | }, nil |
| 181 | } |
| 182 | |
const (
	// detachedLaunchEnv names the environment variable that, when set to
	// "1", makes the takeover binary run its second stage instead of the
	// regular first stage.
	detachedLaunchEnv = "TAKEOVER_DETACHED_LAUNCH"
)
| 185 | |
| 186 | func main() { |
| 187 | // Check if the second stage should be executed |
| 188 | if os.Getenv(detachedLaunchEnv) == "1" { |
| 189 | // Wait 5 seconds for data to be sent, connections to be closed and |
| 190 | // syncs to be executed |
| 191 | time.Sleep(5 * time.Second) |
| 192 | // Perform kexec, this will not return unless it fails |
| 193 | err := unix.Reboot(unix.LINUX_REBOOT_CMD_KEXEC) |
| 194 | var msg string = "takeover: reboot succeeded, but we're still runing??" |
| 195 | if err != nil { |
| 196 | msg = err.Error() |
| 197 | } |
| 198 | // We have no standard output/error anymore, if this fails it's |
| 199 | // just borked. Attempt to dump the error into kmesg for manual |
| 200 | // debugging. |
| 201 | kmsg, err := os.OpenFile("/dev/kmsg", os.O_WRONLY, 0) |
| 202 | if err != nil { |
| 203 | os.Exit(2) |
| 204 | } |
| 205 | kmsg.WriteString(msg) |
| 206 | kmsg.Close() |
| 207 | os.Exit(1) |
| 208 | } |
| 209 | |
| 210 | var takeoverResp api.TakeoverResponse |
| 211 | res, err := setupTakeover() |
| 212 | if err != nil { |
| 213 | takeoverResp.Result = &api.TakeoverResponse_Error{Error: &api.TakeoverError{ |
| 214 | Message: err.Error(), |
| 215 | }} |
| 216 | } else { |
| 217 | takeoverResp.Result = &api.TakeoverResponse_Success{Success: res} |
| 218 | } |
| 219 | // Respond to stdout |
| 220 | takeoverRespRaw, err := proto.Marshal(&takeoverResp) |
| 221 | if err != nil { |
| 222 | log.Fatalf("failed to marshal response: %v", err) |
| 223 | } |
| 224 | if _, err := os.Stdout.Write(takeoverRespRaw); err != nil { |
| 225 | log.Fatalf("failed to write response to stdout: %v", err) |
| 226 | } |
| 227 | // Close stdout, we're done responding |
| 228 | os.Stdout.Close() |
| 229 | |
| 230 | // Start second stage which waits for 5 seconds while performing |
| 231 | // final cleanup. |
| 232 | detachedCmd := exec.Command("/proc/self/exe") |
| 233 | detachedCmd.Env = []string{detachedLaunchEnv + "=1"} |
| 234 | if err := detachedCmd.Start(); err != nil { |
| 235 | log.Fatalf("failed to launch final stage: %v", err) |
| 236 | } |
| 237 | // Release the second stage so that the first stage can cleanly terminate. |
| 238 | if err := detachedCmd.Process.Release(); err != nil { |
| 239 | log.Fatalf("error releasing final stage process: %v", err) |
| 240 | } |
| 241 | } |