blob: fc7169049f920495d14737358f5309809a06c3e8 [file] [log] [blame]
// takeover is a self-contained executable which, when executed, loads the
// BMaaS agent via kexec. It is intended to be called over SSH with a binary
// TakeoverInit message on standard input and, if all preparation work
// completed successfully, responds with a TakeoverResponse on standard
// output. At that point the new kernel and agent initramfs are fully staged
// by the current kernel.
// The second stage, which is also part of this binary and is selected by an
// environment variable, is then executed in detached mode and the main
// takeover binary called over SSH terminates.
// The second stage waits for 5 seconds to allow the main binary to exit, the
// SSH session to be torn down and various other things to settle before
// issuing the final non-returning syscall which jumps into the new kernel.
13
14package main
15
16import (
17 "bytes"
18 "crypto/ed25519"
19 "crypto/rand"
20 _ "embed"
21 "errors"
22 "fmt"
23 "io"
24 "log"
25 "os"
26 "os/exec"
27 "time"
28
29 "github.com/cavaliergopher/cpio"
30 "github.com/pierrec/lz4/v4"
31 "golang.org/x/sys/unix"
32 "google.golang.org/protobuf/proto"
33
34 "source.monogon.dev/cloud/agent/api"
35 "source.monogon.dev/metropolis/pkg/bootparam"
36 "source.monogon.dev/metropolis/pkg/kexec"
37 netdump "source.monogon.dev/net/dump"
38)
39
// kernel holds the embedded kernel image (bzImage). setupTakeover copies it
// into a memory-backed file and passes that file to kexec.FileLoad.
//
//go:embed third_party/linux/bzImage
var kernel []byte

// ucode holds the embedded ucode.cpio archive (presumably CPU microcode
// updates, going by the name; confirm). setupTakeover writes it to the
// initramfs memfile before the agent initramfs.
//
//go:embed ucode.cpio
var ucode []byte

// initramfs holds the embedded agent initramfs (an LZ4-compressed cpio
// archive according to its file name). setupTakeover writes it to the
// initramfs memfile after ucode and then appends the AgentInit archive.
//
//go:embed cloud/agent/initramfs.cpio.lz4
var initramfs []byte
48
49// newMemfile creates a new file which is not located on a specific filesystem,
50// but is instead backed by anonymous memory.
51func newMemfile(name string, flags int) (*os.File, error) {
52 fd, err := unix.MemfdCreate(name, flags)
53 if err != nil {
54 return nil, fmt.Errorf("memfd_create failed: %w", err)
55 }
56 return os.NewFile(uintptr(fd), name), nil
57}
58
59func setupTakeover() (*api.TakeoverSuccess, error) {
60 // Read init specification from stdin.
61 initRaw, err := io.ReadAll(os.Stdin)
62 if err != nil {
63 return nil, fmt.Errorf("failed to read TakeoverInit message from stdin: %w", err)
64 }
65 var takeoverInit api.TakeoverInit
66 if err := proto.Unmarshal(initRaw, &takeoverInit); err != nil {
67 return nil, fmt.Errorf("failed to parse TakeoverInit messag from stdin: %w", err)
68 }
69
70 // Sanity check for empty TakeoverInit messages
71 if takeoverInit.BmaasEndpoint == "" {
72 return nil, errors.New("BMaaS endpoint is empty, check that a proper TakeoverInit message has been provided")
73 }
74
75 // Load data from embedded files into memfiles as the kexec load syscall
76 // requires file descriptors.
77 kernelFile, err := newMemfile("kernel", 0)
78 if err != nil {
79 return nil, fmt.Errorf("failed to create kernel memfile: %w", err)
80 }
81 initramfsFile, err := newMemfile("initramfs", 0)
82 if err != nil {
83 return nil, fmt.Errorf("failed to create initramfs memfile: %w", err)
84 }
85 if _, err := kernelFile.ReadFrom(bytes.NewReader(kernel)); err != nil {
86 return nil, fmt.Errorf("failed to read kernel into memory-backed file: %w", err)
87 }
88 if _, err := initramfsFile.ReadFrom(bytes.NewReader(ucode)); err != nil {
89 return nil, fmt.Errorf("failed to read ucode into memory-backed file: %w", err)
90 }
91 if _, err := initramfsFile.ReadFrom(bytes.NewReader(initramfs)); err != nil {
92 return nil, fmt.Errorf("failed to read initramfs into memory-backed file: %w", err)
93 }
94
95 // Dump the current network configuration
96 netconf, warnings, err := netdump.Dump()
97 if err != nil {
98 return nil, fmt.Errorf("failed to dump network configuration: %w", err)
99 }
100
101 // Generate agent private key
102 pubKey, privKey, err := ed25519.GenerateKey(rand.Reader)
103 if err != nil {
104 return nil, fmt.Errorf("unable to generate Ed25519 key: %w", err)
105 }
106
107 agentInit := api.AgentInit{
108 TakeoverInit: &takeoverInit,
109 PrivateKey: privKey,
110 NetworkConfig: netconf,
111 }
112 agentInitRaw, err := proto.Marshal(&agentInit)
113 if err != nil {
114 return nil, fmt.Errorf("unable to marshal AgentInit message: %v", err)
115 }
116
117 // Append AgentInit spec to initramfs
118 compressedOut := lz4.NewWriter(initramfsFile)
119 compressedOut.Apply(lz4.LegacyOption(true))
120 cpioW := cpio.NewWriter(compressedOut)
121 cpioW.WriteHeader(&cpio.Header{
122 Name: "/init.pb",
123 Size: int64(len(agentInitRaw)),
124 Mode: cpio.TypeReg | 0o644,
125 })
126 cpioW.Write(agentInitRaw)
127 cpioW.Close()
128 compressedOut.Close()
129
130 agentParams := bootparam.Params{
131 bootparam.Param{Param: "quiet"},
132 bootparam.Param{Param: "init", Value: "/init"},
Lorenz Brun7922d412023-02-21 20:47:39 +0100133 }
134
Lorenz Brun5d503b32023-04-11 13:20:23 +0200135 var customConsoles bool
Lorenz Brun7922d412023-02-21 20:47:39 +0100136 cmdline, err := os.ReadFile("/proc/cmdline")
137 if err != nil {
138 warnings = append(warnings, fmt.Errorf("unable to read current kernel command line: %w", err))
139 } else {
140 params, _, err := bootparam.Unmarshal(string(cmdline))
141 // If the existing command line is well-formed, add all existing console
142 // parameters to the console for the agent
143 if err == nil {
144 for _, p := range params {
145 if p.Param == "console" {
146 agentParams = append(agentParams, p)
Lorenz Brun5d503b32023-04-11 13:20:23 +0200147 customConsoles = true
Lorenz Brun7922d412023-02-21 20:47:39 +0100148 }
149 }
150 }
151 }
Lorenz Brun5d503b32023-04-11 13:20:23 +0200152 if !customConsoles {
153 // Add the "default" console on x86
154 agentParams = append(agentParams, bootparam.Param{Param: "console", Value: "ttyS0,115200"})
155 }
Lorenz Brun7922d412023-02-21 20:47:39 +0100156 agentCmdline, err := bootparam.Marshal(agentParams, "")
157 // Stage agent payload into kernel memory
158 if err := kexec.FileLoad(kernelFile, initramfsFile, agentCmdline); err != nil {
159 return nil, fmt.Errorf("failed to load kexec payload: %w", err)
160 }
161 var warningsStrs []string
162 for _, w := range warnings {
163 warningsStrs = append(warningsStrs, w.Error())
164 }
165 return &api.TakeoverSuccess{
166 InitMessage: &takeoverInit,
167 Key: pubKey,
168 Warning: warningsStrs,
169 }, nil
170}
171
// detachedLaunchEnv is the name of the environment variable which tells the
// takeover binary to run the second stage. main sets it to "1" for the
// detached child process and checks for exactly that value on startup.
const detachedLaunchEnv = "TAKEOVER_DETACHED_LAUNCH"
174
175func main() {
176 // Check if the second stage should be executed
177 if os.Getenv(detachedLaunchEnv) == "1" {
178 // Wait 5 seconds for data to be sent, connections to be closed and
179 // syncs to be executed
180 time.Sleep(5 * time.Second)
181 // Perform kexec, this will not return unless it fails
182 err := unix.Reboot(unix.LINUX_REBOOT_CMD_KEXEC)
183 var msg string = "takeover: reboot succeeded, but we're still runing??"
184 if err != nil {
185 msg = err.Error()
186 }
187 // We have no standard output/error anymore, if this fails it's
188 // just borked. Attempt to dump the error into kmesg for manual
189 // debugging.
190 kmsg, err := os.OpenFile("/dev/kmsg", os.O_WRONLY, 0)
191 if err != nil {
192 os.Exit(2)
193 }
194 kmsg.WriteString(msg)
195 kmsg.Close()
196 os.Exit(1)
197 }
198
199 var takeoverResp api.TakeoverResponse
200 res, err := setupTakeover()
201 if err != nil {
202 takeoverResp.Result = &api.TakeoverResponse_Error{Error: &api.TakeoverError{
203 Message: err.Error(),
204 }}
205 } else {
206 takeoverResp.Result = &api.TakeoverResponse_Success{Success: res}
207 }
208 // Respond to stdout
209 takeoverRespRaw, err := proto.Marshal(&takeoverResp)
210 if err != nil {
211 log.Fatalf("failed to marshal response: %v", err)
212 }
213 if _, err := os.Stdout.Write(takeoverRespRaw); err != nil {
214 log.Fatalf("failed to write response to stdout: %v", err)
215 }
216 // Close stdout, we're done responding
217 os.Stdout.Close()
218
219 // Start second stage which waits for 5 seconds while performing
220 // final cleanup.
221 detachedCmd := exec.Command("/proc/self/exe")
222 detachedCmd.Env = []string{detachedLaunchEnv + "=1"}
223 if err := detachedCmd.Start(); err != nil {
224 log.Fatalf("failed to launch final stage: %v", err)
225 }
226 // Release the second stage so that the first stage can cleanly terminate.
227 if err := detachedCmd.Process.Release(); err != nil {
228 log.Fatalf("error releasing final stage process: %v", err)
229 }
230}