blob: 9de39f5209952e08487725fa2ded0517eb29f08a [file] [log] [blame]
// takeover is a self-contained executable which, when executed, loads the
// BMaaS agent via kexec. It is intended to be called over SSH and given a
// binary TakeoverInit message over standard input. It responds with a
// TakeoverResponse on standard output; if all preparation work completed
// successfully, the new kernel and agent initramfs are at that point fully
// staged by the current kernel.
// The second stage, which is also part of this binary and is selected by an
// environment variable, is then executed in detached mode and the main
// takeover binary called over SSH terminates.
// The second stage waits for 5 seconds for the main binary to exit, the SSH
// session to be torn down and various other things to settle before issuing
// the final non-returning syscall which jumps into the new kernel.
13
14package main
15
16import (
17 "bytes"
18 "crypto/ed25519"
19 "crypto/rand"
20 _ "embed"
21 "errors"
22 "fmt"
23 "io"
24 "log"
25 "os"
26 "os/exec"
27 "time"
28
29 "github.com/cavaliergopher/cpio"
30 "github.com/pierrec/lz4/v4"
31 "golang.org/x/sys/unix"
32 "google.golang.org/protobuf/proto"
33
34 "source.monogon.dev/cloud/agent/api"
35 "source.monogon.dev/metropolis/pkg/bootparam"
36 "source.monogon.dev/metropolis/pkg/kexec"
37 netdump "source.monogon.dev/net/dump"
Lorenz Brund0be3712023-04-11 13:22:25 +020038 netapi "source.monogon.dev/net/proto"
Lorenz Brun7922d412023-02-21 20:47:39 +010039)
40
41//go:embed third_party/linux/bzImage
42var kernel []byte
43
44//go:embed ucode.cpio
45var ucode []byte
46
47//go:embed cloud/agent/initramfs.cpio.lz4
48var initramfs []byte
49
50// newMemfile creates a new file which is not located on a specific filesystem,
51// but is instead backed by anonymous memory.
52func newMemfile(name string, flags int) (*os.File, error) {
53 fd, err := unix.MemfdCreate(name, flags)
54 if err != nil {
55 return nil, fmt.Errorf("memfd_create failed: %w", err)
56 }
57 return os.NewFile(uintptr(fd), name), nil
58}
59
60func setupTakeover() (*api.TakeoverSuccess, error) {
61 // Read init specification from stdin.
62 initRaw, err := io.ReadAll(os.Stdin)
63 if err != nil {
64 return nil, fmt.Errorf("failed to read TakeoverInit message from stdin: %w", err)
65 }
66 var takeoverInit api.TakeoverInit
67 if err := proto.Unmarshal(initRaw, &takeoverInit); err != nil {
68 return nil, fmt.Errorf("failed to parse TakeoverInit messag from stdin: %w", err)
69 }
70
71 // Sanity check for empty TakeoverInit messages
72 if takeoverInit.BmaasEndpoint == "" {
73 return nil, errors.New("BMaaS endpoint is empty, check that a proper TakeoverInit message has been provided")
74 }
75
76 // Load data from embedded files into memfiles as the kexec load syscall
77 // requires file descriptors.
78 kernelFile, err := newMemfile("kernel", 0)
79 if err != nil {
80 return nil, fmt.Errorf("failed to create kernel memfile: %w", err)
81 }
82 initramfsFile, err := newMemfile("initramfs", 0)
83 if err != nil {
84 return nil, fmt.Errorf("failed to create initramfs memfile: %w", err)
85 }
86 if _, err := kernelFile.ReadFrom(bytes.NewReader(kernel)); err != nil {
87 return nil, fmt.Errorf("failed to read kernel into memory-backed file: %w", err)
88 }
89 if _, err := initramfsFile.ReadFrom(bytes.NewReader(ucode)); err != nil {
90 return nil, fmt.Errorf("failed to read ucode into memory-backed file: %w", err)
91 }
92 if _, err := initramfsFile.ReadFrom(bytes.NewReader(initramfs)); err != nil {
93 return nil, fmt.Errorf("failed to read initramfs into memory-backed file: %w", err)
94 }
95
96 // Dump the current network configuration
97 netconf, warnings, err := netdump.Dump()
98 if err != nil {
99 return nil, fmt.Errorf("failed to dump network configuration: %w", err)
100 }
101
Lorenz Brund0be3712023-04-11 13:22:25 +0200102 if len(netconf.Nameserver) == 0 {
103 netconf.Nameserver = []*netapi.Nameserver{{
104 Ip: "8.8.8.8",
105 }, {
106 Ip: "1.1.1.1",
107 }}
108 }
109
Lorenz Brun7922d412023-02-21 20:47:39 +0100110 // Generate agent private key
111 pubKey, privKey, err := ed25519.GenerateKey(rand.Reader)
112 if err != nil {
113 return nil, fmt.Errorf("unable to generate Ed25519 key: %w", err)
114 }
115
116 agentInit := api.AgentInit{
117 TakeoverInit: &takeoverInit,
118 PrivateKey: privKey,
119 NetworkConfig: netconf,
120 }
121 agentInitRaw, err := proto.Marshal(&agentInit)
122 if err != nil {
123 return nil, fmt.Errorf("unable to marshal AgentInit message: %v", err)
124 }
125
126 // Append AgentInit spec to initramfs
127 compressedOut := lz4.NewWriter(initramfsFile)
128 compressedOut.Apply(lz4.LegacyOption(true))
129 cpioW := cpio.NewWriter(compressedOut)
130 cpioW.WriteHeader(&cpio.Header{
131 Name: "/init.pb",
132 Size: int64(len(agentInitRaw)),
133 Mode: cpio.TypeReg | 0o644,
134 })
135 cpioW.Write(agentInitRaw)
136 cpioW.Close()
137 compressedOut.Close()
138
139 agentParams := bootparam.Params{
140 bootparam.Param{Param: "quiet"},
141 bootparam.Param{Param: "init", Value: "/init"},
Lorenz Brun7922d412023-02-21 20:47:39 +0100142 }
143
Lorenz Brun5d503b32023-04-11 13:20:23 +0200144 var customConsoles bool
Lorenz Brun7922d412023-02-21 20:47:39 +0100145 cmdline, err := os.ReadFile("/proc/cmdline")
146 if err != nil {
147 warnings = append(warnings, fmt.Errorf("unable to read current kernel command line: %w", err))
148 } else {
149 params, _, err := bootparam.Unmarshal(string(cmdline))
150 // If the existing command line is well-formed, add all existing console
151 // parameters to the console for the agent
152 if err == nil {
153 for _, p := range params {
154 if p.Param == "console" {
155 agentParams = append(agentParams, p)
Lorenz Brun5d503b32023-04-11 13:20:23 +0200156 customConsoles = true
Lorenz Brun7922d412023-02-21 20:47:39 +0100157 }
158 }
159 }
160 }
Lorenz Brun5d503b32023-04-11 13:20:23 +0200161 if !customConsoles {
162 // Add the "default" console on x86
163 agentParams = append(agentParams, bootparam.Param{Param: "console", Value: "ttyS0,115200"})
164 }
Lorenz Brun7922d412023-02-21 20:47:39 +0100165 agentCmdline, err := bootparam.Marshal(agentParams, "")
166 // Stage agent payload into kernel memory
167 if err := kexec.FileLoad(kernelFile, initramfsFile, agentCmdline); err != nil {
168 return nil, fmt.Errorf("failed to load kexec payload: %w", err)
169 }
170 var warningsStrs []string
171 for _, w := range warnings {
172 warningsStrs = append(warningsStrs, w.Error())
173 }
174 return &api.TakeoverSuccess{
175 InitMessage: &takeoverInit,
176 Key: pubKey,
177 Warning: warningsStrs,
178 }, nil
179}
180
181// Environment variable which tells the takeover binary to run the second stage
182const detachedLaunchEnv = "TAKEOVER_DETACHED_LAUNCH"
183
184func main() {
185 // Check if the second stage should be executed
186 if os.Getenv(detachedLaunchEnv) == "1" {
187 // Wait 5 seconds for data to be sent, connections to be closed and
188 // syncs to be executed
189 time.Sleep(5 * time.Second)
190 // Perform kexec, this will not return unless it fails
191 err := unix.Reboot(unix.LINUX_REBOOT_CMD_KEXEC)
192 var msg string = "takeover: reboot succeeded, but we're still runing??"
193 if err != nil {
194 msg = err.Error()
195 }
196 // We have no standard output/error anymore, if this fails it's
197 // just borked. Attempt to dump the error into kmesg for manual
198 // debugging.
199 kmsg, err := os.OpenFile("/dev/kmsg", os.O_WRONLY, 0)
200 if err != nil {
201 os.Exit(2)
202 }
203 kmsg.WriteString(msg)
204 kmsg.Close()
205 os.Exit(1)
206 }
207
208 var takeoverResp api.TakeoverResponse
209 res, err := setupTakeover()
210 if err != nil {
211 takeoverResp.Result = &api.TakeoverResponse_Error{Error: &api.TakeoverError{
212 Message: err.Error(),
213 }}
214 } else {
215 takeoverResp.Result = &api.TakeoverResponse_Success{Success: res}
216 }
217 // Respond to stdout
218 takeoverRespRaw, err := proto.Marshal(&takeoverResp)
219 if err != nil {
220 log.Fatalf("failed to marshal response: %v", err)
221 }
222 if _, err := os.Stdout.Write(takeoverRespRaw); err != nil {
223 log.Fatalf("failed to write response to stdout: %v", err)
224 }
225 // Close stdout, we're done responding
226 os.Stdout.Close()
227
228 // Start second stage which waits for 5 seconds while performing
229 // final cleanup.
230 detachedCmd := exec.Command("/proc/self/exe")
231 detachedCmd.Env = []string{detachedLaunchEnv + "=1"}
232 if err := detachedCmd.Start(); err != nil {
233 log.Fatalf("failed to launch final stage: %v", err)
234 }
235 // Release the second stage so that the first stage can cleanly terminate.
236 if err := detachedCmd.Process.Release(); err != nil {
237 log.Fatalf("error releasing final stage process: %v", err)
238 }
239}