Lorenz Brun | fc5dbc6 | 2020-05-28 12:18:07 +0200 | [diff] [blame] | 1 | // Copyright 2020 The Monogon Project Authors. |
| 2 | // |
| 3 | // SPDX-License-Identifier: Apache-2.0 |
| 4 | // |
| 5 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | // you may not use this file except in compliance with the License. |
| 7 | // You may obtain a copy of the License at |
| 8 | // |
| 9 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | // |
| 11 | // Unless required by applicable law or agreed to in writing, software |
| 12 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | // See the License for the specific language governing permissions and |
| 15 | // limitations under the License. |
| 16 | |
| 17 | package launch |
| 18 | |
import (
	"bytes"
	"context"
	"crypto/rand"
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"net"
	"os"
	"os/exec"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
	"syscall"

	"github.com/golang/protobuf/proto"
	"golang.org/x/sys/unix"
	"google.golang.org/grpc"

	"git.monogon.dev/source/nexantic.git/core/internal/common"
	apb "git.monogon.dev/source/nexantic.git/core/proto/api"
	freeport "git.monogon.dev/source/nexantic.git/golibs/common"
)
| 44 | |
// qemuValue is a set of QEMU option keys, each mapped to the list of values it carries. A key with an empty value
// list is emitted as a bare flag, a key with several values is repeated once per value.
type qemuValue map[string][]string

// toOption encodes structured data into a QEMU option.
// Example: "test", {"key1": {"val1"}, "key2": {"val2", "val3"}} returns "test,key1=val1,key2=val2,key2=val3"
//
// Keys are emitted in sorted order so that the generated option is deterministic (Go randomizes map iteration
// order); the values of a single key keep the order in which they were given. If name is empty, no leading name is
// emitted. Keys and values are inserted verbatim, no escaping is performed.
func (value qemuValue) toOption(name string) string {
	keys := make([]string, 0, len(value))
	for key := range value {
		keys = append(keys, key)
	}
	sort.Strings(keys)

	var optionValues []string
	if name != "" {
		optionValues = append(optionValues, name)
	}
	for _, key := range keys {
		values := value[key]
		if len(values) == 0 {
			// A key without values is a bare flag.
			optionValues = append(optionValues, key)
			continue
		}
		for _, val := range values {
			optionValues = append(optionValues, key+"="+val)
		}
	}
	return strings.Join(optionValues, ",")
}
| 64 | |
// copyFile copies the contents of the file at src into the file at dst. dst is created if it does not exist and
// truncated if it does. Any error from opening, creating, copying or closing the destination is returned as-is.
func copyFile(src, dst string) error {
	source, err := os.Open(src)
	if err != nil {
		return err
	}
	defer source.Close()

	target, err := os.Create(dst)
	if err != nil {
		return err
	}
	// Safety net for the error path below. On success the explicit Close at the end has already run and the error
	// returned by this second Close is discarded.
	defer target.Close()

	if _, err := io.Copy(target, source); err != nil {
		return err
	}
	// Return the Close error explicitly so that a failed close of the destination is not silently lost.
	return target.Close()
}
| 84 | |
// PortMap represents where VM ports are mapped to on the host. It maps from the VM port number to the host port number.
type PortMap map[uint16]uint16

// toQemuForwards generates QEMU hostfwd values (https://qemu.weilnetz.de/doc/qemu-doc.html#:~:text=hostfwd=) for all
// mapped ports. Each value has the form "tcp::<host port>-:<VM port>". The values are ordered by ascending VM port
// so that the result is deterministic (Go randomizes map iteration order). A nil or empty PortMap yields a nil
// slice.
func (p PortMap) toQemuForwards() []string {
	vmPorts := make([]uint16, 0, len(p))
	for vmPort := range p {
		vmPorts = append(vmPorts, vmPort)
	}
	sort.Slice(vmPorts, func(i, j int) bool { return vmPorts[i] < vmPorts[j] })

	var hostfwdOptions []string
	for _, vmPort := range vmPorts {
		hostfwdOptions = append(hostfwdOptions, fmt.Sprintf("tcp::%v-:%v", p[vmPort], vmPort))
	}
	return hostfwdOptions
}
| 97 | |
| 98 | // DialGRPC creates a gRPC client for a VM port that's forwarded/mapped to the host. The given port is automatically |
| 99 | // resolved to the host-mapped port. |
| 100 | func (p PortMap) DialGRPC(port uint16, opts ...grpc.DialOption) (*grpc.ClientConn, error) { |
| 101 | mappedPort, ok := p[port] |
| 102 | if !ok { |
| 103 | return nil, fmt.Errorf("cannot dial port: port %v is not mapped/forwarded", port) |
| 104 | } |
| 105 | grpcClient, err := grpc.Dial(fmt.Sprintf("localhost:%v", mappedPort), opts...) |
| 106 | if err != nil { |
| 107 | return nil, fmt.Errorf("failed to dial port %v: %w", port, err) |
| 108 | } |
| 109 | return grpcClient, nil |
| 110 | } |
| 111 | |
// Options contains all options that can be passed to Launch().
type Options struct {
	// Ports contains the port mapping where to expose the internal ports of the VM to the host. See IdentityPortMap()
	// and ConflictFreePortMap(). Ignored when ConnectToSocket is set.
	Ports PortMap

	// If set to true, reboots are honored. Otherwise all reboots exit the Launch() command (QEMU is started with
	// -no-reboot). Smalltown generally restarts on almost all errors, so unless you want to test reboot behavior this
	// should be false.
	AllowReboot bool

	// By default the Smalltown VM is connected to the Host via SLIRP. If ConnectToSocket is set, it is instead connected
	// to the given file descriptor/socket. If this is set, all port maps from the Ports option are ignored.
	// Intended for networking this instance together with others for running more complex network configurations.
	ConnectToSocket *os.File

	// SerialPort is a File(descriptor) over which you can communicate with the serial port of the machine.
	// It can be set to an existing file descriptor (like os.Stdout/os.Stderr) or you can use NewSocketPair() to get one
	// end to talk to from Go. Launch attaches it to the standard output of the QEMU process.
	SerialPort *os.File

	// EnrolmentConfig is passed into the VM and subsequently used for bootstrapping if no enrolment config is built-in.
	// Launch serializes it to a file and exposes that file to the VM via QEMU's fw_cfg under the name
	// com.nexantic.smalltown/enrolment.pb. If nil, nothing is passed.
	EnrolmentConfig *apb.EnrolmentConfig
}
| 135 | |
| 136 | var requiredPorts = []uint16{common.ConsensusPort, common.NodeServicePort, common.MasterServicePort, |
Lorenz Brun | 70f65b2 | 2020-07-08 17:02:47 +0200 | [diff] [blame] | 137 | common.ExternalServicePort, common.DebugServicePort, common.KubernetesAPIPort, common.DebuggerPort} |
Lorenz Brun | fc5dbc6 | 2020-05-28 12:18:07 +0200 | [diff] [blame] | 138 | |
| 139 | // IdentityPortMap returns a port map where each VM port is mapped onto itself on the host. This is mainly useful |
| 140 | // for development against Smalltown. The dbg command requires this mapping. |
| 141 | func IdentityPortMap() PortMap { |
| 142 | portMap := make(PortMap) |
| 143 | for _, port := range requiredPorts { |
| 144 | portMap[port] = port |
| 145 | } |
| 146 | return portMap |
| 147 | } |
| 148 | |
| 149 | // ConflictFreePortMap returns a port map where each VM port is mapped onto a random free port on the host. This is |
| 150 | // intended for automated testing where multiple instances of Smalltown might be running. Please call this function for |
| 151 | // each Launch command separately and as close to it as possible since it cannot guarantee that the ports will remain |
| 152 | // free. |
| 153 | func ConflictFreePortMap() (PortMap, error) { |
| 154 | portMap := make(PortMap) |
| 155 | for _, port := range requiredPorts { |
Serge Bazanski | cb883e2 | 2020-07-06 17:47:55 +0200 | [diff] [blame] | 156 | mappedPort, listenCloser, err := freeport.AllocateTCPPort() |
Lorenz Brun | fc5dbc6 | 2020-05-28 12:18:07 +0200 | [diff] [blame] | 157 | if err != nil { |
| 158 | return portMap, fmt.Errorf("failed to get free host port: %w", err) |
| 159 | } |
| 160 | // Defer closing of the listening port until the function is done and all ports are allocated |
| 161 | defer listenCloser.Close() |
| 162 | portMap[port] = mappedPort |
| 163 | } |
| 164 | return portMap, nil |
| 165 | } |
| 166 | |
// generateRandomEthernetMAC returns a random EUI-48 Ethernet MAC address read from crypto/rand. The address is
// always a locally administered, individual (unicast) address. An error is returned (wrapping the underlying cause)
// if no randomness could be read.
func generateRandomEthernetMAC() (*net.HardwareAddr, error) {
	macBuf := make([]byte, 6)
	if _, err := rand.Read(macBuf); err != nil {
		return nil, fmt.Errorf("failed to read randomness for MAC: %w", err)
	}

	// Set U/L bit and clear I/G bit (locally administered individual MAC)
	// Ref IEEE 802-2014 Section 8.2.2
	macBuf[0] = (macBuf[0] | 2) & 0xfe
	mac := net.HardwareAddr(macBuf)
	return &mac, nil
}
| 181 | |
Lorenz Brun | fc5dbc6 | 2020-05-28 12:18:07 +0200 | [diff] [blame] | 182 | // Launch launches a Smalltown instance with the given options. The instance runs mostly paravirtualized but with some |
| 183 | // emulated hardware similar to how a cloud provider might set up its VMs. The disk is fully writable but is run |
| 184 | // in snapshot mode meaning that changes are not kept beyond a single invocation. |
| 185 | func Launch(ctx context.Context, options Options) error { |
| 186 | // Pin temp directory to /tmp until we can use abstract socket namespace in QEMU (next release after 5.0, |
| 187 | // https://github.com/qemu/qemu/commit/776b97d3605ed0fc94443048fdf988c7725e38a9). swtpm accepts already-open FDs |
| 188 | // so we can pass in an abstract socket namespace FD that we open and pass the name of it to QEMU. Not pinning this |
| 189 | // crashes both swtpm and qemu because we run into UNIX socket length limitations (for legacy reasons 108 chars). |
| 190 | tempDir, err := ioutil.TempDir("/tmp", "launch*") |
| 191 | if err != nil { |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 192 | return fmt.Errorf("failed to create temporary directory: %w", err) |
Lorenz Brun | fc5dbc6 | 2020-05-28 12:18:07 +0200 | [diff] [blame] | 193 | } |
| 194 | defer os.RemoveAll(tempDir) |
| 195 | |
| 196 | // Copy TPM state into a temporary directory since it's being modified by the emulator |
| 197 | tpmTargetDir := filepath.Join(tempDir, "tpm") |
| 198 | tpmSrcDir := "core/tpm" |
| 199 | if err := os.Mkdir(tpmTargetDir, 0644); err != nil { |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 200 | return fmt.Errorf("failed to create TPM state directory: %w", err) |
Lorenz Brun | fc5dbc6 | 2020-05-28 12:18:07 +0200 | [diff] [blame] | 201 | } |
| 202 | tpmFiles, err := ioutil.ReadDir(tpmSrcDir) |
| 203 | if err != nil { |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 204 | return fmt.Errorf("failed to read TPM directory: %w", err) |
Lorenz Brun | fc5dbc6 | 2020-05-28 12:18:07 +0200 | [diff] [blame] | 205 | } |
| 206 | for _, file := range tpmFiles { |
| 207 | name := file.Name() |
| 208 | if err := copyFile(filepath.Join(tpmSrcDir, name), filepath.Join(tpmTargetDir, name)); err != nil { |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 209 | return fmt.Errorf("failed to copy TPM directory: %w", err) |
Lorenz Brun | fc5dbc6 | 2020-05-28 12:18:07 +0200 | [diff] [blame] | 210 | } |
| 211 | } |
| 212 | |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 213 | var qemuNetType string |
| 214 | var qemuNetConfig qemuValue |
| 215 | if options.ConnectToSocket != nil { |
| 216 | qemuNetType = "socket" |
| 217 | qemuNetConfig = qemuValue{ |
| 218 | "id": {"net0"}, |
| 219 | "fd": {"3"}, |
| 220 | } |
| 221 | } else { |
| 222 | qemuNetType = "user" |
| 223 | qemuNetConfig = qemuValue{ |
| 224 | "id": {"net0"}, |
| 225 | "net": {"10.42.0.0/24"}, |
| 226 | "dhcpstart": {"10.42.0.10"}, |
| 227 | "hostfwd": options.Ports.toQemuForwards(), |
| 228 | } |
Lorenz Brun | fc5dbc6 | 2020-05-28 12:18:07 +0200 | [diff] [blame] | 229 | } |
| 230 | |
| 231 | tpmSocketPath := filepath.Join(tempDir, "tpm-socket") |
| 232 | |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 233 | mac, err := generateRandomEthernetMAC() |
| 234 | if err != nil { |
| 235 | return err |
| 236 | } |
| 237 | |
Lorenz Brun | fc5dbc6 | 2020-05-28 12:18:07 +0200 | [diff] [blame] | 238 | qemuArgs := []string{"-machine", "q35", "-accel", "kvm", "-nographic", "-nodefaults", "-m", "2048", |
| 239 | "-cpu", "host", "-smp", "sockets=1,cpus=1,cores=2,threads=2,maxcpus=4", |
| 240 | "-drive", "if=pflash,format=raw,readonly,file=external/edk2/OVMF_CODE.fd", |
| 241 | "-drive", "if=pflash,format=raw,snapshot=on,file=external/edk2/OVMF_VARS.fd", |
| 242 | "-drive", "if=virtio,format=raw,snapshot=on,cache=unsafe,file=core/smalltown.img", |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 243 | "-netdev", qemuNetConfig.toOption(qemuNetType), |
| 244 | "-device", "virtio-net-pci,netdev=net0,mac=" + mac.String(), |
Lorenz Brun | fc5dbc6 | 2020-05-28 12:18:07 +0200 | [diff] [blame] | 245 | "-chardev", "socket,id=chrtpm,path=" + tpmSocketPath, |
| 246 | "-tpmdev", "emulator,id=tpm0,chardev=chrtpm", |
| 247 | "-device", "tpm-tis,tpmdev=tpm0", |
| 248 | "-device", "virtio-rng-pci", |
| 249 | "-serial", "stdio"} |
| 250 | |
| 251 | if !options.AllowReboot { |
| 252 | qemuArgs = append(qemuArgs, "-no-reboot") |
| 253 | } |
| 254 | |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 255 | if options.EnrolmentConfig != nil { |
| 256 | enrolmentConfigPath := filepath.Join(tempDir, "enrolment.pb") |
| 257 | enrolmentConfigRaw, err := proto.Marshal(options.EnrolmentConfig) |
| 258 | if err != nil { |
| 259 | return fmt.Errorf("failed to encode enrolment config: %w", err) |
| 260 | } |
| 261 | if err := ioutil.WriteFile(enrolmentConfigPath, enrolmentConfigRaw, 0644); err != nil { |
| 262 | return fmt.Errorf("failed to write enrolment config: %w", err) |
| 263 | } |
| 264 | qemuArgs = append(qemuArgs, "-fw_cfg", "name=com.nexantic.smalltown/enrolment.pb,file="+enrolmentConfigPath) |
| 265 | } |
| 266 | |
Leopold Schabel | a013ffa | 2020-06-03 15:09:32 +0200 | [diff] [blame] | 267 | // Start TPM emulator as a subprocess |
| 268 | tpmCtx, tpmCancel := context.WithCancel(ctx) |
| 269 | defer tpmCancel() |
Lorenz Brun | fc5dbc6 | 2020-05-28 12:18:07 +0200 | [diff] [blame] | 270 | |
Leopold Schabel | a013ffa | 2020-06-03 15:09:32 +0200 | [diff] [blame] | 271 | tpmEmuCmd := exec.CommandContext(tpmCtx, "swtpm", "socket", "--tpm2", "--tpmstate", "dir="+tpmTargetDir, "--ctrl", "type=unixio,path="+tpmSocketPath) |
Lorenz Brun | fc5dbc6 | 2020-05-28 12:18:07 +0200 | [diff] [blame] | 272 | tpmEmuCmd.Stderr = os.Stderr |
| 273 | tpmEmuCmd.Stdout = os.Stdout |
Leopold Schabel | a013ffa | 2020-06-03 15:09:32 +0200 | [diff] [blame] | 274 | |
| 275 | err = tpmEmuCmd.Start() |
| 276 | if err != nil { |
| 277 | return fmt.Errorf("failed to start TPM emulator: %w", err) |
| 278 | } |
| 279 | |
| 280 | // Start the main qemu binary |
| 281 | systemCmd := exec.CommandContext(ctx, "qemu-system-x86_64", qemuArgs...) |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 282 | if options.ConnectToSocket != nil { |
| 283 | systemCmd.ExtraFiles = []*os.File{options.ConnectToSocket} |
| 284 | } |
| 285 | |
| 286 | var stdErrBuf bytes.Buffer |
| 287 | systemCmd.Stderr = &stdErrBuf |
| 288 | systemCmd.Stdout = options.SerialPort |
Leopold Schabel | a013ffa | 2020-06-03 15:09:32 +0200 | [diff] [blame] | 289 | |
Lorenz Brun | fc5dbc6 | 2020-05-28 12:18:07 +0200 | [diff] [blame] | 290 | err = systemCmd.Run() |
Leopold Schabel | a013ffa | 2020-06-03 15:09:32 +0200 | [diff] [blame] | 291 | |
| 292 | // Stop TPM emulator and wait for it to exit to properly reap the child process |
| 293 | tpmCancel() |
| 294 | log.Print("Waiting for TPM emulator to exit") |
| 295 | // Wait returns a SIGKILL error because we just cancelled its context. |
| 296 | // We still need to call it to avoid creating zombies. |
| 297 | _ = tpmEmuCmd.Wait() |
| 298 | |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 299 | var exerr *exec.ExitError |
| 300 | if err != nil && errors.As(err, &exerr) { |
| 301 | status := exerr.ProcessState.Sys().(syscall.WaitStatus) |
| 302 | if status.Signaled() && status.Signal() == syscall.SIGKILL { |
| 303 | // Process was killed externally (most likely by our context being canceled). |
| 304 | // This is a normal exit for us, so return nil |
| 305 | return nil |
| 306 | } |
| 307 | exerr.Stderr = stdErrBuf.Bytes() |
| 308 | newErr := QEMUError(*exerr) |
| 309 | return &newErr |
| 310 | } |
| 311 | return err |
| 312 | } |
| 313 | |
| 314 | // NewSocketPair creates a new socket pair. By connecting both ends to different instances you can connect them |
| 315 | // with a virtual "network cable". The ends can be passed into the ConnectToSocket option. |
| 316 | func NewSocketPair() (*os.File, *os.File, error) { |
| 317 | fds, err := unix.Socketpair(unix.AF_UNIX, syscall.SOCK_STREAM, 0) |
| 318 | if err != nil { |
| 319 | return nil, nil, fmt.Errorf("failed to call socketpair: %w", err) |
| 320 | } |
| 321 | |
| 322 | fd1 := os.NewFile(uintptr(fds[0]), "network0") |
| 323 | fd2 := os.NewFile(uintptr(fds[1]), "network1") |
| 324 | return fd1, fd2, nil |
| 325 | } |
| 326 | |
// HostInterfaceMAC is the fixed MAC address that RunMicroVM assigns to the SLIRP-backed host network interface,
// unless that interface is disabled (see DisableHostNetworkInterface in MicroVMOptions).
var HostInterfaceMAC = net.HardwareAddr{0x02, 0x72, 0x82, 0xbf, 0xc3, 0x56}
| 330 | |
// MicroVMOptions contains all options to start a MicroVM with RunMicroVM.
type MicroVMOptions struct {
	// KernelPath is the path to the ELF kernel binary. It is passed to QEMU via -kernel.
	KernelPath string

	// InitramfsPath is the path to the initramfs. It is passed to QEMU via -initrd.
	InitramfsPath string

	// Cmdline contains additional kernel commandline options. They are appended to the options RunMicroVM always
	// sets ("reboot=t console=hvc0 quiet").
	Cmdline string

	// SerialPort is a File(descriptor) over which you can communicate with the serial port of the machine.
	// It can be set to an existing file descriptor (like os.Stdout/os.Stderr) or you can use NewSocketPair() to get one
	// end to talk to from Go. RunMicroVM attaches it to the standard output of the QEMU process.
	SerialPort *os.File

	// ExtraChardevs can be used similar to SerialPort, but can contain an arbitrary number of additional serial ports.
	// Each entry is exposed to the VM as a virtio serial port.
	ExtraChardevs []*os.File

	// ExtraNetworkInterfaces can contain an arbitrary number of file descriptors which are mapped into the VM as virtio
	// network interfaces. The first interface is always a SLIRP-backed interface for communicating with the host
	// (unless DisableHostNetworkInterface is set).
	ExtraNetworkInterfaces []*os.File

	// PortMap contains ports that are mapped to the host through the built-in SLIRP network interface.
	PortMap PortMap

	// DisableHostNetworkInterface disables the SLIRP-backed host network interface that is normally the first network
	// interface. If this is set PortMap is ignored. Mostly useful for speeding up QEMU's startup time for tests.
	DisableHostNetworkInterface bool
}
| 361 | |
| 362 | // RunMicroVM launches a tiny VM mostly intended for testing. Very quick to boot (<40ms). |
| 363 | func RunMicroVM(ctx context.Context, opts *MicroVMOptions) error { |
| 364 | // Generate options for all the file descriptors we'll be passing as virtio "serial ports" |
| 365 | var extraArgs []string |
| 366 | for idx, _ := range opts.ExtraChardevs { |
| 367 | idxStr := strconv.Itoa(idx) |
| 368 | id := "extra" + idxStr |
| 369 | // That this works is pretty much a hack, but upstream QEMU doesn't have a bidirectional chardev backend not |
| 370 | // based around files/sockets on the disk which are a giant pain to work with. |
| 371 | // We're using QEMU's fdset functionality to make FDs available as pseudo-files and then "ab"using the pipe |
| 372 | // backend's fallback functionality to get a single bidirectional chardev backend backed by a passed-down |
| 373 | // RDWR fd. |
| 374 | // Ref https://lists.gnu.org/archive/html/qemu-devel/2015-12/msg01256.html |
| 375 | addFdConf := qemuValue{ |
| 376 | "set": {idxStr}, |
| 377 | "fd": {strconv.Itoa(idx + 3)}, |
| 378 | } |
| 379 | chardevConf := qemuValue{ |
| 380 | "id": {id}, |
| 381 | "path": {"/dev/fdset/" + idxStr}, |
| 382 | } |
| 383 | deviceConf := qemuValue{ |
| 384 | "chardev": {id}, |
| 385 | } |
| 386 | extraArgs = append(extraArgs, "-add-fd", addFdConf.toOption(""), |
| 387 | "-chardev", chardevConf.toOption("pipe"), "-device", deviceConf.toOption("virtserialport")) |
| 388 | } |
| 389 | |
| 390 | for idx, _ := range opts.ExtraNetworkInterfaces { |
| 391 | id := fmt.Sprintf("net%v", idx) |
| 392 | netdevConf := qemuValue{ |
| 393 | "id": {id}, |
| 394 | "fd": {strconv.Itoa(idx + 3 + len(opts.ExtraChardevs))}, |
| 395 | } |
| 396 | extraArgs = append(extraArgs, "-netdev", netdevConf.toOption("socket"), "-device", "virtio-net-device,netdev="+id) |
| 397 | } |
| 398 | |
| 399 | // This sets up a minimum viable environment for our Linux kernel. |
| 400 | // It clears all standard QEMU configuration and sets up a MicroVM machine |
| 401 | // (https://github.com/qemu/qemu/blob/master/docs/microvm.rst) with all legacy emulation turned off. This means |
| 402 | // the only "hardware" the Linux kernel inside can communicate with is a single virtio-mmio region. Over that MMIO |
| 403 | // interface we run a paravirtualized RNG (since the kernel in there has nothing to gather that from and it |
| 404 | // delays booting), a single paravirtualized console and an arbitrary number of extra serial ports for talking to |
| 405 | // various things that might run inside. The kernel, initramfs and command line are mapped into VM memory at boot |
| 406 | // time and not loaded from any sort of disk. Booting and shutting off one of these VMs takes <100ms. |
| 407 | baseArgs := []string{"-nodefaults", "-no-user-config", "-nographic", "-no-reboot", |
| 408 | "-accel", "kvm", "-cpu", "host", |
| 409 | // Needed until QEMU updates their bundled qboot version (needs https://github.com/bonzini/qboot/pull/28) |
| 410 | "-bios", "external/com_github_bonzini_qboot/bios.bin", |
| 411 | "-M", "microvm,x-option-roms=off,pic=off,pit=off,rtc=off,isa-serial=off", |
| 412 | "-kernel", opts.KernelPath, |
| 413 | // We force using a triple-fault reboot strategy since otherwise the kernel first tries others (like ACPI) which |
| 414 | // are not available in this very restricted environment. Similarly we need to override the boot console since |
| 415 | // there's nothing on the ISA bus that the kernel could talk to. We also force quiet for performance reasons. |
| 416 | "-append", "reboot=t console=hvc0 quiet " + opts.Cmdline, |
| 417 | "-initrd", opts.InitramfsPath, |
| 418 | "-device", "virtio-rng-device,max-bytes=1024,period=1000", |
| 419 | "-device", "virtio-serial-device,max_ports=16", |
| 420 | "-chardev", "stdio,id=con0", "-device", "virtconsole,chardev=con0", |
| 421 | } |
| 422 | |
| 423 | if !opts.DisableHostNetworkInterface { |
| 424 | qemuNetType := "user" |
| 425 | qemuNetConfig := qemuValue{ |
| 426 | "id": {"usernet0"}, |
| 427 | "net": {"10.42.0.0/24"}, |
| 428 | "dhcpstart": {"10.42.0.10"}, |
| 429 | } |
| 430 | if opts.PortMap != nil { |
| 431 | qemuNetConfig["hostfwd"] = opts.PortMap.toQemuForwards() |
| 432 | } |
| 433 | |
| 434 | baseArgs = append(baseArgs, "-netdev", qemuNetConfig.toOption(qemuNetType), |
| 435 | "-device", "virtio-net-device,netdev=usernet0,mac="+HostInterfaceMAC.String()) |
| 436 | } |
| 437 | |
| 438 | var stdErrBuf bytes.Buffer |
| 439 | cmd := exec.CommandContext(ctx, "qemu-system-x86_64", append(baseArgs, extraArgs...)...) |
| 440 | cmd.Stdout = opts.SerialPort |
| 441 | cmd.Stderr = &stdErrBuf |
| 442 | |
| 443 | cmd.ExtraFiles = append(cmd.ExtraFiles, opts.ExtraChardevs...) |
| 444 | cmd.ExtraFiles = append(cmd.ExtraFiles, opts.ExtraNetworkInterfaces...) |
| 445 | |
| 446 | err := cmd.Run() |
| 447 | var exerr *exec.ExitError |
| 448 | if err != nil && errors.As(err, &exerr) { |
| 449 | exerr.Stderr = stdErrBuf.Bytes() |
| 450 | newErr := QEMUError(*exerr) |
| 451 | return &newErr |
| 452 | } |
| 453 | return err |
| 454 | } |
| 455 | |
// QEMUError is the error type returned when the QEMU process exits unsuccessfully. It has the same fields as
// exec.ExitError (process state and captured stderr), but its message additionally contains the captured stderr
// output for debugging.
type QEMUError exec.ExitError

// Error returns the textual process state followed by a colon and the captured stderr output.
func (e *QEMUError) Error() string {
	return e.String() + ": " + string(e.Stderr)
}