Lorenz Brun | fc5dbc6 | 2020-05-28 12:18:07 +0200 | [diff] [blame] | 1 | // Copyright 2020 The Monogon Project Authors. |
| 2 | // |
| 3 | // SPDX-License-Identifier: Apache-2.0 |
| 4 | // |
| 5 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | // you may not use this file except in compliance with the License. |
| 7 | // You may obtain a copy of the License at |
| 8 | // |
| 9 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | // |
| 11 | // Unless required by applicable law or agreed to in writing, software |
| 12 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | // See the License for the specific language governing permissions and |
| 15 | // limitations under the License. |
| 16 | |
// Package launch implements test harnesses for running qemu VMs from tests.
Lorenz Brun | fc5dbc6 | 2020-05-28 12:18:07 +0200 | [diff] [blame] | 18 | package launch |
| 19 | |
import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"net"
	"os"
	"os/exec"
	"sort"
	"strconv"
	"strings"
	"syscall"

	"golang.org/x/sys/unix"

	"source.monogon.dev/metropolis/pkg/freeport"
)
| 37 | |
// QemuValue holds the key/value pairs of a single structured QEMU option. Every
// key maps to the list of values given for it; a key without any values is
// emitted as a bare key.
type QemuValue map[string][]string

// ToOption encodes structured data into a QEMU option. Example: "test", {"key1":
// {"val1"}, "key2": {"val2", "val3"}} returns "test,key1=val1,key2=val2,key2=val3"
//
// If name is empty, only the key/value pairs are emitted. Keys are emitted in
// sorted order so that the same QemuValue always encodes to the same string; the
// values of a single key keep their slice order.
func (value QemuValue) ToOption(name string) string {
	keys := make([]string, 0, len(value))
	for key := range value {
		keys = append(keys, key)
	}
	// Map iteration order is randomized in Go, so sort to keep the output stable.
	sort.Strings(keys)

	optionValues := make([]string, 0, len(keys)+1)
	if name != "" {
		optionValues = append(optionValues, name)
	}
	for _, key := range keys {
		values := value[key]
		if len(values) == 0 {
			// A key without values is passed as a bare key.
			optionValues = append(optionValues, key)
			continue
		}
		for _, val := range values {
			optionValues = append(optionValues, key+"="+val)
		}
	}
	return strings.Join(optionValues, ",")
}
| 57 | |
// PortMap represents where VM ports are mapped to on the host. It maps from the VM
// port number to the host port number.
type PortMap map[uint16]uint16

// ToQemuForwards generates QEMU hostfwd values (https://qemu.weilnetz.de/doc/qemu-
// doc.html#:~:text=hostfwd=) for all mapped ports.
//
// Each value has the form "tcp::<host port>-:<VM port>". Values are ordered by
// ascending VM port so that the result is the same on every call. A nil or empty
// map yields a nil slice.
func (p PortMap) ToQemuForwards() []string {
	if len(p) == 0 {
		return nil
	}

	vmPorts := make([]uint16, 0, len(p))
	for vmPort := range p {
		vmPorts = append(vmPorts, vmPort)
	}
	// Map iteration order is randomized in Go, so sort to keep the output stable.
	sort.Slice(vmPorts, func(i, j int) bool { return vmPorts[i] < vmPorts[j] })

	hostfwdOptions := make([]string, 0, len(vmPorts))
	for _, vmPort := range vmPorts {
		hostfwdOptions = append(hostfwdOptions, fmt.Sprintf("tcp::%d-:%d", p[vmPort], vmPort))
	}
	return hostfwdOptions
}
| 71 | |
Serge Bazanski | 216fe7b | 2021-05-21 18:36:16 +0200 | [diff] [blame] | 72 | // IdentityPortMap returns a port map where each given port is mapped onto itself |
| 73 | // on the host. This is mainly useful for development against Metropolis. The dbg |
| 74 | // command requires this mapping. |
Serge Bazanski | be74284 | 2022-04-04 13:18:50 +0200 | [diff] [blame^] | 75 | func IdentityPortMap(ports []uint16) PortMap { |
Lorenz Brun | fc5dbc6 | 2020-05-28 12:18:07 +0200 | [diff] [blame] | 76 | portMap := make(PortMap) |
Lorenz Brun | ed0503c | 2020-07-28 17:21:25 +0200 | [diff] [blame] | 77 | for _, port := range ports { |
Serge Bazanski | 52304a8 | 2021-10-29 16:56:18 +0200 | [diff] [blame] | 78 | portMap[port] = uint16(port) |
Lorenz Brun | fc5dbc6 | 2020-05-28 12:18:07 +0200 | [diff] [blame] | 79 | } |
| 80 | return portMap |
| 81 | } |
| 82 | |
Serge Bazanski | 216fe7b | 2021-05-21 18:36:16 +0200 | [diff] [blame] | 83 | // ConflictFreePortMap returns a port map where each given port is mapped onto a |
| 84 | // random free port on the host. This is intended for automated testing where |
| 85 | // multiple instances of Metropolis nodes might be running. Please call this |
| 86 | // function for each Launch command separately and as close to it as possible since |
| 87 | // it cannot guarantee that the ports will remain free. |
Serge Bazanski | be74284 | 2022-04-04 13:18:50 +0200 | [diff] [blame^] | 88 | func ConflictFreePortMap(ports []uint16) (PortMap, error) { |
Lorenz Brun | fc5dbc6 | 2020-05-28 12:18:07 +0200 | [diff] [blame] | 89 | portMap := make(PortMap) |
Lorenz Brun | ed0503c | 2020-07-28 17:21:25 +0200 | [diff] [blame] | 90 | for _, port := range ports { |
Serge Bazanski | cb883e2 | 2020-07-06 17:47:55 +0200 | [diff] [blame] | 91 | mappedPort, listenCloser, err := freeport.AllocateTCPPort() |
Lorenz Brun | fc5dbc6 | 2020-05-28 12:18:07 +0200 | [diff] [blame] | 92 | if err != nil { |
| 93 | return portMap, fmt.Errorf("failed to get free host port: %w", err) |
| 94 | } |
Serge Bazanski | 216fe7b | 2021-05-21 18:36:16 +0200 | [diff] [blame] | 95 | // Defer closing of the listening port until the function is done and all ports are |
| 96 | // allocated |
Lorenz Brun | fc5dbc6 | 2020-05-28 12:18:07 +0200 | [diff] [blame] | 97 | defer listenCloser.Close() |
| 98 | portMap[port] = mappedPort |
| 99 | } |
| 100 | return portMap, nil |
| 101 | } |
| 102 | |
Serge Bazanski | 216fe7b | 2021-05-21 18:36:16 +0200 | [diff] [blame] | 103 | // NewSocketPair creates a new socket pair. By connecting both ends to different |
| 104 | // instances you can connect them with a virtual "network cable". The ends can be |
| 105 | // passed into the ConnectToSocket option. |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 106 | func NewSocketPair() (*os.File, *os.File, error) { |
| 107 | fds, err := unix.Socketpair(unix.AF_UNIX, syscall.SOCK_STREAM, 0) |
| 108 | if err != nil { |
| 109 | return nil, nil, fmt.Errorf("failed to call socketpair: %w", err) |
| 110 | } |
| 111 | |
| 112 | fd1 := os.NewFile(uintptr(fds[0]), "network0") |
| 113 | fd2 := os.NewFile(uintptr(fds[1]), "network1") |
| 114 | return fd1, fd2, nil |
| 115 | } |
| 116 | |
// HostInterfaceMAC is the MAC address given to the SLIRP-backed host network
// interface. That interface only exists when DisableHostNetworkInterface in
// MicroVMOptions is not set.
var HostInterfaceMAC = net.HardwareAddr{0x02, 0x72, 0x82, 0xbf, 0xc3, 0x56}
| 120 | |
| 121 | // MicroVMOptions contains all options to start a MicroVM |
| 122 | type MicroVMOptions struct { |
| 123 | // Path to the ELF kernel binary |
| 124 | KernelPath string |
| 125 | |
| 126 | // Path to the Initramfs |
| 127 | InitramfsPath string |
| 128 | |
| 129 | // Cmdline contains additional kernel commandline options |
| 130 | Cmdline string |
| 131 | |
Serge Bazanski | 216fe7b | 2021-05-21 18:36:16 +0200 | [diff] [blame] | 132 | // SerialPort is a File(descriptor) over which you can communicate with the serial |
| 133 | // port of the machine It can be set to an existing file descriptor (like |
| 134 | // os.Stdout/os.Stderr) or you can use NewSocketPair() to get one end to talk to |
| 135 | // from Go. |
Lorenz Brun | 942f5e2 | 2022-01-27 15:03:10 +0100 | [diff] [blame] | 136 | SerialPort io.Writer |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 137 | |
Serge Bazanski | 216fe7b | 2021-05-21 18:36:16 +0200 | [diff] [blame] | 138 | // ExtraChardevs can be used similar to SerialPort, but can contain an arbitrary |
| 139 | // number of additional serial ports |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 140 | ExtraChardevs []*os.File |
| 141 | |
Serge Bazanski | 216fe7b | 2021-05-21 18:36:16 +0200 | [diff] [blame] | 142 | // ExtraNetworkInterfaces can contain an arbitrary number of file descriptors which |
| 143 | // are mapped into the VM as virtio network interfaces. The first interface is |
| 144 | // always a SLIRP-backed interface for communicating with the host. |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 145 | ExtraNetworkInterfaces []*os.File |
| 146 | |
Serge Bazanski | 216fe7b | 2021-05-21 18:36:16 +0200 | [diff] [blame] | 147 | // PortMap contains ports that are mapped to the host through the built-in SLIRP |
| 148 | // network interface. |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 149 | PortMap PortMap |
| 150 | |
Serge Bazanski | 216fe7b | 2021-05-21 18:36:16 +0200 | [diff] [blame] | 151 | // DisableHostNetworkInterface disables the SLIRP-backed host network interface |
| 152 | // that is normally the first network interface. If this is set PortMap is ignored. |
| 153 | // Mostly useful for speeding up QEMU's startup time for tests. |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 154 | DisableHostNetworkInterface bool |
| 155 | } |
| 156 | |
Serge Bazanski | 216fe7b | 2021-05-21 18:36:16 +0200 | [diff] [blame] | 157 | // RunMicroVM launches a tiny VM mostly intended for testing. Very quick to boot |
| 158 | // (<40ms). |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 159 | func RunMicroVM(ctx context.Context, opts *MicroVMOptions) error { |
Serge Bazanski | 216fe7b | 2021-05-21 18:36:16 +0200 | [diff] [blame] | 160 | // Generate options for all the file descriptors we'll be passing as virtio "serial |
| 161 | // ports" |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 162 | var extraArgs []string |
| 163 | for idx, _ := range opts.ExtraChardevs { |
| 164 | idxStr := strconv.Itoa(idx) |
| 165 | id := "extra" + idxStr |
Serge Bazanski | 216fe7b | 2021-05-21 18:36:16 +0200 | [diff] [blame] | 166 | // That this works is pretty much a hack, but upstream QEMU doesn't have a |
| 167 | // bidirectional chardev backend not based around files/sockets on the disk which |
| 168 | // are a giant pain to work with. We're using QEMU's fdset functionality to make |
| 169 | // FDs available as pseudo-files and then "ab"using the pipe backend's fallback |
| 170 | // functionality to get a single bidirectional chardev backend backed by a passed- |
| 171 | // down RDWR fd. Ref https://lists.gnu.org/archive/html/qemu-devel/2015- |
| 172 | // 12/msg01256.html |
Serge Bazanski | 66e5895 | 2021-10-05 17:06:56 +0200 | [diff] [blame] | 173 | addFdConf := QemuValue{ |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 174 | "set": {idxStr}, |
| 175 | "fd": {strconv.Itoa(idx + 3)}, |
| 176 | } |
Serge Bazanski | 66e5895 | 2021-10-05 17:06:56 +0200 | [diff] [blame] | 177 | chardevConf := QemuValue{ |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 178 | "id": {id}, |
| 179 | "path": {"/dev/fdset/" + idxStr}, |
| 180 | } |
Serge Bazanski | 66e5895 | 2021-10-05 17:06:56 +0200 | [diff] [blame] | 181 | deviceConf := QemuValue{ |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 182 | "chardev": {id}, |
| 183 | } |
Serge Bazanski | 66e5895 | 2021-10-05 17:06:56 +0200 | [diff] [blame] | 184 | extraArgs = append(extraArgs, "-add-fd", addFdConf.ToOption(""), |
| 185 | "-chardev", chardevConf.ToOption("pipe"), "-device", deviceConf.ToOption("virtserialport")) |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 186 | } |
| 187 | |
| 188 | for idx, _ := range opts.ExtraNetworkInterfaces { |
| 189 | id := fmt.Sprintf("net%v", idx) |
Serge Bazanski | 66e5895 | 2021-10-05 17:06:56 +0200 | [diff] [blame] | 190 | netdevConf := QemuValue{ |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 191 | "id": {id}, |
| 192 | "fd": {strconv.Itoa(idx + 3 + len(opts.ExtraChardevs))}, |
| 193 | } |
Serge Bazanski | 66e5895 | 2021-10-05 17:06:56 +0200 | [diff] [blame] | 194 | extraArgs = append(extraArgs, "-netdev", netdevConf.ToOption("socket"), "-device", "virtio-net-device,netdev="+id) |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 195 | } |
| 196 | |
Serge Bazanski | 216fe7b | 2021-05-21 18:36:16 +0200 | [diff] [blame] | 197 | // This sets up a minimum viable environment for our Linux kernel. It clears all |
| 198 | // standard QEMU configuration and sets up a MicroVM machine |
| 199 | // (https://github.com/qemu/qemu/blob/master/docs/microvm.rst) with all legacy |
| 200 | // emulation turned off. This means the only "hardware" the Linux kernel inside can |
| 201 | // communicate with is a single virtio-mmio region. Over that MMIO interface we run |
| 202 | // a paravirtualized RNG (since the kernel in there has nothing to gather that from |
| 203 | // and it delays booting), a single paravirtualized console and an arbitrary number |
| 204 | // of extra serial ports for talking to various things that might run inside. The |
| 205 | // kernel, initramfs and command line are mapped into VM memory at boot time and |
| 206 | // not loaded from any sort of disk. Booting and shutting off one of these VMs |
| 207 | // takes <100ms. |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 208 | baseArgs := []string{"-nodefaults", "-no-user-config", "-nographic", "-no-reboot", |
| 209 | "-accel", "kvm", "-cpu", "host", |
Serge Bazanski | 216fe7b | 2021-05-21 18:36:16 +0200 | [diff] [blame] | 210 | // Needed until QEMU updates their bundled qboot version (needs |
| 211 | // https://github.com/bonzini/qboot/pull/28) |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 212 | "-bios", "external/com_github_bonzini_qboot/bios.bin", |
| 213 | "-M", "microvm,x-option-roms=off,pic=off,pit=off,rtc=off,isa-serial=off", |
| 214 | "-kernel", opts.KernelPath, |
Serge Bazanski | 216fe7b | 2021-05-21 18:36:16 +0200 | [diff] [blame] | 215 | // We force using a triple-fault reboot strategy since otherwise the kernel first |
| 216 | // tries others (like ACPI) which are not available in this very restricted |
| 217 | // environment. Similarly we need to override the boot console since there's |
| 218 | // nothing on the ISA bus that the kernel could talk to. We also force quiet for |
| 219 | // performance reasons. |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 220 | "-append", "reboot=t console=hvc0 quiet " + opts.Cmdline, |
| 221 | "-initrd", opts.InitramfsPath, |
| 222 | "-device", "virtio-rng-device,max-bytes=1024,period=1000", |
| 223 | "-device", "virtio-serial-device,max_ports=16", |
| 224 | "-chardev", "stdio,id=con0", "-device", "virtconsole,chardev=con0", |
| 225 | } |
| 226 | |
| 227 | if !opts.DisableHostNetworkInterface { |
| 228 | qemuNetType := "user" |
Serge Bazanski | 66e5895 | 2021-10-05 17:06:56 +0200 | [diff] [blame] | 229 | qemuNetConfig := QemuValue{ |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 230 | "id": {"usernet0"}, |
| 231 | "net": {"10.42.0.0/24"}, |
| 232 | "dhcpstart": {"10.42.0.10"}, |
| 233 | } |
| 234 | if opts.PortMap != nil { |
Serge Bazanski | 66e5895 | 2021-10-05 17:06:56 +0200 | [diff] [blame] | 235 | qemuNetConfig["hostfwd"] = opts.PortMap.ToQemuForwards() |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 236 | } |
| 237 | |
Serge Bazanski | 66e5895 | 2021-10-05 17:06:56 +0200 | [diff] [blame] | 238 | baseArgs = append(baseArgs, "-netdev", qemuNetConfig.ToOption(qemuNetType), |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 239 | "-device", "virtio-net-device,netdev=usernet0,mac="+HostInterfaceMAC.String()) |
| 240 | } |
| 241 | |
| 242 | var stdErrBuf bytes.Buffer |
| 243 | cmd := exec.CommandContext(ctx, "qemu-system-x86_64", append(baseArgs, extraArgs...)...) |
| 244 | cmd.Stdout = opts.SerialPort |
| 245 | cmd.Stderr = &stdErrBuf |
| 246 | |
| 247 | cmd.ExtraFiles = append(cmd.ExtraFiles, opts.ExtraChardevs...) |
| 248 | cmd.ExtraFiles = append(cmd.ExtraFiles, opts.ExtraNetworkInterfaces...) |
| 249 | |
| 250 | err := cmd.Run() |
Serge Bazanski | 66e5895 | 2021-10-05 17:06:56 +0200 | [diff] [blame] | 251 | // If it's a context error, just quit. There's no way to tell a |
| 252 | // killed-due-to-context vs killed-due-to-external-reason error returned by Run, |
| 253 | // so we approximate by looking at the context's status. |
| 254 | if err != nil && ctx.Err() != nil { |
| 255 | return ctx.Err() |
| 256 | } |
| 257 | |
Lorenz Brun | 3ff5af3 | 2020-06-24 16:34:11 +0200 | [diff] [blame] | 258 | var exerr *exec.ExitError |
| 259 | if err != nil && errors.As(err, &exerr) { |
| 260 | exerr.Stderr = stdErrBuf.Bytes() |
| 261 | newErr := QEMUError(*exerr) |
| 262 | return &newErr |
| 263 | } |
| 264 | return err |
| 265 | } |
| 266 | |
// QEMUError is a special type of ExitError used when QEMU fails. It shares the
// layout of exec.ExitError, and its error message additionally contains the
// stderr output stored in it, for debugging.
type QEMUError exec.ExitError

// Error returns the process state description followed by the contents of
// Stderr, separated by ": ".
func (e *QEMUError) Error() string {
	state := e.String()
	return state + ": " + string(e.Stderr)
}