blob: d030b2215cd663e0195afa02352d5fd8723cdf6f [file] [log] [blame]
// Copyright 2020 The Monogon Project Authors.
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
16
// Package launch implements test harnesses for running QEMU VMs from tests.
package launch
19
import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"net"
	"os"
	"os/exec"
	"sort"
	"strconv"
	"strings"
	"syscall"

	"github.com/bazelbuild/rules_go/go/runfiles"
	"golang.org/x/sys/unix"

	"source.monogon.dev/osbase/freeport"
)
38
// QemuValue holds the structured parameters of a single QEMU command line
// option. Every key carries zero or more values.
type QemuValue map[string][]string

// ToOption encodes structured data into a QEMU option string. If name is
// non-empty it is emitted first. It is followed by every key of the map: a key
// without values is emitted bare, a key with values is emitted once per value
// as key=value. All elements are joined with commas.
//
// Keys are emitted in sorted order so that the same input always yields the
// same string; the values of a key keep the order they have in their slice.
//
// Example: QemuValue{"key1": {"val1"}, "key2": {"val2", "val3"}}.ToOption("test")
// returns "test,key1=val1,key2=val2,key2=val3".
func (value QemuValue) ToOption(name string) string {
	keys := make([]string, 0, len(value))
	for key := range value {
		keys = append(keys, key)
	}
	// Go randomizes map iteration order; sorting keeps the generated command
	// line stable between calls.
	sort.Strings(keys)

	var optionValues []string
	if name != "" {
		optionValues = append(optionValues, name)
	}
	for _, key := range keys {
		values := value[key]
		if len(values) == 0 {
			optionValues = append(optionValues, key)
			continue
		}
		for _, val := range values {
			optionValues = append(optionValues, key+"="+val)
		}
	}
	return strings.Join(optionValues, ",")
}
58
Leopoldaf5086b2023-01-15 14:12:42 +010059// PrettyPrintQemuArgs prints the given QEMU arguments to stderr.
60func PrettyPrintQemuArgs(name string, args []string) {
61 var argsFmt string
62 for _, arg := range args {
63 argsFmt += arg
64 if !strings.HasPrefix(arg, "-") {
65 argsFmt += "\n "
66 } else {
67 argsFmt += " "
68 }
69 }
Serge Bazanski05f813b2023-03-16 17:58:39 +010070 Log("Running %s:\n %s\n", name, argsFmt)
Leopoldaf5086b2023-01-15 14:12:42 +010071}
72
// PortMap represents where VM ports are mapped to on the host. It maps from the
// VM port number to the host port number.
type PortMap map[uint16]uint16

// ToQemuForwards generates QEMU hostfwd values
// (https://qemu.weilnetz.de/doc/qemu-doc.html#:~:text=hostfwd=) for all mapped
// ports. Each entry has the form "tcp::<host port>-:<vm port>".
//
// Entries are ordered by ascending VM port so that the result is deterministic.
// A nil or empty map yields a nil slice.
func (p PortMap) ToQemuForwards() []string {
	if len(p) == 0 {
		return nil
	}

	// Go randomizes map iteration order, so collect and sort the keys first.
	vmPorts := make([]int, 0, len(p))
	for vmPort := range p {
		vmPorts = append(vmPorts, int(vmPort))
	}
	sort.Ints(vmPorts)

	hostfwdOptions := make([]string, 0, len(vmPorts))
	for _, vmPort := range vmPorts {
		hostPort := p[uint16(vmPort)]
		hostfwdOptions = append(hostfwdOptions, fmt.Sprintf("tcp::%d-:%d", hostPort, vmPort))
	}
	return hostfwdOptions
}
86
Serge Bazanski216fe7b2021-05-21 18:36:16 +020087// IdentityPortMap returns a port map where each given port is mapped onto itself
88// on the host. This is mainly useful for development against Metropolis. The dbg
89// command requires this mapping.
Serge Bazanskibe742842022-04-04 13:18:50 +020090func IdentityPortMap(ports []uint16) PortMap {
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020091 portMap := make(PortMap)
Lorenz Bruned0503c2020-07-28 17:21:25 +020092 for _, port := range ports {
Tim Windelschmidt5e460a92024-04-11 01:33:09 +020093 portMap[port] = port
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020094 }
95 return portMap
96}
97
Serge Bazanski216fe7b2021-05-21 18:36:16 +020098// ConflictFreePortMap returns a port map where each given port is mapped onto a
99// random free port on the host. This is intended for automated testing where
100// multiple instances of Metropolis nodes might be running. Please call this
101// function for each Launch command separately and as close to it as possible since
102// it cannot guarantee that the ports will remain free.
Serge Bazanskibe742842022-04-04 13:18:50 +0200103func ConflictFreePortMap(ports []uint16) (PortMap, error) {
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200104 portMap := make(PortMap)
Lorenz Bruned0503c2020-07-28 17:21:25 +0200105 for _, port := range ports {
Serge Bazanskicb883e22020-07-06 17:47:55 +0200106 mappedPort, listenCloser, err := freeport.AllocateTCPPort()
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200107 if err != nil {
108 return portMap, fmt.Errorf("failed to get free host port: %w", err)
109 }
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200110 // Defer closing of the listening port until the function is done and all ports are
111 // allocated
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200112 defer listenCloser.Close()
113 portMap[port] = mappedPort
114 }
115 return portMap, nil
116}
117
// GuestServiceMap maps an IP/port combination inside the virtual guest network
// to a TCPAddr reachable by the host. If the guest connects to the virtual
// address/port, this connection gets forwarded to the host.
type GuestServiceMap map[*net.TCPAddr]net.TCPAddr

// ToQemuForwards generates QEMU guestfwd values
// (https://qemu.weilnetz.de/doc/qemu-doc.html#:~:text=guestfwd=) for all mapped
// addresses. Each entry has the form "tcp:<guest addr>-tcp:<host addr>".
//
// Entries are sorted lexically so that the result is deterministic. A nil or
// empty map yields a nil slice.
func (p GuestServiceMap) ToQemuForwards() []string {
	if len(p) == 0 {
		return nil
	}
	guestfwdOptions := make([]string, 0, len(p))
	for guestAddr, hostAddr := range p {
		guestfwdOptions = append(guestfwdOptions, fmt.Sprintf("tcp:%s-tcp:%s", guestAddr.String(), hostAddr.String()))
	}
	// Go randomizes map iteration order; sort the rendered entries to keep the
	// generated command line stable between calls.
	sort.Strings(guestfwdOptions)
	return guestfwdOptions
}
132
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200133// NewSocketPair creates a new socket pair. By connecting both ends to different
134// instances you can connect them with a virtual "network cable". The ends can be
135// passed into the ConnectToSocket option.
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200136func NewSocketPair() (*os.File, *os.File, error) {
137 fds, err := unix.Socketpair(unix.AF_UNIX, syscall.SOCK_STREAM, 0)
138 if err != nil {
139 return nil, nil, fmt.Errorf("failed to call socketpair: %w", err)
140 }
141
142 fd1 := os.NewFile(uintptr(fds[0]), "network0")
143 fd2 := os.NewFile(uintptr(fds[1]), "network1")
144 return fd1, fd2, nil
145}
146
// HostInterfaceMAC is the MAC address given to the SLIRP-backed host network
// interface of the VM, unless that interface is disabled (see
// DisableHostNetworkInterface in MicroVMOptions).
var HostInterfaceMAC = net.HardwareAddr{0x02, 0x72, 0x82, 0xbf, 0xc3, 0x56}
150
151// MicroVMOptions contains all options to start a MicroVM
152type MicroVMOptions struct {
Leopoldaf5086b2023-01-15 14:12:42 +0100153 // Name is a human-readable identifier to be used in debug output.
154 Name string
155
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200156 // Path to the ELF kernel binary
157 KernelPath string
158
159 // Path to the Initramfs
160 InitramfsPath string
161
162 // Cmdline contains additional kernel commandline options
163 Cmdline string
164
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200165 // SerialPort is a File(descriptor) over which you can communicate with the serial
166 // port of the machine It can be set to an existing file descriptor (like
167 // os.Stdout/os.Stderr) or you can use NewSocketPair() to get one end to talk to
168 // from Go.
Lorenz Brun942f5e22022-01-27 15:03:10 +0100169 SerialPort io.Writer
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200170
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200171 // ExtraChardevs can be used similar to SerialPort, but can contain an arbitrary
172 // number of additional serial ports
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200173 ExtraChardevs []*os.File
174
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200175 // ExtraNetworkInterfaces can contain an arbitrary number of file descriptors which
176 // are mapped into the VM as virtio network interfaces. The first interface is
177 // always a SLIRP-backed interface for communicating with the host.
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200178 ExtraNetworkInterfaces []*os.File
179
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200180 // PortMap contains ports that are mapped to the host through the built-in SLIRP
181 // network interface.
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200182 PortMap PortMap
183
Lorenz Brun150f24a2023-07-13 20:11:06 +0200184 // GuestServiceMap contains TCP services made available in the guest virtual
185 // network which are running on the host.
186 GuestServiceMap GuestServiceMap
187
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200188 // DisableHostNetworkInterface disables the SLIRP-backed host network interface
189 // that is normally the first network interface. If this is set PortMap is ignored.
190 // Mostly useful for speeding up QEMU's startup time for tests.
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200191 DisableHostNetworkInterface bool
Leopoldacfad5b2023-01-15 14:05:25 +0100192
193 // PcapDump can be used to dump all network traffic to a pcap file.
194 // If unset, no dump is created.
195 PcapDump string
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200196}
197
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200198// RunMicroVM launches a tiny VM mostly intended for testing. Very quick to boot
199// (<40ms).
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200200func RunMicroVM(ctx context.Context, opts *MicroVMOptions) error {
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200201 // Generate options for all the file descriptors we'll be passing as virtio "serial
202 // ports"
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200203 var extraArgs []string
Lorenz Brunce68ab92023-06-06 03:32:39 +0200204 for idx := range opts.ExtraChardevs {
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200205 idxStr := strconv.Itoa(idx)
206 id := "extra" + idxStr
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200207 // That this works is pretty much a hack, but upstream QEMU doesn't have a
208 // bidirectional chardev backend not based around files/sockets on the disk which
209 // are a giant pain to work with. We're using QEMU's fdset functionality to make
210 // FDs available as pseudo-files and then "ab"using the pipe backend's fallback
211 // functionality to get a single bidirectional chardev backend backed by a passed-
212 // down RDWR fd. Ref https://lists.gnu.org/archive/html/qemu-devel/2015-
213 // 12/msg01256.html
Serge Bazanski66e58952021-10-05 17:06:56 +0200214 addFdConf := QemuValue{
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200215 "set": {idxStr},
216 "fd": {strconv.Itoa(idx + 3)},
217 }
Serge Bazanski66e58952021-10-05 17:06:56 +0200218 chardevConf := QemuValue{
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200219 "id": {id},
220 "path": {"/dev/fdset/" + idxStr},
221 }
Serge Bazanski66e58952021-10-05 17:06:56 +0200222 deviceConf := QemuValue{
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200223 "chardev": {id},
224 }
Serge Bazanski66e58952021-10-05 17:06:56 +0200225 extraArgs = append(extraArgs, "-add-fd", addFdConf.ToOption(""),
226 "-chardev", chardevConf.ToOption("pipe"), "-device", deviceConf.ToOption("virtserialport"))
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200227 }
228
Lorenz Brunce68ab92023-06-06 03:32:39 +0200229 for idx := range opts.ExtraNetworkInterfaces {
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200230 id := fmt.Sprintf("net%v", idx)
Serge Bazanski66e58952021-10-05 17:06:56 +0200231 netdevConf := QemuValue{
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200232 "id": {id},
233 "fd": {strconv.Itoa(idx + 3 + len(opts.ExtraChardevs))},
234 }
Serge Bazanski66e58952021-10-05 17:06:56 +0200235 extraArgs = append(extraArgs, "-netdev", netdevConf.ToOption("socket"), "-device", "virtio-net-device,netdev="+id)
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200236 }
237
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200238 // This sets up a minimum viable environment for our Linux kernel. It clears all
239 // standard QEMU configuration and sets up a MicroVM machine
240 // (https://github.com/qemu/qemu/blob/master/docs/microvm.rst) with all legacy
241 // emulation turned off. This means the only "hardware" the Linux kernel inside can
242 // communicate with is a single virtio-mmio region. Over that MMIO interface we run
243 // a paravirtualized RNG (since the kernel in there has nothing to gather that from
244 // and it delays booting), a single paravirtualized console and an arbitrary number
245 // of extra serial ports for talking to various things that might run inside. The
246 // kernel, initramfs and command line are mapped into VM memory at boot time and
247 // not loaded from any sort of disk. Booting and shutting off one of these VMs
248 // takes <100ms.
Tim Windelschmidt244b5672024-02-06 10:18:56 +0100249 biosPath, err := runfiles.Rlocation("com_github_bonzini_qboot/bios.bin")
250 if err != nil {
251 return fmt.Errorf("while searching bios: %w", err)
252 }
253
Lorenz Brunce68ab92023-06-06 03:32:39 +0200254 baseArgs := []string{
255 "-nodefaults", "-no-user-config", "-nographic", "-no-reboot",
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200256 "-accel", "kvm", "-cpu", "host",
Lorenz Brunce68ab92023-06-06 03:32:39 +0200257 "-m", "1G",
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200258 // Needed until QEMU updates their bundled qboot version (needs
259 // https://github.com/bonzini/qboot/pull/28)
Tim Windelschmidt244b5672024-02-06 10:18:56 +0100260 "-bios", biosPath,
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200261 "-M", "microvm,x-option-roms=off,pic=off,pit=off,rtc=off,isa-serial=off",
262 "-kernel", opts.KernelPath,
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200263 // We force using a triple-fault reboot strategy since otherwise the kernel first
264 // tries others (like ACPI) which are not available in this very restricted
265 // environment. Similarly we need to override the boot console since there's
266 // nothing on the ISA bus that the kernel could talk to. We also force quiet for
267 // performance reasons.
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200268 "-append", "reboot=t console=hvc0 quiet " + opts.Cmdline,
269 "-initrd", opts.InitramfsPath,
270 "-device", "virtio-rng-device,max-bytes=1024,period=1000",
271 "-device", "virtio-serial-device,max_ports=16",
272 "-chardev", "stdio,id=con0", "-device", "virtconsole,chardev=con0",
273 }
274
275 if !opts.DisableHostNetworkInterface {
276 qemuNetType := "user"
Serge Bazanski66e58952021-10-05 17:06:56 +0200277 qemuNetConfig := QemuValue{
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200278 "id": {"usernet0"},
279 "net": {"10.42.0.0/24"},
280 "dhcpstart": {"10.42.0.10"},
281 }
282 if opts.PortMap != nil {
Serge Bazanski66e58952021-10-05 17:06:56 +0200283 qemuNetConfig["hostfwd"] = opts.PortMap.ToQemuForwards()
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200284 }
Lorenz Brun150f24a2023-07-13 20:11:06 +0200285 if opts.GuestServiceMap != nil {
286 qemuNetConfig["guestfwd"] = opts.GuestServiceMap.ToQemuForwards()
287 }
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200288
Serge Bazanski66e58952021-10-05 17:06:56 +0200289 baseArgs = append(baseArgs, "-netdev", qemuNetConfig.ToOption(qemuNetType),
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200290 "-device", "virtio-net-device,netdev=usernet0,mac="+HostInterfaceMAC.String())
291 }
292
Leopoldacfad5b2023-01-15 14:05:25 +0100293 if !opts.DisableHostNetworkInterface && opts.PcapDump != "" {
294 qemuNetDump := QemuValue{
295 "id": {"usernet0"},
296 "netdev": {"usernet0"},
297 "file": {opts.PcapDump},
298 }
299 extraArgs = append(extraArgs, "-object", qemuNetDump.ToOption("filter-dump"))
300 }
301
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200302 var stdErrBuf bytes.Buffer
303 cmd := exec.CommandContext(ctx, "qemu-system-x86_64", append(baseArgs, extraArgs...)...)
304 cmd.Stdout = opts.SerialPort
305 cmd.Stderr = &stdErrBuf
306
307 cmd.ExtraFiles = append(cmd.ExtraFiles, opts.ExtraChardevs...)
308 cmd.ExtraFiles = append(cmd.ExtraFiles, opts.ExtraNetworkInterfaces...)
309
Leopoldaf5086b2023-01-15 14:12:42 +0100310 PrettyPrintQemuArgs(opts.Name, cmd.Args)
311
Tim Windelschmidt244b5672024-02-06 10:18:56 +0100312 err = cmd.Run()
Serge Bazanski66e58952021-10-05 17:06:56 +0200313 // If it's a context error, just quit. There's no way to tell a
314 // killed-due-to-context vs killed-due-to-external-reason error returned by Run,
315 // so we approximate by looking at the context's status.
316 if err != nil && ctx.Err() != nil {
317 return ctx.Err()
318 }
319
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200320 var exerr *exec.ExitError
321 if err != nil && errors.As(err, &exerr) {
322 exerr.Stderr = stdErrBuf.Bytes()
323 newErr := QEMUError(*exerr)
324 return &newErr
325 }
326 return err
327}
328
// QEMUError is a special type of ExitError used when QEMU fails. It shares the
// layout of exec.ExitError, but its error message additionally contains the
// captured stderr output for debugging.
type QEMUError exec.ExitError

// Error returns the textual process state followed by the contents of Stderr,
// separated by ": ".
func (e *QEMUError) Error() string {
	return fmt.Sprintf("%s: %s", e.String(), e.Stderr)
}