blob: a427f1782bec31b48099ab3afec39485b139eb12 [file] [log] [blame]
// Copyright The Monogon Project Authors.
// SPDX-License-Identifier: Apache-2.0

// Package qemu implements test harnesses for running qemu VMs from tests.
5package qemu
Lorenz Brunfc5dbc62020-05-28 12:18:07 +02006
import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"net"
	"os"
	"os/exec"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
	"syscall"

	"github.com/bazelbuild/rules_go/go/runfiles"
	"golang.org/x/sys/unix"

	"source.monogon.dev/osbase/test/freeport"
)
26
// HostInterfaceMAC is the MAC address assigned to the SLIRP-backed host
// network interface of the VM, unless that interface is disabled (see
// DisableHostNetworkInterface in MicroVMOptions).
var HostInterfaceMAC = net.HardwareAddr{0x02, 0x72, 0x82, 0xbf, 0xc3, 0x56}
32
// QemuValue is structured data for a single QEMU command line option: every key
// maps to zero or more values.
type QemuValue map[string][]string

// ToOption encodes structured data into a QEMU option. Example: "test", {"key1":
// {"val1"}, "key2": {"val2", "val3"}} returns "test,key1=val1,key2=val2,key2=val3"
//
// If name is empty it is omitted from the result. A key without any values is
// emitted as a bare key. Keys are emitted in sorted order so that the same
// QemuValue always encodes to the same string; the values of a key keep the
// order in which they appear in the slice.
func (value QemuValue) ToOption(name string) string {
	// Go randomizes map iteration order, so sort the keys to get a stable
	// command line.
	keys := make([]string, 0, len(value))
	for key := range value {
		keys = append(keys, key)
	}
	sort.Strings(keys)

	var optionValues []string
	if name != "" {
		optionValues = append(optionValues, name)
	}
	for _, key := range keys {
		values := value[key]
		if len(values) == 0 {
			optionValues = append(optionValues, key)
			continue
		}
		for _, val := range values {
			optionValues = append(optionValues, key+"="+val)
		}
	}
	return strings.Join(optionValues, ",")
}
52
// PrettyPrintQemuArgs writes the given QEMU arguments to stderr, preceded by a
// "Running <name>:" header. An argument starting with "-" is followed by a
// space, so that it shares a line with whatever comes next; every other
// argument ends the current line.
func PrettyPrintQemuArgs(name string, args []string) {
	var pretty strings.Builder
	for _, arg := range args {
		pretty.WriteString(arg)
		if strings.HasPrefix(arg, "-") {
			pretty.WriteString(" ")
		} else {
			pretty.WriteString("\n ")
		}
	}
	fmt.Fprintf(os.Stderr, "Running %s:\n %s\n", name, pretty.String())
}
66
// PortMap represents where VM ports are mapped to on the host. It maps from the VM
// port number to the host port number.
type PortMap map[uint16]uint16

// ToQemuForwards generates QEMU hostfwd values (https://qemu.weilnetz.de/doc/qemu-
// doc.html#:~:text=hostfwd=) for all mapped ports.
//
// Each entry has the form "tcp::<hostPort>-:<vmPort>". Entries are ordered by
// ascending VM port so that the same map always yields the same result. A nil
// or empty map yields nil.
func (p PortMap) ToQemuForwards() []string {
	if len(p) == 0 {
		return nil
	}

	// Map iteration order is random; sort the VM ports for stable output.
	vmPorts := make([]uint16, 0, len(p))
	for vmPort := range p {
		vmPorts = append(vmPorts, vmPort)
	}
	sort.Slice(vmPorts, func(i, j int) bool { return vmPorts[i] < vmPorts[j] })

	hostfwdOptions := make([]string, 0, len(vmPorts))
	for _, vmPort := range vmPorts {
		hostfwdOptions = append(hostfwdOptions, fmt.Sprintf("tcp::%d-:%d", p[vmPort], vmPort))
	}
	return hostfwdOptions
}
80
Serge Bazanski216fe7b2021-05-21 18:36:16 +020081// IdentityPortMap returns a port map where each given port is mapped onto itself
82// on the host. This is mainly useful for development against Metropolis. The dbg
83// command requires this mapping.
Serge Bazanskibe742842022-04-04 13:18:50 +020084func IdentityPortMap(ports []uint16) PortMap {
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020085 portMap := make(PortMap)
Lorenz Bruned0503c2020-07-28 17:21:25 +020086 for _, port := range ports {
Tim Windelschmidt5e460a92024-04-11 01:33:09 +020087 portMap[port] = port
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020088 }
89 return portMap
90}
91
Serge Bazanski216fe7b2021-05-21 18:36:16 +020092// ConflictFreePortMap returns a port map where each given port is mapped onto a
93// random free port on the host. This is intended for automated testing where
94// multiple instances of Metropolis nodes might be running. Please call this
95// function for each Launch command separately and as close to it as possible since
96// it cannot guarantee that the ports will remain free.
Serge Bazanskibe742842022-04-04 13:18:50 +020097func ConflictFreePortMap(ports []uint16) (PortMap, error) {
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020098 portMap := make(PortMap)
Lorenz Bruned0503c2020-07-28 17:21:25 +020099 for _, port := range ports {
Serge Bazanskicb883e22020-07-06 17:47:55 +0200100 mappedPort, listenCloser, err := freeport.AllocateTCPPort()
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200101 if err != nil {
102 return portMap, fmt.Errorf("failed to get free host port: %w", err)
103 }
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200104 // Defer closing of the listening port until the function is done and all ports are
105 // allocated
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200106 defer listenCloser.Close()
107 portMap[port] = mappedPort
108 }
109 return portMap, nil
110}
111
// GuestServiceMap maps an IP/port combination inside the virtual guest network
// to a TCPAddr reachable by the host. If the guest connects to the virtual
// address/port, this connection gets forwarded to the host.
type GuestServiceMap map[*net.TCPAddr]net.TCPAddr

// ToQemuForwards generates QEMU guestfwd values (https://qemu.weilnetz.de/doc/qemu-
// doc.html#:~:text=guestfwd=) for all mapped addresses.
//
// Each entry has the form "tcp:<guestAddr>-tcp:<hostAddr>". The entries are
// returned in lexicographic order so that the same map always yields the same
// result. A nil or empty map yields nil.
func (p GuestServiceMap) ToQemuForwards() []string {
	if len(p) == 0 {
		return nil
	}
	guestfwdOptions := make([]string, 0, len(p))
	for guestAddr, hostAddr := range p {
		guestfwdOptions = append(guestfwdOptions, fmt.Sprintf("tcp:%s-tcp:%s", guestAddr.String(), hostAddr.String()))
	}
	// Map iteration order is random; sort the rendered rules for stable output.
	sort.Strings(guestfwdOptions)
	return guestfwdOptions
}
126
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200127// NewSocketPair creates a new socket pair. By connecting both ends to different
128// instances you can connect them with a virtual "network cable". The ends can be
129// passed into the ConnectToSocket option.
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200130func NewSocketPair() (*os.File, *os.File, error) {
131 fds, err := unix.Socketpair(unix.AF_UNIX, syscall.SOCK_STREAM, 0)
132 if err != nil {
133 return nil, nil, fmt.Errorf("failed to call socketpair: %w", err)
134 }
135
136 fd1 := os.NewFile(uintptr(fds[0]), "network0")
137 fd2 := os.NewFile(uintptr(fds[1]), "network1")
138 return fd1, fd2, nil
139}
140
Tim Windelschmidt8f1efe92025-04-01 01:28:43 +0200141var (
142 // These are filled by bazel at linking time with the canonical path of
143 // their corresponding file. Inside the init function we resolve it
144 // with the rules_go runfiles package to the real path.
145 xQEMUPath string
146)
147
148func init() {
149 var err error
150 for _, path := range []*string{
151 &xQEMUPath,
152 } {
153 *path, err = runfiles.Rlocation(*path)
154 if err != nil {
155 panic(err)
156 }
157 }
158}
159
// MicroVMOptions contains all options to start a MicroVM with RunMicroVM.
type MicroVMOptions struct {
	// Name is a human-readable identifier to be used in debug output.
	Name string

	// KernelPath is the path to the ELF kernel binary. It is passed to QEMU
	// as -kernel.
	KernelPath string

	// InitramfsPath is the path to the initramfs. It is passed to QEMU as
	// -initrd.
	InitramfsPath string

	// Cmdline contains additional kernel command line options. They are
	// appended after the options RunMicroVM always sets
	// ("reboot=t console=hvc0 quiet").
	Cmdline string

	// SerialPort receives the output of the machine's console: RunMicroVM
	// connects it to QEMU's stdout, which backs the virtconsole device. It can
	// be set to an existing file (like os.Stdout/os.Stderr) or to any other
	// io.Writer.
	SerialPort io.Writer

	// ExtraChardevs can be used similar to SerialPort, but can contain an
	// arbitrary number of additional serial ports. Each file is passed down to
	// QEMU and exposed to the VM as a virtserialport.
	// NOTE(review): RunMicroVM configures the virtio-serial device with
	// max_ports=16, which presumably bounds how many entries work in practice
	// — confirm.
	ExtraChardevs []*os.File

	// ExtraNetworkInterfaces can contain an arbitrary number of file descriptors
	// which are mapped into the VM as virtio network interfaces. The first
	// interface is always a SLIRP-backed interface for communicating with the
	// host (unless DisableHostNetworkInterface is set).
	ExtraNetworkInterfaces []*os.File

	// PortMap contains ports that are mapped to the host through the built-in
	// SLIRP network interface.
	PortMap PortMap

	// GuestServiceMap contains TCP services made available in the guest virtual
	// network which are running on the host.
	GuestServiceMap GuestServiceMap

	// DisableHostNetworkInterface disables the SLIRP-backed host network
	// interface that is normally the first network interface. If this is set,
	// PortMap, GuestServiceMap and PcapDump are ignored. Mostly useful for
	// speeding up QEMU's startup time for tests.
	DisableHostNetworkInterface bool

	// PcapDump is the path of a pcap file to which the network traffic of the
	// SLIRP-backed host network interface is dumped. If unset, no dump is
	// created.
	PcapDump string
}
206
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200207// RunMicroVM launches a tiny VM mostly intended for testing. Very quick to boot
208// (<40ms).
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200209func RunMicroVM(ctx context.Context, opts *MicroVMOptions) error {
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200210 // Generate options for all the file descriptors we'll be passing as virtio "serial
211 // ports"
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200212 var extraArgs []string
Lorenz Brunce68ab92023-06-06 03:32:39 +0200213 for idx := range opts.ExtraChardevs {
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200214 idxStr := strconv.Itoa(idx)
215 id := "extra" + idxStr
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200216 // That this works is pretty much a hack, but upstream QEMU doesn't have a
217 // bidirectional chardev backend not based around files/sockets on the disk which
218 // are a giant pain to work with. We're using QEMU's fdset functionality to make
219 // FDs available as pseudo-files and then "ab"using the pipe backend's fallback
220 // functionality to get a single bidirectional chardev backend backed by a passed-
221 // down RDWR fd. Ref https://lists.gnu.org/archive/html/qemu-devel/2015-
222 // 12/msg01256.html
Serge Bazanski66e58952021-10-05 17:06:56 +0200223 addFdConf := QemuValue{
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200224 "set": {idxStr},
225 "fd": {strconv.Itoa(idx + 3)},
226 }
Serge Bazanski66e58952021-10-05 17:06:56 +0200227 chardevConf := QemuValue{
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200228 "id": {id},
229 "path": {"/dev/fdset/" + idxStr},
230 }
Serge Bazanski66e58952021-10-05 17:06:56 +0200231 deviceConf := QemuValue{
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200232 "chardev": {id},
233 }
Serge Bazanski66e58952021-10-05 17:06:56 +0200234 extraArgs = append(extraArgs, "-add-fd", addFdConf.ToOption(""),
235 "-chardev", chardevConf.ToOption("pipe"), "-device", deviceConf.ToOption("virtserialport"))
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200236 }
237
Lorenz Brunce68ab92023-06-06 03:32:39 +0200238 for idx := range opts.ExtraNetworkInterfaces {
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200239 id := fmt.Sprintf("net%v", idx)
Serge Bazanski66e58952021-10-05 17:06:56 +0200240 netdevConf := QemuValue{
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200241 "id": {id},
242 "fd": {strconv.Itoa(idx + 3 + len(opts.ExtraChardevs))},
243 }
Serge Bazanski66e58952021-10-05 17:06:56 +0200244 extraArgs = append(extraArgs, "-netdev", netdevConf.ToOption("socket"), "-device", "virtio-net-device,netdev="+id)
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200245 }
246
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200247 // This sets up a minimum viable environment for our Linux kernel. It clears all
248 // standard QEMU configuration and sets up a MicroVM machine
249 // (https://github.com/qemu/qemu/blob/master/docs/microvm.rst) with all legacy
250 // emulation turned off. This means the only "hardware" the Linux kernel inside can
251 // communicate with is a single virtio-mmio region. Over that MMIO interface we run
252 // a paravirtualized RNG (since the kernel in there has nothing to gather that from
253 // and it delays booting), a single paravirtualized console and an arbitrary number
254 // of extra serial ports for talking to various things that might run inside. The
255 // kernel, initramfs and command line are mapped into VM memory at boot time and
256 // not loaded from any sort of disk. Booting and shutting off one of these VMs
257 // takes <100ms.
Lorenz Brunce68ab92023-06-06 03:32:39 +0200258 baseArgs := []string{
259 "-nodefaults", "-no-user-config", "-nographic", "-no-reboot",
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200260 "-accel", "kvm", "-cpu", "host",
Lorenz Brunce68ab92023-06-06 03:32:39 +0200261 "-m", "1G",
Tim Windelschmidt492434a2024-10-22 14:29:55 +0200262 // Needed because QEMU does not boot without specifying the qboot bios
263 // even tho the documentation clearly states that this is the default.
Tim Windelschmidt8f1efe92025-04-01 01:28:43 +0200264 "-bios", filepath.Join(filepath.Dir(xQEMUPath), "../share/qemu/qboot.rom"),
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200265 "-M", "microvm,x-option-roms=off,pic=off,pit=off,rtc=off,isa-serial=off",
266 "-kernel", opts.KernelPath,
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200267 // We force using a triple-fault reboot strategy since otherwise the kernel first
268 // tries others (like ACPI) which are not available in this very restricted
269 // environment. Similarly we need to override the boot console since there's
270 // nothing on the ISA bus that the kernel could talk to. We also force quiet for
271 // performance reasons.
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200272 "-append", "reboot=t console=hvc0 quiet " + opts.Cmdline,
273 "-initrd", opts.InitramfsPath,
274 "-device", "virtio-rng-device,max-bytes=1024,period=1000",
275 "-device", "virtio-serial-device,max_ports=16",
276 "-chardev", "stdio,id=con0", "-device", "virtconsole,chardev=con0",
277 }
278
279 if !opts.DisableHostNetworkInterface {
280 qemuNetType := "user"
Serge Bazanski66e58952021-10-05 17:06:56 +0200281 qemuNetConfig := QemuValue{
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200282 "id": {"usernet0"},
283 "net": {"10.42.0.0/24"},
284 "dhcpstart": {"10.42.0.10"},
285 }
286 if opts.PortMap != nil {
Serge Bazanski66e58952021-10-05 17:06:56 +0200287 qemuNetConfig["hostfwd"] = opts.PortMap.ToQemuForwards()
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200288 }
Lorenz Brun150f24a2023-07-13 20:11:06 +0200289 if opts.GuestServiceMap != nil {
290 qemuNetConfig["guestfwd"] = opts.GuestServiceMap.ToQemuForwards()
291 }
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200292
Serge Bazanski66e58952021-10-05 17:06:56 +0200293 baseArgs = append(baseArgs, "-netdev", qemuNetConfig.ToOption(qemuNetType),
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200294 "-device", "virtio-net-device,netdev=usernet0,mac="+HostInterfaceMAC.String())
295 }
296
Leopoldacfad5b2023-01-15 14:05:25 +0100297 if !opts.DisableHostNetworkInterface && opts.PcapDump != "" {
298 qemuNetDump := QemuValue{
299 "id": {"usernet0"},
300 "netdev": {"usernet0"},
301 "file": {opts.PcapDump},
302 }
303 extraArgs = append(extraArgs, "-object", qemuNetDump.ToOption("filter-dump"))
304 }
305
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200306 var stdErrBuf bytes.Buffer
Tim Windelschmidt8f1efe92025-04-01 01:28:43 +0200307 cmd := exec.CommandContext(ctx, xQEMUPath, append(baseArgs, extraArgs...)...)
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200308 cmd.Stdout = opts.SerialPort
309 cmd.Stderr = &stdErrBuf
310
311 cmd.ExtraFiles = append(cmd.ExtraFiles, opts.ExtraChardevs...)
312 cmd.ExtraFiles = append(cmd.ExtraFiles, opts.ExtraNetworkInterfaces...)
313
Leopoldaf5086b2023-01-15 14:12:42 +0100314 PrettyPrintQemuArgs(opts.Name, cmd.Args)
315
Tim Windelschmidt492434a2024-10-22 14:29:55 +0200316 err := cmd.Run()
Serge Bazanski66e58952021-10-05 17:06:56 +0200317 // If it's a context error, just quit. There's no way to tell a
318 // killed-due-to-context vs killed-due-to-external-reason error returned by Run,
319 // so we approximate by looking at the context's status.
320 if err != nil && ctx.Err() != nil {
321 return ctx.Err()
322 }
323
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200324 var exerr *exec.ExitError
325 if err != nil && errors.As(err, &exerr) {
326 exerr.Stderr = stdErrBuf.Bytes()
327 newErr := QEMUError(*exerr)
328 return &newErr
329 }
330 return err
331}
332
// QEMUError is a special type of ExitError used when QEMU fails. In addition to
// normal ExitError features it prints stderr for debugging.
type QEMUError exec.ExitError

// Error returns the process state description followed by the stderr output
// stored in the error.
func (e *QEMUError) Error() string {
	return fmt.Sprintf("%v: %v", e.String(), string(e.Stderr))
}

// Unwrap exposes the error as the *exec.ExitError it was created from, so that
// callers can match it with errors.As and inspect the exit status.
func (e *QEMUError) Unwrap() error {
	return (*exec.ExitError)(e)
}