blob: 219e7873c050fa55fe95de6075f6077b6b6c5fe3 [file] [log] [blame]
Lorenz Brunfc5dbc62020-05-28 12:18:07 +02001// Copyright 2020 The Monogon Project Authors.
2//
3// SPDX-License-Identifier: Apache-2.0
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
// Package launch implements test harnesses for running QEMU VMs from tests.
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020018package launch
19
import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"net"
	"os"
	"os/exec"
	"sort"
	"strconv"
	"strings"
	"syscall"

	"golang.org/x/sys/unix"
	"google.golang.org/grpc"

	"source.monogon.dev/metropolis/node"
	"source.monogon.dev/metropolis/pkg/freeport"
)
39
// QemuValue is the structured form of a single QEMU command line option: every
// key maps to the list of values it should be emitted with.
type QemuValue map[string][]string

// ToOption encodes structured data into a QEMU option. Example: "test", {"key1":
// {"val1"}, "key2": {"val2", "val3"}} returns "test,key1=val1,key2=val2,key2=val3"
//
// If name is empty, no leading option name is emitted. A key without any values
// is emitted on its own (without "="). Keys are emitted in sorted order so that
// the result is deterministic; the values of a key keep their slice order.
func (value QemuValue) ToOption(name string) string {
	// Go randomizes map iteration order, so collect and sort the keys first.
	keys := make([]string, 0, len(value))
	for key := range value {
		keys = append(keys, key)
	}
	sort.Strings(keys)

	var optionValues []string
	if name != "" {
		optionValues = append(optionValues, name)
	}
	for _, key := range keys {
		values := value[key]
		if len(values) == 0 {
			// Bare flag-style key.
			optionValues = append(optionValues, key)
			continue
		}
		for _, val := range values {
			optionValues = append(optionValues, key+"="+val)
		}
	}
	return strings.Join(optionValues, ",")
}
59
Serge Bazanski216fe7b2021-05-21 18:36:16 +020060// PortMap represents where VM ports are mapped to on the host. It maps from the VM
61// port number to the host port number.
Serge Bazanski52304a82021-10-29 16:56:18 +020062type PortMap map[node.Port]uint16
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020063
Serge Bazanski66e58952021-10-05 17:06:56 +020064// ToQemuForwards generates QEMU hostfwd values (https://qemu.weilnetz.de/doc/qemu-
Serge Bazanski216fe7b2021-05-21 18:36:16 +020065// doc.html#:~:text=hostfwd=) for all mapped ports.
Serge Bazanski66e58952021-10-05 17:06:56 +020066func (p PortMap) ToQemuForwards() []string {
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020067 var hostfwdOptions []string
68 for vmPort, hostPort := range p {
Serge Bazanski52304a82021-10-29 16:56:18 +020069 hostfwdOptions = append(hostfwdOptions, fmt.Sprintf("tcp::%d-:%d", hostPort, vmPort))
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020070 }
71 return hostfwdOptions
72}
73
Serge Bazanski216fe7b2021-05-21 18:36:16 +020074// DialGRPC creates a gRPC client for a VM port that's forwarded/mapped to the
75// host. The given port is automatically resolved to the host-mapped port.
Serge Bazanski52304a82021-10-29 16:56:18 +020076func (p PortMap) DialGRPC(port node.Port, opts ...grpc.DialOption) (*grpc.ClientConn, error) {
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020077 mappedPort, ok := p[port]
78 if !ok {
Serge Bazanski52304a82021-10-29 16:56:18 +020079 return nil, fmt.Errorf("cannot dial port: port %d is not mapped/forwarded", port)
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020080 }
Serge Bazanski52304a82021-10-29 16:56:18 +020081 grpcClient, err := grpc.Dial(fmt.Sprintf("localhost:%d", mappedPort), opts...)
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020082 if err != nil {
Serge Bazanski52304a82021-10-29 16:56:18 +020083 return nil, fmt.Errorf("failed to dial port %d: %w", port, err)
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020084 }
85 return grpcClient, nil
86}
87
Serge Bazanski216fe7b2021-05-21 18:36:16 +020088// IdentityPortMap returns a port map where each given port is mapped onto itself
89// on the host. This is mainly useful for development against Metropolis. The dbg
90// command requires this mapping.
Serge Bazanski52304a82021-10-29 16:56:18 +020091func IdentityPortMap(ports []node.Port) PortMap {
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020092 portMap := make(PortMap)
Lorenz Bruned0503c2020-07-28 17:21:25 +020093 for _, port := range ports {
Serge Bazanski52304a82021-10-29 16:56:18 +020094 portMap[port] = uint16(port)
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020095 }
96 return portMap
97}
98
Serge Bazanski216fe7b2021-05-21 18:36:16 +020099// ConflictFreePortMap returns a port map where each given port is mapped onto a
100// random free port on the host. This is intended for automated testing where
101// multiple instances of Metropolis nodes might be running. Please call this
102// function for each Launch command separately and as close to it as possible since
103// it cannot guarantee that the ports will remain free.
Serge Bazanski52304a82021-10-29 16:56:18 +0200104func ConflictFreePortMap(ports []node.Port) (PortMap, error) {
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200105 portMap := make(PortMap)
Lorenz Bruned0503c2020-07-28 17:21:25 +0200106 for _, port := range ports {
Serge Bazanskicb883e22020-07-06 17:47:55 +0200107 mappedPort, listenCloser, err := freeport.AllocateTCPPort()
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200108 if err != nil {
109 return portMap, fmt.Errorf("failed to get free host port: %w", err)
110 }
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200111 // Defer closing of the listening port until the function is done and all ports are
112 // allocated
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200113 defer listenCloser.Close()
114 portMap[port] = mappedPort
115 }
116 return portMap, nil
117}
118
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200119// NewSocketPair creates a new socket pair. By connecting both ends to different
120// instances you can connect them with a virtual "network cable". The ends can be
121// passed into the ConnectToSocket option.
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200122func NewSocketPair() (*os.File, *os.File, error) {
123 fds, err := unix.Socketpair(unix.AF_UNIX, syscall.SOCK_STREAM, 0)
124 if err != nil {
125 return nil, nil, fmt.Errorf("failed to call socketpair: %w", err)
126 }
127
128 fd1 := os.NewFile(uintptr(fds[0]), "network0")
129 fd2 := os.NewFile(uintptr(fds[1]), "network1")
130 return fd1, fd2, nil
131}
132
// HostInterfaceMAC is the MAC address the host SLIRP network interface has if it
// is not disabled (see DisableHostNetworkInterface in MicroVMOptions). RunMicroVM
// passes it to QEMU as the mac= property of the usernet0 virtio-net device.
var HostInterfaceMAC = net.HardwareAddr{0x02, 0x72, 0x82, 0xbf, 0xc3, 0x56}

136
// MicroVMOptions contains all options to start a MicroVM with RunMicroVM.
type MicroVMOptions struct {
	// Path to the ELF kernel binary. Passed to QEMU as -kernel.
	KernelPath string

	// Path to the Initramfs. Passed to QEMU as -initrd.
	InitramfsPath string

	// Cmdline contains additional kernel commandline options. They are appended
	// after the options RunMicroVM always sets ("reboot=t console=hvc0 quiet").
	Cmdline string

	// SerialPort receives the output of the VM's console (hvc0): RunMicroVM
	// attaches it to the standard output of the QEMU process, which backs the
	// virtconsole device. It can be set to an existing file (like
	// os.Stdout/os.Stderr) or to any other io.Writer.
	SerialPort io.Writer

	// ExtraChardevs can be used similar to SerialPort, but can contain an arbitrary
	// number of additional serial ports. Each file is passed down to QEMU and
	// exposed to the VM as a virtserialport device.
	ExtraChardevs []*os.File

	// ExtraNetworkInterfaces can contain an arbitrary number of file descriptors which
	// are mapped into the VM as virtio network interfaces. Unless
	// DisableHostNetworkInterface is set, the first interface is always a
	// SLIRP-backed interface for communicating with the host.
	ExtraNetworkInterfaces []*os.File

	// PortMap contains ports that are mapped to the host through the built-in SLIRP
	// network interface.
	PortMap PortMap

	// DisableHostNetworkInterface disables the SLIRP-backed host network interface
	// that is normally the first network interface. If this is set PortMap is ignored.
	// Mostly useful for speeding up QEMU's startup time for tests.
	DisableHostNetworkInterface bool
}

172
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200173// RunMicroVM launches a tiny VM mostly intended for testing. Very quick to boot
174// (<40ms).
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200175func RunMicroVM(ctx context.Context, opts *MicroVMOptions) error {
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200176 // Generate options for all the file descriptors we'll be passing as virtio "serial
177 // ports"
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200178 var extraArgs []string
179 for idx, _ := range opts.ExtraChardevs {
180 idxStr := strconv.Itoa(idx)
181 id := "extra" + idxStr
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200182 // That this works is pretty much a hack, but upstream QEMU doesn't have a
183 // bidirectional chardev backend not based around files/sockets on the disk which
184 // are a giant pain to work with. We're using QEMU's fdset functionality to make
185 // FDs available as pseudo-files and then "ab"using the pipe backend's fallback
186 // functionality to get a single bidirectional chardev backend backed by a passed-
187 // down RDWR fd. Ref https://lists.gnu.org/archive/html/qemu-devel/2015-
188 // 12/msg01256.html
Serge Bazanski66e58952021-10-05 17:06:56 +0200189 addFdConf := QemuValue{
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200190 "set": {idxStr},
191 "fd": {strconv.Itoa(idx + 3)},
192 }
Serge Bazanski66e58952021-10-05 17:06:56 +0200193 chardevConf := QemuValue{
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200194 "id": {id},
195 "path": {"/dev/fdset/" + idxStr},
196 }
Serge Bazanski66e58952021-10-05 17:06:56 +0200197 deviceConf := QemuValue{
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200198 "chardev": {id},
199 }
Serge Bazanski66e58952021-10-05 17:06:56 +0200200 extraArgs = append(extraArgs, "-add-fd", addFdConf.ToOption(""),
201 "-chardev", chardevConf.ToOption("pipe"), "-device", deviceConf.ToOption("virtserialport"))
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200202 }
203
204 for idx, _ := range opts.ExtraNetworkInterfaces {
205 id := fmt.Sprintf("net%v", idx)
Serge Bazanski66e58952021-10-05 17:06:56 +0200206 netdevConf := QemuValue{
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200207 "id": {id},
208 "fd": {strconv.Itoa(idx + 3 + len(opts.ExtraChardevs))},
209 }
Serge Bazanski66e58952021-10-05 17:06:56 +0200210 extraArgs = append(extraArgs, "-netdev", netdevConf.ToOption("socket"), "-device", "virtio-net-device,netdev="+id)
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200211 }
212
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200213 // This sets up a minimum viable environment for our Linux kernel. It clears all
214 // standard QEMU configuration and sets up a MicroVM machine
215 // (https://github.com/qemu/qemu/blob/master/docs/microvm.rst) with all legacy
216 // emulation turned off. This means the only "hardware" the Linux kernel inside can
217 // communicate with is a single virtio-mmio region. Over that MMIO interface we run
218 // a paravirtualized RNG (since the kernel in there has nothing to gather that from
219 // and it delays booting), a single paravirtualized console and an arbitrary number
220 // of extra serial ports for talking to various things that might run inside. The
221 // kernel, initramfs and command line are mapped into VM memory at boot time and
222 // not loaded from any sort of disk. Booting and shutting off one of these VMs
223 // takes <100ms.
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200224 baseArgs := []string{"-nodefaults", "-no-user-config", "-nographic", "-no-reboot",
225 "-accel", "kvm", "-cpu", "host",
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200226 // Needed until QEMU updates their bundled qboot version (needs
227 // https://github.com/bonzini/qboot/pull/28)
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200228 "-bios", "external/com_github_bonzini_qboot/bios.bin",
229 "-M", "microvm,x-option-roms=off,pic=off,pit=off,rtc=off,isa-serial=off",
230 "-kernel", opts.KernelPath,
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200231 // We force using a triple-fault reboot strategy since otherwise the kernel first
232 // tries others (like ACPI) which are not available in this very restricted
233 // environment. Similarly we need to override the boot console since there's
234 // nothing on the ISA bus that the kernel could talk to. We also force quiet for
235 // performance reasons.
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200236 "-append", "reboot=t console=hvc0 quiet " + opts.Cmdline,
237 "-initrd", opts.InitramfsPath,
238 "-device", "virtio-rng-device,max-bytes=1024,period=1000",
239 "-device", "virtio-serial-device,max_ports=16",
240 "-chardev", "stdio,id=con0", "-device", "virtconsole,chardev=con0",
241 }
242
243 if !opts.DisableHostNetworkInterface {
244 qemuNetType := "user"
Serge Bazanski66e58952021-10-05 17:06:56 +0200245 qemuNetConfig := QemuValue{
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200246 "id": {"usernet0"},
247 "net": {"10.42.0.0/24"},
248 "dhcpstart": {"10.42.0.10"},
249 }
250 if opts.PortMap != nil {
Serge Bazanski66e58952021-10-05 17:06:56 +0200251 qemuNetConfig["hostfwd"] = opts.PortMap.ToQemuForwards()
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200252 }
253
Serge Bazanski66e58952021-10-05 17:06:56 +0200254 baseArgs = append(baseArgs, "-netdev", qemuNetConfig.ToOption(qemuNetType),
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200255 "-device", "virtio-net-device,netdev=usernet0,mac="+HostInterfaceMAC.String())
256 }
257
258 var stdErrBuf bytes.Buffer
259 cmd := exec.CommandContext(ctx, "qemu-system-x86_64", append(baseArgs, extraArgs...)...)
260 cmd.Stdout = opts.SerialPort
261 cmd.Stderr = &stdErrBuf
262
263 cmd.ExtraFiles = append(cmd.ExtraFiles, opts.ExtraChardevs...)
264 cmd.ExtraFiles = append(cmd.ExtraFiles, opts.ExtraNetworkInterfaces...)
265
266 err := cmd.Run()
Serge Bazanski66e58952021-10-05 17:06:56 +0200267 // If it's a context error, just quit. There's no way to tell a
268 // killed-due-to-context vs killed-due-to-external-reason error returned by Run,
269 // so we approximate by looking at the context's status.
270 if err != nil && ctx.Err() != nil {
271 return ctx.Err()
272 }
273
Lorenz Brun3ff5af32020-06-24 16:34:11 +0200274 var exerr *exec.ExitError
275 if err != nil && errors.As(err, &exerr) {
276 exerr.Stderr = stdErrBuf.Bytes()
277 newErr := QEMUError(*exerr)
278 return &newErr
279 }
280 return err
281}
282
// QEMUError is a special type of ExitError used when QEMU fails. It has the same
// fields as exec.ExitError, but its error message additionally contains the
// captured stderr output for debugging.
type QEMUError exec.ExitError

// Error returns the process state description followed by the contents of
// Stderr, separated by ": ".
func (e *QEMUError) Error() string {
	state := e.ProcessState.String()
	return fmt.Sprintf("%s: %s", state, e.Stderr)
}