|  | // Copyright 2020 The Monogon Project Authors. | 
|  | // | 
|  | // SPDX-License-Identifier: Apache-2.0 | 
|  | // | 
|  | // Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | // you may not use this file except in compliance with the License. | 
|  | // You may obtain a copy of the License at | 
|  | // | 
|  | //     http://www.apache.org/licenses/LICENSE-2.0 | 
|  | // | 
|  | // Unless required by applicable law or agreed to in writing, software | 
|  | // distributed under the License is distributed on an "AS IS" BASIS, | 
|  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | // See the License for the specific language governing permissions and | 
|  | // limitations under the License. | 
|  |  | 
|  | // Package launch implements test harnesses for running QEMU VMs from tests. | 
|  | package launch | 
|  |  | 
import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"net"
	"os"
	"os/exec"
	"sort"
	"strconv"
	"strings"
	"syscall"

	"golang.org/x/sys/unix"

	"source.monogon.dev/metropolis/pkg/freeport"
)
|  |  | 
// QemuValue holds the key/value pairs that make up the argument of a single
// QEMU command line option. A key can carry several values, in which case it is
// emitted once per value, or no value at all, in which case only the key itself
// is emitted.
type QemuValue map[string][]string

// ToOption encodes structured data into a QEMU option. Example: "test", {"key1":
// {"val1"}, "key2": {"val2", "val3"}} returns "test,key1=val1,key2=val2,key2=val3".
//
// If name is empty the result starts directly with the first key. Keys are
// emitted in sorted order so that identical input always produces an identical
// string; the values of one key keep the order they have in their slice.
func (value QemuValue) ToOption(name string) string {
	// Go randomizes map iteration order, so the keys are collected and sorted
	// first to keep the generated option stable between calls.
	keys := make([]string, 0, len(value))
	for key := range value {
		keys = append(keys, key)
	}
	sort.Strings(keys)

	var optionValues []string
	if name != "" {
		optionValues = append(optionValues, name)
	}
	for _, key := range keys {
		values := value[key]
		if len(values) == 0 {
			// A key without values is emitted on its own, without "=".
			optionValues = append(optionValues, key)
			continue
		}
		for _, val := range values {
			optionValues = append(optionValues, key+"="+val)
		}
	}
	return strings.Join(optionValues, ",")
}
|  |  | 
// PortMap represents where VM ports are mapped to on the host. It maps from the VM
// port number to the host port number.
type PortMap map[uint16]uint16

// ToQemuForwards generates QEMU hostfwd values
// (https://qemu.weilnetz.de/doc/qemu-doc.html#:~:text=hostfwd=) for all mapped
// ports.
//
// Each entry has the form "tcp::<host port>-:<VM port>". Entries are ordered by
// ascending VM port so that the same map always yields the same slice. An empty
// or nil map yields a nil slice.
func (p PortMap) ToQemuForwards() []string {
	if len(p) == 0 {
		return nil
	}

	// Go randomizes map iteration order; sort the VM ports to get a stable result.
	vmPorts := make([]uint16, 0, len(p))
	for vmPort := range p {
		vmPorts = append(vmPorts, vmPort)
	}
	sort.Slice(vmPorts, func(i, j int) bool { return vmPorts[i] < vmPorts[j] })

	hostfwdOptions := make([]string, 0, len(vmPorts))
	for _, vmPort := range vmPorts {
		hostfwdOptions = append(hostfwdOptions, fmt.Sprintf("tcp::%d-:%d", p[vmPort], vmPort))
	}
	return hostfwdOptions
}
|  |  | 
|  | // IdentityPortMap returns a port map where each given port is mapped onto itself | 
|  | // on the host. This is mainly useful for development against Metropolis. The dbg | 
|  | // command requires this mapping. | 
|  | func IdentityPortMap(ports []uint16) PortMap { | 
|  | portMap := make(PortMap) | 
|  | for _, port := range ports { | 
|  | portMap[port] = uint16(port) | 
|  | } | 
|  | return portMap | 
|  | } | 
|  |  | 
|  | // ConflictFreePortMap returns a port map where each given port is mapped onto a | 
|  | // random free port on the host. This is intended for automated testing where | 
|  | // multiple instances of Metropolis nodes might be running. Please call this | 
|  | // function for each Launch command separately and as close to it as possible since | 
|  | // it cannot guarantee that the ports will remain free. | 
|  | func ConflictFreePortMap(ports []uint16) (PortMap, error) { | 
|  | portMap := make(PortMap) | 
|  | for _, port := range ports { | 
|  | mappedPort, listenCloser, err := freeport.AllocateTCPPort() | 
|  | if err != nil { | 
|  | return portMap, fmt.Errorf("failed to get free host port: %w", err) | 
|  | } | 
|  | // Defer closing of the listening port until the function is done and all ports are | 
|  | // allocated | 
|  | defer listenCloser.Close() | 
|  | portMap[port] = mappedPort | 
|  | } | 
|  | return portMap, nil | 
|  | } | 
|  |  | 
|  | // NewSocketPair creates a new socket pair. By connecting both ends to different | 
|  | // instances you can connect them with a virtual "network cable". The ends can be | 
|  | // passed into the ConnectToSocket option. | 
|  | func NewSocketPair() (*os.File, *os.File, error) { | 
|  | fds, err := unix.Socketpair(unix.AF_UNIX, syscall.SOCK_STREAM, 0) | 
|  | if err != nil { | 
|  | return nil, nil, fmt.Errorf("failed to call socketpair: %w", err) | 
|  | } | 
|  |  | 
|  | fd1 := os.NewFile(uintptr(fds[0]), "network0") | 
|  | fd2 := os.NewFile(uintptr(fds[1]), "network1") | 
|  | return fd1, fd2, nil | 
|  | } | 
|  |  | 
// HostInterfaceMAC is the MAC address given to the SLIRP-backed host network
// interface of a MicroVM. That interface only exists as long as
// DisableHostNetworkInterface in MicroVMOptions is not set.
var HostInterfaceMAC = net.HardwareAddr{
	0x02, 0x72, 0x82, 0xbf, 0xc3, 0x56,
}
|  |  | 
|  | // MicroVMOptions contains all options to start a MicroVM | 
|  | type MicroVMOptions struct { | 
|  | // Path to the ELF kernel binary | 
|  | KernelPath string | 
|  |  | 
|  | // Path to the Initramfs | 
|  | InitramfsPath string | 
|  |  | 
|  | // Cmdline contains additional kernel commandline options | 
|  | Cmdline string | 
|  |  | 
|  | // SerialPort is a File(descriptor) over which you can communicate with the serial | 
|  | // port of the machine It can be set to an existing file descriptor (like | 
|  | // os.Stdout/os.Stderr) or you can use NewSocketPair() to get one end to talk to | 
|  | // from Go. | 
|  | SerialPort io.Writer | 
|  |  | 
|  | // ExtraChardevs can be used similar to SerialPort, but can contain an arbitrary | 
|  | // number of additional serial ports | 
|  | ExtraChardevs []*os.File | 
|  |  | 
|  | // ExtraNetworkInterfaces can contain an arbitrary number of file descriptors which | 
|  | // are mapped into the VM as virtio network interfaces. The first interface is | 
|  | // always a SLIRP-backed interface for communicating with the host. | 
|  | ExtraNetworkInterfaces []*os.File | 
|  |  | 
|  | // PortMap contains ports that are mapped to the host through the built-in SLIRP | 
|  | // network interface. | 
|  | PortMap PortMap | 
|  |  | 
|  | // DisableHostNetworkInterface disables the SLIRP-backed host network interface | 
|  | // that is normally the first network interface. If this is set PortMap is ignored. | 
|  | // Mostly useful for speeding up QEMU's startup time for tests. | 
|  | DisableHostNetworkInterface bool | 
|  | } | 
|  |  | 
|  | // RunMicroVM launches a tiny VM mostly intended for testing. Very quick to boot | 
|  | // (<40ms). | 
|  | func RunMicroVM(ctx context.Context, opts *MicroVMOptions) error { | 
|  | // Generate options for all the file descriptors we'll be passing as virtio "serial | 
|  | // ports" | 
|  | var extraArgs []string | 
|  | for idx, _ := range opts.ExtraChardevs { | 
|  | idxStr := strconv.Itoa(idx) | 
|  | id := "extra" + idxStr | 
|  | // That this works is pretty much a hack, but upstream QEMU doesn't have a | 
|  | // bidirectional chardev backend not based around files/sockets on the disk which | 
|  | // are a giant pain to work with. We're using QEMU's fdset functionality to make | 
|  | // FDs available as pseudo-files and then "ab"using the pipe backend's fallback | 
|  | // functionality to get a single bidirectional chardev backend backed by a passed- | 
|  | // down RDWR fd. Ref https://lists.gnu.org/archive/html/qemu-devel/2015- | 
|  | // 12/msg01256.html | 
|  | addFdConf := QemuValue{ | 
|  | "set": {idxStr}, | 
|  | "fd":  {strconv.Itoa(idx + 3)}, | 
|  | } | 
|  | chardevConf := QemuValue{ | 
|  | "id":   {id}, | 
|  | "path": {"/dev/fdset/" + idxStr}, | 
|  | } | 
|  | deviceConf := QemuValue{ | 
|  | "chardev": {id}, | 
|  | } | 
|  | extraArgs = append(extraArgs, "-add-fd", addFdConf.ToOption(""), | 
|  | "-chardev", chardevConf.ToOption("pipe"), "-device", deviceConf.ToOption("virtserialport")) | 
|  | } | 
|  |  | 
|  | for idx, _ := range opts.ExtraNetworkInterfaces { | 
|  | id := fmt.Sprintf("net%v", idx) | 
|  | netdevConf := QemuValue{ | 
|  | "id": {id}, | 
|  | "fd": {strconv.Itoa(idx + 3 + len(opts.ExtraChardevs))}, | 
|  | } | 
|  | extraArgs = append(extraArgs, "-netdev", netdevConf.ToOption("socket"), "-device", "virtio-net-device,netdev="+id) | 
|  | } | 
|  |  | 
|  | // This sets up a minimum viable environment for our Linux kernel. It clears all | 
|  | // standard QEMU configuration and sets up a MicroVM machine | 
|  | // (https://github.com/qemu/qemu/blob/master/docs/microvm.rst) with all legacy | 
|  | // emulation turned off. This means the only "hardware" the Linux kernel inside can | 
|  | // communicate with is a single virtio-mmio region. Over that MMIO interface we run | 
|  | // a paravirtualized RNG (since the kernel in there has nothing to gather that from | 
|  | // and it delays booting), a single paravirtualized console and an arbitrary number | 
|  | // of extra serial ports for talking to various things that might run inside. The | 
|  | // kernel, initramfs and command line are mapped into VM memory at boot time and | 
|  | // not loaded from any sort of disk. Booting and shutting off one of these VMs | 
|  | // takes <100ms. | 
|  | baseArgs := []string{"-nodefaults", "-no-user-config", "-nographic", "-no-reboot", | 
|  | "-accel", "kvm", "-cpu", "host", | 
|  | // Needed until QEMU updates their bundled qboot version (needs | 
|  | // https://github.com/bonzini/qboot/pull/28) | 
|  | "-bios", "external/com_github_bonzini_qboot/bios.bin", | 
|  | "-M", "microvm,x-option-roms=off,pic=off,pit=off,rtc=off,isa-serial=off", | 
|  | "-kernel", opts.KernelPath, | 
|  | // We force using a triple-fault reboot strategy since otherwise the kernel first | 
|  | // tries others (like ACPI) which are not available in this very restricted | 
|  | // environment. Similarly we need to override the boot console since there's | 
|  | // nothing on the ISA bus that the kernel could talk to. We also force quiet for | 
|  | // performance reasons. | 
|  | "-append", "reboot=t console=hvc0 quiet " + opts.Cmdline, | 
|  | "-initrd", opts.InitramfsPath, | 
|  | "-device", "virtio-rng-device,max-bytes=1024,period=1000", | 
|  | "-device", "virtio-serial-device,max_ports=16", | 
|  | "-chardev", "stdio,id=con0", "-device", "virtconsole,chardev=con0", | 
|  | } | 
|  |  | 
|  | if !opts.DisableHostNetworkInterface { | 
|  | qemuNetType := "user" | 
|  | qemuNetConfig := QemuValue{ | 
|  | "id":        {"usernet0"}, | 
|  | "net":       {"10.42.0.0/24"}, | 
|  | "dhcpstart": {"10.42.0.10"}, | 
|  | } | 
|  | if opts.PortMap != nil { | 
|  | qemuNetConfig["hostfwd"] = opts.PortMap.ToQemuForwards() | 
|  | } | 
|  |  | 
|  | baseArgs = append(baseArgs, "-netdev", qemuNetConfig.ToOption(qemuNetType), | 
|  | "-device", "virtio-net-device,netdev=usernet0,mac="+HostInterfaceMAC.String()) | 
|  | } | 
|  |  | 
|  | var stdErrBuf bytes.Buffer | 
|  | cmd := exec.CommandContext(ctx, "qemu-system-x86_64", append(baseArgs, extraArgs...)...) | 
|  | cmd.Stdout = opts.SerialPort | 
|  | cmd.Stderr = &stdErrBuf | 
|  |  | 
|  | cmd.ExtraFiles = append(cmd.ExtraFiles, opts.ExtraChardevs...) | 
|  | cmd.ExtraFiles = append(cmd.ExtraFiles, opts.ExtraNetworkInterfaces...) | 
|  |  | 
|  | err := cmd.Run() | 
|  | // If it's a context error, just quit. There's no way to tell a | 
|  | // killed-due-to-context vs killed-due-to-external-reason error returned by Run, | 
|  | // so we approximate by looking at the context's status. | 
|  | if err != nil && ctx.Err() != nil { | 
|  | return ctx.Err() | 
|  | } | 
|  |  | 
|  | var exerr *exec.ExitError | 
|  | if err != nil && errors.As(err, &exerr) { | 
|  | exerr.Stderr = stdErrBuf.Bytes() | 
|  | newErr := QEMUError(*exerr) | 
|  | return &newErr | 
|  | } | 
|  | return err | 
|  | } | 
|  |  | 
// QEMUError is the error type used when the QEMU process fails. It has the same
// fields as exec.ExitError, but its message additionally contains the stderr
// output stored in it, which helps with debugging.
type QEMUError exec.ExitError

// Error returns the textual process state followed by the stored stderr
// output, separated by ": ".
func (e *QEMUError) Error() string {
	state := e.String()
	return fmt.Sprintf("%s: %s", state, e.Stderr)
}