blob: c3fde27adad025e8a45dfdde90f6708569798721 [file] [log] [blame]
// Copyright The Monogon Project Authors.
// SPDX-License-Identifier: Apache-2.0

// nanoswitch is a virtualized switch/router combo intended for testing.
// It uses the interface whose MAC address is HostInterfaceMAC as an external
// interface to connect to the host and pass traffic in and out. All other
// Ethernet interfaces are bridged together and served by a built-in DHCP
// server. Traffic from that network to the SLIRP/external network is SNATed as
// the host-side SLIRP ignores routed packets.
//
// It also has built-in userspace proxying support for accessing the first
// node's services, as well as a SOCKS proxy to access all nodes within the
// network.
Lorenz Brun52f7f292020-06-24 16:42:02 +020014package main
15
import (
	"bytes"
	"context"
	"fmt"
	"io"
	"net"
	"os"
	"sync"
	"time"

	"github.com/google/nftables"
	"github.com/google/nftables/expr"
	"github.com/insomniacslk/dhcp/dhcpv4"
	"github.com/insomniacslk/dhcp/dhcpv4/server4"
	"github.com/vishvananda/netlink"

	common "source.monogon.dev/metropolis/node"
	"source.monogon.dev/metropolis/node/core/network/dhcp4c"
	dhcpcb "source.monogon.dev/metropolis/node/core/network/dhcp4c/callback"
	"source.monogon.dev/osbase/bringup"
	"source.monogon.dev/osbase/supervisor"
)
37
var (
	// HostInterfaceMAC is the MAC address of the host SLIRP network interface,
	// provided that interface is not disabled (see DisableHostNetworkInterface
	// in MicroVMOptions). The link carrying this MAC is treated as the external
	// link; every other Ethernet link is attached to the VM bridge.
	// ONCHANGE(//osbase/test/qemu:launch.go): constraints must be kept in sync with
	// HostInterfaceMAC.
	HostInterfaceMAC = net.HardwareAddr{0x02, 0x72, 0x82, 0xbf, 0xc3, 0x56}

	// switchIP is the address nanoswitch assigns to the VM bridge. It is also
	// announced to DHCP clients as router and server identifier.
	switchIP = net.IP{10, 1, 0, 1}
	// switchSubnetMask is the /24 netmask of the VM network.
	switchSubnetMask = net.IPv4Mask(255, 255, 255, 0)
)
Lorenz Brun52f7f292020-06-24 16:42:02 +020048
Serge Bazanski216fe7b2021-05-21 18:36:16 +020049// defaultLeaseOptions sets the lease options needed to properly configure
50// connectivity to nanoswitch.
Lorenz Brun52f7f292020-06-24 16:42:02 +020051func defaultLeaseOptions(reply *dhcpv4.DHCPv4) {
52 reply.GatewayIPAddr = switchIP
Serge Bazanski216fe7b2021-05-21 18:36:16 +020053 // SLIRP fake DNS server.
54 reply.UpdateOption(dhcpv4.OptDNS(net.IPv4(10, 42, 0, 3)))
Lorenz Brun52f7f292020-06-24 16:42:02 +020055 reply.UpdateOption(dhcpv4.OptRouter(switchIP))
Serge Bazanski216fe7b2021-05-21 18:36:16 +020056 // Make sure we exercise our DHCP client in E2E tests.
57 reply.UpdateOption(dhcpv4.OptIPAddressLeaseTime(30 * time.Second))
Lorenz Brun52f7f292020-06-24 16:42:02 +020058 reply.UpdateOption(dhcpv4.OptSubnetMask(switchSubnetMask))
59}
60
Serge Bazanski216fe7b2021-05-21 18:36:16 +020061// runDHCPServer runs an extremely minimal DHCP server with most options
62// hardcoded, a wrapping bump allocator for the IPs, 30 second lease timeout
63// and no support for DHCP collision detection.
Lorenz Brun52f7f292020-06-24 16:42:02 +020064func runDHCPServer(link netlink.Link) supervisor.Runnable {
Serge Bazanski3e5e5802022-06-21 13:46:31 +020065 currentIP := net.IP{10, 1, 0, 2}
Lorenz Brun52f7f292020-06-24 16:42:02 +020066
Serge Bazanskid279dc02022-05-06 12:17:42 +020067 // Map from stringified MAC address to IP address, allowing handing out the
68 // same IP to a given MAC on re-discovery.
69 leases := make(map[string]net.IP)
70
Lorenz Brun52f7f292020-06-24 16:42:02 +020071 return func(ctx context.Context) error {
72 laddr := net.UDPAddr{
73 IP: net.IPv4(0, 0, 0, 0),
74 Port: 67,
75 }
76 server, err := server4.NewServer(link.Attrs().Name, &laddr, func(conn net.PacketConn, peer net.Addr, m *dhcpv4.DHCPv4) {
77 if m == nil {
78 return
79 }
80 reply, err := dhcpv4.NewReplyFromRequest(m)
81 if err != nil {
Serge Bazanskic7359672020-10-30 16:38:57 +010082 supervisor.Logger(ctx).Warningf("Failed to generate DHCP reply: %v", err)
Lorenz Brun52f7f292020-06-24 16:42:02 +020083 return
84 }
85 reply.UpdateOption(dhcpv4.OptServerIdentifier(switchIP))
86 reply.ServerIPAddr = switchIP
87
88 switch m.MessageType() {
89 case dhcpv4.MessageTypeDiscover:
90 reply.UpdateOption(dhcpv4.OptMessageType(dhcpv4.MessageTypeOffer))
91 defaultLeaseOptions(reply)
Serge Bazanskid279dc02022-05-06 12:17:42 +020092 hwaddr := m.ClientHWAddr.String()
93 // Either hand out already allocated address from leases, or allocate new.
94 if ip, ok := leases[hwaddr]; ok {
95 reply.YourIPAddr = ip
96 } else {
Serge Bazanski3e5e5802022-06-21 13:46:31 +020097 leases[hwaddr] = net.ParseIP(currentIP.String())
98 reply.YourIPAddr = leases[hwaddr]
Serge Bazanskid279dc02022-05-06 12:17:42 +020099 currentIP[3]++ // Works only because it's a /24
100 }
101 supervisor.Logger(ctx).Infof("Replying with DHCP IP %s to %s", reply.YourIPAddr.String(), hwaddr)
Lorenz Brun52f7f292020-06-24 16:42:02 +0200102 case dhcpv4.MessageTypeRequest:
103 reply.UpdateOption(dhcpv4.OptMessageType(dhcpv4.MessageTypeAck))
104 defaultLeaseOptions(reply)
Lorenz Brundbac6cc2020-11-30 10:57:26 +0100105 if m.RequestedIPAddress() != nil {
106 reply.YourIPAddr = m.RequestedIPAddress()
107 } else {
108 reply.YourIPAddr = m.ClientIPAddr
109 }
Lorenz Brun52f7f292020-06-24 16:42:02 +0200110 case dhcpv4.MessageTypeRelease, dhcpv4.MessageTypeDecline:
111 supervisor.Logger(ctx).Info("Ignoring Release/Decline")
112 }
113 if _, err := conn.WriteTo(reply.ToBytes(), peer); err != nil {
Serge Bazanskic7359672020-10-30 16:38:57 +0100114 supervisor.Logger(ctx).Warningf("Cannot reply to client: %v", err)
Lorenz Brun52f7f292020-06-24 16:42:02 +0200115 }
116 })
117 if err != nil {
118 return err
119 }
120 supervisor.Signal(ctx, supervisor.SignalHealthy)
121 go func() {
122 <-ctx.Done()
123 server.Close()
124 }()
125 return server.Serve()
126 }
127}
128
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200129// userspaceProxy listens on port and proxies all TCP connections to the same
130// port on targetIP
Serge Bazanski52304a82021-10-29 16:56:18 +0200131func userspaceProxy(targetIP net.IP, port common.Port) supervisor.Runnable {
Lorenz Brun52f7f292020-06-24 16:42:02 +0200132 return func(ctx context.Context) error {
133 logger := supervisor.Logger(ctx)
134 tcpListener, err := net.ListenTCP("tcp", &net.TCPAddr{IP: net.IPv4(0, 0, 0, 0), Port: int(port)})
135 if err != nil {
136 return err
137 }
138 supervisor.Signal(ctx, supervisor.SignalHealthy)
139 go func() {
140 <-ctx.Done()
141 tcpListener.Close()
142 }()
143 for {
144 conn, err := tcpListener.AcceptTCP()
145 if err != nil {
146 if ctx.Err() != nil {
147 return ctx.Err()
148 }
149 return err
150 }
151 go func(conn *net.TCPConn) {
152 defer conn.Close()
153 upstreamConn, err := net.DialTCP("tcp", nil, &net.TCPAddr{IP: targetIP, Port: int(port)})
154 if err != nil {
Serge Bazanskic7359672020-10-30 16:38:57 +0100155 logger.Infof("Userspace proxy failed to connect to upstream: %v", err)
Lorenz Brun52f7f292020-06-24 16:42:02 +0200156 return
157 }
158 defer upstreamConn.Close()
159 go io.Copy(upstreamConn, conn)
160 io.Copy(conn, upstreamConn)
161 }(conn)
162 }
163
164 }
165}
166
167// addNetworkRoutes sets up routing from DHCP
168func addNetworkRoutes(link netlink.Link, addr net.IPNet, gw net.IP) error {
169 if err := netlink.AddrReplace(link, &netlink.Addr{IPNet: &addr}); err != nil {
170 return fmt.Errorf("failed to add DHCP address to network interface \"%v\": %w", link.Attrs().Name, err)
171 }
172
173 if gw.IsUnspecified() {
174 return nil
175 }
176
177 route := &netlink.Route{
178 Dst: &net.IPNet{IP: net.IPv4(0, 0, 0, 0), Mask: net.IPv4Mask(0, 0, 0, 0)},
179 Gw: gw,
180 Scope: netlink.SCOPE_UNIVERSE,
181 }
182 if err := netlink.RouteAdd(route); err != nil {
Tim Windelschmidtadcf5d72024-05-21 13:46:25 +0200183 return fmt.Errorf("could not add default route: netlink.RouteAdd(%+v): %w", route, err)
Lorenz Brun52f7f292020-06-24 16:42:02 +0200184 }
185 return nil
186}
187
// nfifname encodes an interface name for nftables: the result is always 16
// bytes long and holds the name followed by zero bytes. Names of 16 bytes or
// more are cut off after the 16th byte.
func nfifname(n string) []byte {
	var padded [16]byte
	copy(padded[:], n)
	return padded[:]
}
195
196func main() {
Tim Windelschmidt4f586b52025-04-02 15:04:10 +0200197 bringup.Runnable(root).Run()
198}
199
200func root(ctx context.Context) (err error) {
201 logger := supervisor.Logger(ctx)
202 logger.Info("Starting NanoSwitch, a tiny TOR switch emulator")
203
204 c := &nftables.Conn{}
205
206 links, err := netlink.LinkList()
Lorenz Brundf952412020-12-21 14:59:36 +0100207 if err != nil {
Tim Windelschmidt4f586b52025-04-02 15:04:10 +0200208 logger.Fatalf("Failed to list links: %v", err)
Lorenz Brundf952412020-12-21 14:59:36 +0100209 }
Tim Windelschmidt4f586b52025-04-02 15:04:10 +0200210 var externalLink netlink.Link
211 var vmLinks []netlink.Link
212 for _, link := range links {
213 attrs := link.Attrs()
214 if link.Type() == "device" && len(attrs.HardwareAddr) > 0 {
215 if attrs.Flags&net.FlagUp != net.FlagUp {
216 netlink.LinkSetUp(link) // Attempt to take up all ethernet links
Lorenz Brun52f7f292020-06-24 16:42:02 +0200217 }
Tim Windelschmidtb03d9ff2025-04-02 15:04:03 +0200218 if bytes.Equal(attrs.HardwareAddr, HostInterfaceMAC) {
Tim Windelschmidt4f586b52025-04-02 15:04:10 +0200219 externalLink = link
220 } else {
221 vmLinks = append(vmLinks, link)
Lorenz Brun52f7f292020-06-24 16:42:02 +0200222 }
223 }
Tim Windelschmidt4f586b52025-04-02 15:04:10 +0200224 }
225 vmBridgeLink := &netlink.Bridge{LinkAttrs: netlink.LinkAttrs{Name: "vmbridge", Flags: net.FlagUp}}
226 if err := netlink.LinkAdd(vmBridgeLink); err != nil {
227 logger.Fatalf("Failed to create vmbridge: %v", err)
228 }
229 for _, link := range vmLinks {
230 if err := netlink.LinkSetMaster(link, vmBridgeLink); err != nil {
231 logger.Fatalf("Failed to add VM interface to bridge: %v", err)
232 }
233 logger.Infof("Assigned interface %s to bridge", link.Attrs().Name)
234 }
235 if err := netlink.AddrReplace(vmBridgeLink, &netlink.Addr{IPNet: &net.IPNet{IP: switchIP, Mask: switchSubnetMask}}); err != nil {
236 logger.Fatalf("Failed to assign static IP to vmbridge: %v", err)
237 }
238 if externalLink != nil {
239 nat := c.AddTable(&nftables.Table{
240 Family: nftables.TableFamilyIPv4,
241 Name: "nat",
242 })
Lorenz Brun52f7f292020-06-24 16:42:02 +0200243
Tim Windelschmidt4f586b52025-04-02 15:04:10 +0200244 postrouting := c.AddChain(&nftables.Chain{
245 Name: "postrouting",
246 Hooknum: nftables.ChainHookPostrouting,
247 Priority: nftables.ChainPriorityNATSource,
248 Table: nat,
249 Type: nftables.ChainTypeNAT,
250 })
Lorenz Brun52f7f292020-06-24 16:42:02 +0200251
Tim Windelschmidt4f586b52025-04-02 15:04:10 +0200252 // Masquerade/SNAT all traffic going out of the external interface
253 c.AddRule(&nftables.Rule{
254 Table: nat,
255 Chain: postrouting,
256 Exprs: []expr.Any{
257 &expr.Meta{Key: expr.MetaKeyOIFNAME, Register: 1},
258 &expr.Cmp{
259 Op: expr.CmpOpEq,
260 Register: 1,
261 Data: nfifname(externalLink.Attrs().Name),
Lorenz Brun52f7f292020-06-24 16:42:02 +0200262 },
Tim Windelschmidt4f586b52025-04-02 15:04:10 +0200263 &expr.Masq{},
264 },
265 })
Lorenz Brun52f7f292020-06-24 16:42:02 +0200266
Tim Windelschmidt4f586b52025-04-02 15:04:10 +0200267 if err := c.Flush(); err != nil {
268 panic(err)
Lorenz Brun52f7f292020-06-24 16:42:02 +0200269 }
Tim Windelschmidt4f586b52025-04-02 15:04:10 +0200270
271 netIface := &net.Interface{
272 Name: externalLink.Attrs().Name,
273 MTU: externalLink.Attrs().MTU,
274 Index: externalLink.Attrs().Index,
275 Flags: externalLink.Attrs().Flags,
276 HardwareAddr: externalLink.Attrs().HardwareAddr,
277 }
278 dhcpClient, err := dhcp4c.NewClient(netIface)
279 if err != nil {
280 logger.Fatalf("Failed to create DHCP client: %v", err)
281 }
282 dhcpClient.RequestedOptions = []dhcpv4.OptionCode{dhcpv4.OptionRouter}
283 dhcpClient.LeaseCallback = dhcpcb.Compose(dhcpcb.ManageIP(externalLink), dhcpcb.ManageRoutes(externalLink))
284 supervisor.Run(ctx, "dhcp-client", dhcpClient.Run)
285 if err := os.WriteFile("/proc/sys/net/ipv4/ip_forward", []byte("1\n"), 0644); err != nil {
286 logger.Fatalf("Failed to write ip forwards: %v", err)
287 }
288 } else {
289 logger.Info("No upstream interface detected")
290 }
291 supervisor.Run(ctx, "dhcp-server", runDHCPServer(vmBridgeLink))
292 supervisor.Run(ctx, "proxy-cur1", userspaceProxy(net.IPv4(10, 1, 0, 2), common.CuratorServicePort))
293 supervisor.Run(ctx, "proxy-dbg1", userspaceProxy(net.IPv4(10, 1, 0, 2), common.DebugServicePort))
294 supervisor.Run(ctx, "proxy-k8s-api1", userspaceProxy(net.IPv4(10, 1, 0, 2), common.KubernetesAPIPort))
295 supervisor.Run(ctx, "proxy-k8s-api-wrapped1", userspaceProxy(net.IPv4(10, 1, 0, 2), common.KubernetesAPIWrappedPort))
296 supervisor.Run(ctx, "socks", runSOCKSProxy)
297 supervisor.Signal(ctx, supervisor.SignalHealthy)
298 supervisor.Signal(ctx, supervisor.SignalDone)
299 return nil
Lorenz Brun52f7f292020-06-24 16:42:02 +0200300}