blob: 9ce6d495325aeb93f9a0a4f21266c79d8cf6b601 [file] [log] [blame]
Serge Bazanski93d593b2023-03-28 16:43:47 +02001package clusternet
2
3import (
4 "fmt"
5 "net"
6 "os"
7
8 "github.com/vishvananda/netlink"
9 "golang.zx2c4.com/wireguard/wgctrl"
10 "golang.zx2c4.com/wireguard/wgctrl/wgtypes"
11
12 common "source.monogon.dev/metropolis/node"
13 "source.monogon.dev/metropolis/node/core/localstorage"
14)
15
// clusterNetDeviceName is the name of the WireGuard network interface that this
// package creates in the host network namespace.
const clusterNetDeviceName = "clusternet"
21
22// wireguard decouples the cluster networking service from actual mutations
23// performed in the local Linux networking namespace. This is mostly done to help
24// in testing the cluster networking service.
25//
26// Because it's effectively just a mockable interface, see the actual
27// localWireguard method implementations for documentation.
28type wireguard interface {
29 ensureOnDiskKey(dir *localstorage.DataKubernetesClusterNetworkingDirectory) error
30 setup(clusterNet *net.IPNet) error
31 configurePeers(n []*node) error
32 unconfigurePeer(n *node) error
33 key() wgtypes.Key
34 close()
35}
36
37type localWireguard struct {
38 wgClient *wgctrl.Client
39 privKey wgtypes.Key
40}
41
42// ensureOnDiskKey loads the private key from disk or (if none exists) generates
43// one and persists it. The resulting key is then saved into the localWireguard
44// instance.
45func (s *localWireguard) ensureOnDiskKey(dir *localstorage.DataKubernetesClusterNetworkingDirectory) error {
46 keyRaw, err := dir.Key.Read()
47 if os.IsNotExist(err) {
48 key, err := wgtypes.GeneratePrivateKey()
49 if err != nil {
50 return fmt.Errorf("when generating key: %w", err)
51 }
52 if err := dir.Key.Write([]byte(key.String()), 0600); err != nil {
53 return fmt.Errorf("save failed: %w", err)
54 }
55 s.privKey = key
56 return nil
57 } else if err != nil {
58 return fmt.Errorf("load failed: %w", err)
59 }
60
61 key, err := wgtypes.ParseKey(string(keyRaw))
62 if err != nil {
63 return fmt.Errorf("invalid private key in file: %w", err)
64 }
65 s.privKey = key
66 return nil
67}
68
69// setup the local network namespace by creating a WireGuard interface and adding
70// a clusterNet route to it. If a matching WireGuard interface already exists in
71// the system, it is first deleted.
72//
73// ensureOnDiskKey must be called before calling this function.
74func (s *localWireguard) setup(clusterNet *net.IPNet) error {
75 links, err := netlink.LinkList()
76 if err != nil {
77 return fmt.Errorf("could not list links: %w", err)
78 }
79 for _, link := range links {
80 if link.Attrs().Name != clusterNetDeviceName {
81 continue
82 }
83 if err := netlink.LinkDel(link); err != nil {
84 return fmt.Errorf("could not remove existing clusternet link: %w", err)
85 }
86 }
87
88 wgInterface := &netlink.Wireguard{LinkAttrs: netlink.LinkAttrs{Name: clusterNetDeviceName, Flags: net.FlagUp}}
89 if err := netlink.LinkAdd(wgInterface); err != nil {
90 return fmt.Errorf("when adding network interface: %w", err)
91 }
92
93 wgClient, err := wgctrl.New()
94 if err != nil {
95 return fmt.Errorf("when creating wireguard client: %w", err)
96 }
97 s.wgClient = wgClient
98
99 listenPort := int(common.WireGuardPort)
100 if err := s.wgClient.ConfigureDevice(clusterNetDeviceName, wgtypes.Config{
101 PrivateKey: &s.privKey,
102 ListenPort: &listenPort,
103 }); err != nil {
104 return fmt.Errorf("when setting up device: %w", err)
105 }
106
107 if err := netlink.RouteAdd(&netlink.Route{
108 Dst: clusterNet,
109 LinkIndex: wgInterface.Index,
110 Protocol: common.ProtocolClusternet,
111 }); err != nil && !os.IsExist(err) {
112 return fmt.Errorf("when creating cluster route: %w", err)
113 }
114 return nil
115}
116
117// configurePeers creates or updates a peers on the local wireguard interface
118// based on the given nodes.
119//
120// If any node is somehow invalid and causes a parse/reconfiguration error, the
121// function will return an error. The caller should retry with a different set of
122// nodes, performing search/bisection on its own.
123func (s *localWireguard) configurePeers(nodes []*node) error {
124 var configs []wgtypes.PeerConfig
125
126 for i, n := range nodes {
127 if s.privKey.PublicKey().String() == n.pubkey {
128 // Node doesn't need to connect to itself
129 continue
130 }
131 pubkeyParsed, err := wgtypes.ParseKey(n.pubkey)
132 if err != nil {
133 return fmt.Errorf("node %d: failed to parse public-key %q: %w", i, n.pubkey, err)
134 }
135 addressParsed := net.ParseIP(n.address)
136 if addressParsed == nil {
137 return fmt.Errorf("node %d: failed to parse address %q: %w", i, n.address, err)
138 }
139 var allowedIPs []net.IPNet
140 for _, prefix := range n.prefixes {
141 _, podNet, err := net.ParseCIDR(prefix)
142 if err != nil {
143 // Just eat the parse error. Not much we can do here. We have enough validation
144 // in the rest of the system that we shouldn't ever reach this.
145 continue
146 }
147 allowedIPs = append(allowedIPs, *podNet)
148 }
149 endpoint := net.UDPAddr{Port: int(common.WireGuardPort), IP: addressParsed}
150 configs = append(configs, wgtypes.PeerConfig{
151 PublicKey: pubkeyParsed,
152 Endpoint: &endpoint,
153 ReplaceAllowedIPs: true,
154 AllowedIPs: allowedIPs,
155 })
156 }
157
158 err := s.wgClient.ConfigureDevice(clusterNetDeviceName, wgtypes.Config{
159 Peers: configs,
160 })
161 if err != nil {
162 return fmt.Errorf("failed to configure WireGuard peers: %w", err)
163 }
164 return nil
165}
166
167// unconfigurePeer removes the peer from the local WireGuard interface based on
168// the given node. If no peer existed matching the given node, this operation is
169// a no-op.
170func (s *localWireguard) unconfigurePeer(n *node) error {
171 pubkeyParsed, err := wgtypes.ParseKey(n.pubkey)
172 if err != nil {
173 return fmt.Errorf("failed to parse public-key %q: %w", n.pubkey, err)
174 }
175
176 err = s.wgClient.ConfigureDevice(clusterNetDeviceName, wgtypes.Config{
177 Peers: []wgtypes.PeerConfig{{
178 PublicKey: pubkeyParsed,
179 Remove: true,
180 }},
181 })
182 if err != nil {
183 return fmt.Errorf("failed to delete WireGuard peer: %w", err)
184 }
185 return nil
186}
187
188func (s *localWireguard) key() wgtypes.Key {
189 return s.privKey
190}
191
192// close cleans up after the wireguard client, but does _not_ remove the
193// interface or peers.
194func (s *localWireguard) close() {
195 s.wgClient.Close()
196 s.wgClient = nil
197}