blob: b92cfb27bcd6890942b2f5f6311de9c3306037b2 [file] [log] [blame]
Serge Bazanski93d593b2023-03-28 16:43:47 +02001package clusternet
2
3import (
Serge Bazanski93d593b2023-03-28 16:43:47 +02004 "fmt"
5 "net"
6 "os"
Serge Bazanski60461b22023-10-26 19:16:59 +02007 "slices"
8 "sort"
Serge Bazanski93d593b2023-03-28 16:43:47 +02009 "sync"
10 "testing"
11 "time"
12
13 "golang.zx2c4.com/wireguard/wgctrl"
14 "golang.zx2c4.com/wireguard/wgctrl/wgtypes"
Serge Bazanski93d593b2023-03-28 16:43:47 +020015
16 common "source.monogon.dev/metropolis/node"
17 "source.monogon.dev/metropolis/node/core/localstorage"
18 "source.monogon.dev/metropolis/node/core/localstorage/declarative"
Serge Bazanskib565cc62023-03-30 18:43:51 +020019 "source.monogon.dev/metropolis/node/core/network"
Serge Bazanski93d593b2023-03-28 16:43:47 +020020 "source.monogon.dev/metropolis/pkg/event/memory"
21 "source.monogon.dev/metropolis/pkg/supervisor"
Tim Windelschmidt5d0906e2023-07-20 20:23:57 +020022 "source.monogon.dev/metropolis/test/util"
Serge Bazanski93d593b2023-03-28 16:43:47 +020023
24 apb "source.monogon.dev/metropolis/node/core/curator/proto/api"
Serge Bazanski60461b22023-10-26 19:16:59 +020025 cpb "source.monogon.dev/metropolis/proto/common"
Serge Bazanski93d593b2023-03-28 16:43:47 +020026)
27
Serge Bazanski93d593b2023-03-28 16:43:47 +020028// fakeWireguard implements wireguard while keeping peer information internally.
29type fakeWireguard struct {
30 k wgtypes.Key
31
32 muNodes sync.Mutex
Serge Bazanski60461b22023-10-26 19:16:59 +020033 nodes map[string]*apb.Node
Serge Bazanski93d593b2023-03-28 16:43:47 +020034 failNextUpdate bool
35}
36
37func (f *fakeWireguard) ensureOnDiskKey(_ *localstorage.DataKubernetesClusterNetworkingDirectory) error {
38 f.k, _ = wgtypes.GeneratePrivateKey()
39 return nil
40}
41
42func (f *fakeWireguard) setup(clusterNet *net.IPNet) error {
43 f.muNodes.Lock()
44 defer f.muNodes.Unlock()
Serge Bazanski60461b22023-10-26 19:16:59 +020045 f.nodes = make(map[string]*apb.Node)
Serge Bazanski93d593b2023-03-28 16:43:47 +020046 return nil
47}
48
Serge Bazanski60461b22023-10-26 19:16:59 +020049func (f *fakeWireguard) configurePeers(nodes []*apb.Node) error {
Serge Bazanski93d593b2023-03-28 16:43:47 +020050 f.muNodes.Lock()
51 defer f.muNodes.Unlock()
Serge Bazanski60461b22023-10-26 19:16:59 +020052
Serge Bazanski93d593b2023-03-28 16:43:47 +020053 if f.failNextUpdate {
54 f.failNextUpdate = false
55 return fmt.Errorf("synthetic test failure")
56 }
Serge Bazanski60461b22023-10-26 19:16:59 +020057
Serge Bazanski93d593b2023-03-28 16:43:47 +020058 for _, n := range nodes {
Serge Bazanski60461b22023-10-26 19:16:59 +020059 f.nodes[n.Id] = n
Serge Bazanski93d593b2023-03-28 16:43:47 +020060 }
61 return nil
62}
63
Serge Bazanski60461b22023-10-26 19:16:59 +020064func (f *fakeWireguard) unconfigurePeer(node *apb.Node) error {
Serge Bazanski93d593b2023-03-28 16:43:47 +020065 f.muNodes.Lock()
66 defer f.muNodes.Unlock()
Serge Bazanski60461b22023-10-26 19:16:59 +020067 delete(f.nodes, node.Id)
Serge Bazanski93d593b2023-03-28 16:43:47 +020068 return nil
69}
70
71func (f *fakeWireguard) key() wgtypes.Key {
72 return f.k
73}
74
75func (f *fakeWireguard) close() {
76}
77
78// TestClusternetBasic exercises clusternet with a fake curator and fake
79// wireguard, trying to exercise as many edge cases as possible.
80func TestClusternetBasic(t *testing.T) {
81 key1, err := wgtypes.GeneratePrivateKey()
82 if err != nil {
83 t.Fatalf("Failed to generate private key: %v", err)
84 }
85 key2, err := wgtypes.GeneratePrivateKey()
86 if err != nil {
87 t.Fatalf("Failed to generate private key: %v", err)
88 }
89
Tim Windelschmidt5d0906e2023-07-20 20:23:57 +020090 cur, cl := util.MakeTestCurator(t)
Serge Bazanski93d593b2023-03-28 16:43:47 +020091 defer cl.Close()
92 curator := apb.NewCuratorClient(cl)
93
Serge Bazanskib565cc62023-03-30 18:43:51 +020094 var nval memory.Value[*network.Status]
95
Serge Bazanski93d593b2023-03-28 16:43:47 +020096 var podNetwork memory.Value[*Prefixes]
97 wg := &fakeWireguard{}
98 svc := Service{
99 Curator: curator,
100 ClusterNet: net.IPNet{
101 IP: net.IP([]byte{10, 10, 0, 0}),
102 Mask: net.IPv4Mask(255, 255, 0, 0),
103 },
104 DataDirectory: nil,
105 LocalKubernetesPodNetwork: &podNetwork,
Serge Bazanskib565cc62023-03-30 18:43:51 +0200106 Network: &nval,
Serge Bazanski93d593b2023-03-28 16:43:47 +0200107
108 wg: wg,
109 }
110 supervisor.TestHarness(t, svc.Run)
111
Serge Bazanski60461b22023-10-26 19:16:59 +0200112 checkState := func(nodes map[string]*apb.Node) error {
Serge Bazanski93d593b2023-03-28 16:43:47 +0200113 t.Helper()
114 wg.muNodes.Lock()
115 defer wg.muNodes.Unlock()
116 for nid, n := range nodes {
117 n2, ok := wg.nodes[nid]
118 if !ok {
119 return fmt.Errorf("node %q missing in programmed peers", nid)
120 }
Serge Bazanski60461b22023-10-26 19:16:59 +0200121 if got, want := n2.Clusternet.WireguardPubkey, n.Clusternet.WireguardPubkey; got != want {
122 return fmt.Errorf("node %q pubkey mismatch: %q in programmed peers, %q wanted", nid, got, want)
Serge Bazanski93d593b2023-03-28 16:43:47 +0200123 }
Serge Bazanski60461b22023-10-26 19:16:59 +0200124 if got, want := n2.Status.ExternalAddress, n.Status.ExternalAddress; got != want {
125 return fmt.Errorf("node %q address mismatch: %q in programmed peers, %q wanted", nid, got, want)
Serge Bazanski93d593b2023-03-28 16:43:47 +0200126 }
Serge Bazanski60461b22023-10-26 19:16:59 +0200127 var p, p2 []string
128 for _, prefix := range n.Clusternet.Prefixes {
129 p = append(p, prefix.Cidr)
130 }
131 for _, prefix := range n2.Clusternet.Prefixes {
132 p2 = append(p2, prefix.Cidr)
133 }
134 sort.Strings(p)
135 sort.Strings(p2)
136 if !slices.Equal(p, p2) {
137 return fmt.Errorf("node %q prefixes mismatch: %v in programmed peers, %v wanted", nid, p2, p)
Serge Bazanski93d593b2023-03-28 16:43:47 +0200138 }
139 }
140 for nid, _ := range wg.nodes {
141 if _, ok := nodes[nid]; !ok {
142 return fmt.Errorf("node %q present in programmed peers", nid)
143 }
144 }
145 return nil
146 }
147
Serge Bazanski60461b22023-10-26 19:16:59 +0200148 assertStateEventual := func(nodes map[string]*apb.Node) {
Serge Bazanski93d593b2023-03-28 16:43:47 +0200149 t.Helper()
150 deadline := time.Now().Add(5 * time.Second)
151 for {
152 err := checkState(nodes)
153 if err == nil {
154 break
155 }
156 if time.Now().After(deadline) {
157 t.Error(err)
158 return
159 }
160 }
161
162 }
163
164 // Start with a single node.
Tim Windelschmidt5d0906e2023-07-20 20:23:57 +0200165 cur.NodeWithPrefixes(key1, "metropolis-fake-1", "1.2.3.4")
Serge Bazanski60461b22023-10-26 19:16:59 +0200166 assertStateEventual(map[string]*apb.Node{
Serge Bazanski93d593b2023-03-28 16:43:47 +0200167 "metropolis-fake-1": {
Serge Bazanski60461b22023-10-26 19:16:59 +0200168 Status: &cpb.NodeStatus{
169 ExternalAddress: "1.2.3.4",
170 },
171 Clusternet: &cpb.NodeClusterNetworking{
172 WireguardPubkey: key1.PublicKey().String(),
173 },
Serge Bazanski93d593b2023-03-28 16:43:47 +0200174 },
175 })
176 // Change the node's peer address.
Tim Windelschmidt5d0906e2023-07-20 20:23:57 +0200177 cur.NodeWithPrefixes(key1, "metropolis-fake-1", "1.2.3.5")
Serge Bazanski60461b22023-10-26 19:16:59 +0200178 assertStateEventual(map[string]*apb.Node{
Serge Bazanski93d593b2023-03-28 16:43:47 +0200179 "metropolis-fake-1": {
Serge Bazanski60461b22023-10-26 19:16:59 +0200180 Status: &cpb.NodeStatus{
181 ExternalAddress: "1.2.3.5",
182 },
183 Clusternet: &cpb.NodeClusterNetworking{
184 WireguardPubkey: key1.PublicKey().String(),
185 },
Serge Bazanski93d593b2023-03-28 16:43:47 +0200186 },
187 })
188 // Add another node.
Tim Windelschmidt5d0906e2023-07-20 20:23:57 +0200189 cur.NodeWithPrefixes(key2, "metropolis-fake-2", "1.2.3.6")
Serge Bazanski60461b22023-10-26 19:16:59 +0200190 assertStateEventual(map[string]*apb.Node{
Serge Bazanski93d593b2023-03-28 16:43:47 +0200191 "metropolis-fake-1": {
Serge Bazanski60461b22023-10-26 19:16:59 +0200192 Status: &cpb.NodeStatus{
193 ExternalAddress: "1.2.3.5",
194 },
195 Clusternet: &cpb.NodeClusterNetworking{
196 WireguardPubkey: key1.PublicKey().String(),
197 },
Serge Bazanski93d593b2023-03-28 16:43:47 +0200198 },
199 "metropolis-fake-2": {
Serge Bazanski60461b22023-10-26 19:16:59 +0200200 Status: &cpb.NodeStatus{
201 ExternalAddress: "1.2.3.6",
202 },
203 Clusternet: &cpb.NodeClusterNetworking{
204 WireguardPubkey: key2.PublicKey().String(),
205 },
Serge Bazanski93d593b2023-03-28 16:43:47 +0200206 },
207 })
208 // Add some prefixes to both nodes, but fail the next configurePeers call.
209 wg.muNodes.Lock()
210 wg.failNextUpdate = true
211 wg.muNodes.Unlock()
Tim Windelschmidt5d0906e2023-07-20 20:23:57 +0200212 cur.NodeWithPrefixes(key1, "metropolis-fake-1", "1.2.3.5", "10.100.10.0/24", "10.100.20.0/24")
213 cur.NodeWithPrefixes(key2, "metropolis-fake-2", "1.2.3.6", "10.100.30.0/24", "10.100.40.0/24")
Serge Bazanski60461b22023-10-26 19:16:59 +0200214 assertStateEventual(map[string]*apb.Node{
Serge Bazanski93d593b2023-03-28 16:43:47 +0200215 "metropolis-fake-1": {
Serge Bazanski60461b22023-10-26 19:16:59 +0200216 Status: &cpb.NodeStatus{
217 ExternalAddress: "1.2.3.5",
218 },
219 Clusternet: &cpb.NodeClusterNetworking{
220 WireguardPubkey: key1.PublicKey().String(),
221 // No prefixes as the call failed.
Serge Bazanski93d593b2023-03-28 16:43:47 +0200222 },
223 },
224 "metropolis-fake-2": {
Serge Bazanski60461b22023-10-26 19:16:59 +0200225 Status: &cpb.NodeStatus{
226 ExternalAddress: "1.2.3.6",
227 },
228 Clusternet: &cpb.NodeClusterNetworking{
229 WireguardPubkey: key2.PublicKey().String(),
230 Prefixes: []*cpb.NodeClusterNetworking_Prefix{
231 {Cidr: "10.100.30.0/24"},
232 {Cidr: "10.100.40.0/24"},
233 },
Serge Bazanski93d593b2023-03-28 16:43:47 +0200234 },
235 },
236 })
237 // Delete one of the nodes.
Tim Windelschmidt5d0906e2023-07-20 20:23:57 +0200238 cur.DeleteNode("metropolis-fake-1")
Serge Bazanski60461b22023-10-26 19:16:59 +0200239 assertStateEventual(map[string]*apb.Node{
Serge Bazanski93d593b2023-03-28 16:43:47 +0200240 "metropolis-fake-2": {
Serge Bazanski60461b22023-10-26 19:16:59 +0200241 Status: &cpb.NodeStatus{
242 ExternalAddress: "1.2.3.6",
243 },
244 Clusternet: &cpb.NodeClusterNetworking{
245 WireguardPubkey: key2.PublicKey().String(),
246 Prefixes: []*cpb.NodeClusterNetworking_Prefix{
247 {Cidr: "10.100.30.0/24"},
248 {Cidr: "10.100.40.0/24"},
249 },
Serge Bazanski93d593b2023-03-28 16:43:47 +0200250 },
251 },
252 })
253}
254
255// TestWireguardImplementation makes sure localWireguard behaves as expected.
256func TestWireguardIntegration(t *testing.T) {
257 if os.Getenv("IN_KTEST") != "true" {
258 t.Skip("Not in ktest")
259 }
260
261 root := &localstorage.Root{}
262 tmp, err := os.MkdirTemp("", "clusternet")
263 if err != nil {
264 t.Fatal(err)
265 }
266 err = declarative.PlaceFS(root, tmp)
267 if err != nil {
268 t.Fatal(err)
269 }
270 os.MkdirAll(root.Data.Kubernetes.ClusterNetworking.FullPath(), 0700)
271 wg := &localWireguard{}
272
273 // Ensure key once and make note of it.
274 if err := wg.ensureOnDiskKey(&root.Data.Kubernetes.ClusterNetworking); err != nil {
275 t.Fatalf("Could not ensure wireguard key: %v", err)
276 }
277 key := wg.key().String()
278 // Do it again, and make sure the key hasn't changed.
279 wg = &localWireguard{}
280 if err := wg.ensureOnDiskKey(&root.Data.Kubernetes.ClusterNetworking); err != nil {
281 t.Fatalf("Could not ensure wireguard key second time: %v", err)
282 }
283 if want, got := key, wg.key().String(); want != got {
284 t.Fatalf("Key changed, was %q, became %q", want, got)
285 }
286
287 // Setup the interface.
288 cnet := net.IPNet{
289 IP: net.IP([]byte{10, 10, 0, 0}),
290 Mask: net.IPv4Mask(255, 255, 0, 0),
291 }
292 if err := wg.setup(&cnet); err != nil {
293 t.Fatalf("Failed to setup interface: %v", err)
294 }
295 // Do it again.
296 wg.close()
297 if err := wg.setup(&cnet); err != nil {
298 t.Fatalf("Failed to setup interface second time: %v", err)
299 }
300
301 // Check that the key and listen port are configured correctly.
302 wgClient, err := wgctrl.New()
303 if err != nil {
304 t.Fatalf("Failed to create wireguard client: %v", err)
305 }
306 wgDev, err := wgClient.Device(clusterNetDeviceName)
307 if err != nil {
308 t.Fatalf("Failed to connect to netlink's WireGuard config endpoint: %v", err)
309 }
310 if want, got := key, wgDev.PrivateKey.String(); want != got {
311 t.Errorf("Wireguard key mismatch, wanted %q, got %q", want, got)
312 }
313 if want, got := int(common.WireGuardPort), wgDev.ListenPort; want != got {
314 t.Errorf("Wireguard port mismatch, wanted %d, got %d", want, got)
315 }
316
317 // Add some peers and check that we got them.
318 pkeys := make([]wgtypes.Key, 2)
319 pkeys[0], err = wgtypes.GeneratePrivateKey()
320 if err != nil {
321 t.Fatalf("Failed to generate private key: %v", err)
322 }
323 pkeys[1], err = wgtypes.GeneratePrivateKey()
324 if err != nil {
325 t.Fatalf("Failed to generate private key: %v", err)
326 }
Serge Bazanski60461b22023-10-26 19:16:59 +0200327 err = wg.configurePeers([]*apb.Node{
Serge Bazanski93d593b2023-03-28 16:43:47 +0200328 {
Serge Bazanski60461b22023-10-26 19:16:59 +0200329 Id: "test-0",
330 Status: &cpb.NodeStatus{
331 ExternalAddress: "10.100.0.1",
332 },
333 Clusternet: &cpb.NodeClusterNetworking{
334 WireguardPubkey: pkeys[0].PublicKey().String(),
335 Prefixes: []*cpb.NodeClusterNetworking_Prefix{
336 {Cidr: "10.0.0.0/24"},
337 {Cidr: "10.0.1.0/24"},
338 },
Serge Bazanski93d593b2023-03-28 16:43:47 +0200339 },
340 },
341 {
Serge Bazanski60461b22023-10-26 19:16:59 +0200342 Id: "test-1",
343 Status: &cpb.NodeStatus{
344 ExternalAddress: "10.100.1.1",
345 },
346 Clusternet: &cpb.NodeClusterNetworking{
347 WireguardPubkey: pkeys[1].PublicKey().String(),
348 Prefixes: []*cpb.NodeClusterNetworking_Prefix{
349 {Cidr: "10.1.0.0/24"},
350 {Cidr: "10.1.1.0/24"},
351 },
Serge Bazanski93d593b2023-03-28 16:43:47 +0200352 },
353 },
354 })
355 if err != nil {
356 t.Fatalf("Configuring peers failed: %v", err)
357 }
358
359 wgDev, err = wgClient.Device(clusterNetDeviceName)
360 if err != nil {
361 t.Fatalf("Failed to connect to netlink's WireGuard config endpoint: %v", err)
362 }
363 if want, got := 2, len(wgDev.Peers); want != got {
364 t.Errorf("Wanted %d peers, got %d", want, got)
365 } else {
366 for i := 0; i < 2; i++ {
367 if want, got := pkeys[i].PublicKey().String(), wgDev.Peers[i].PublicKey.String(); want != got {
368 t.Errorf("Peer %d should have key %q, got %q", i, want, got)
369 }
370 if want, got := fmt.Sprintf("10.100.%d.1:%s", i, common.WireGuardPort.PortString()), wgDev.Peers[i].Endpoint.String(); want != got {
371 t.Errorf("Peer %d should have endpoint %q, got %q", i, want, got)
372 }
373 if want, got := 2, len(wgDev.Peers[i].AllowedIPs); want != got {
374 t.Errorf("Peer %d should have %d peers, got %d", i, want, got)
375 } else {
376 for j := 0; j < 2; j++ {
377 if want, got := fmt.Sprintf("10.%d.%d.0/24", i, j), wgDev.Peers[i].AllowedIPs[j].String(); want != got {
378 t.Errorf("Peer %d should have allowed ip %d %q, got %q", i, j, want, got)
379 }
380 }
381 }
382 }
383 }
384
385 // Update one of the peers and check that things got applied.
Serge Bazanski60461b22023-10-26 19:16:59 +0200386 err = wg.configurePeers([]*apb.Node{
Serge Bazanski93d593b2023-03-28 16:43:47 +0200387 {
Serge Bazanski60461b22023-10-26 19:16:59 +0200388 Id: "test-0",
389 Status: &cpb.NodeStatus{
390 ExternalAddress: "10.100.0.3",
391 },
392 Clusternet: &cpb.NodeClusterNetworking{
393 WireguardPubkey: pkeys[0].PublicKey().String(),
394 Prefixes: []*cpb.NodeClusterNetworking_Prefix{
395 {Cidr: "10.0.0.0/24"},
396 },
Serge Bazanski93d593b2023-03-28 16:43:47 +0200397 },
398 },
399 })
400 if err != nil {
Serge Bazanski60461b22023-10-26 19:16:59 +0200401 t.Fatalf("Failed to update peer: %v", err)
Serge Bazanski93d593b2023-03-28 16:43:47 +0200402 }
403 wgDev, err = wgClient.Device(clusterNetDeviceName)
404 if err != nil {
405 t.Fatalf("Failed to connect to netlink's WireGuard config endpoint: %v", err)
406 }
407 if want, got := 2, len(wgDev.Peers); want != got {
408 t.Errorf("Wanted %d peers, got %d", want, got)
409 } else {
410 if want, got := pkeys[0].PublicKey().String(), wgDev.Peers[0].PublicKey.String(); want != got {
411 t.Errorf("Peer 0 should have key %q, got %q", want, got)
412 }
413 if want, got := fmt.Sprintf("10.100.0.3:%s", common.WireGuardPort.PortString()), wgDev.Peers[0].Endpoint.String(); want != got {
414 t.Errorf("Peer 0 should have endpoint %q, got %q", want, got)
415 }
416 if want, got := 1, len(wgDev.Peers[0].AllowedIPs); want != got {
417 t.Errorf("Peer 0 should have %d peers, got %d", want, got)
418 } else {
419 if want, got := "10.0.0.0/24", wgDev.Peers[0].AllowedIPs[0].String(); want != got {
420 t.Errorf("Peer 0 should have allowed ip 0 %q, got %q", want, got)
421 }
422 }
423 }
424
425 // Remove one of the peers and make sure it's gone.
Serge Bazanski60461b22023-10-26 19:16:59 +0200426 err = wg.unconfigurePeer(&apb.Node{
427 Clusternet: &cpb.NodeClusterNetworking{
428 WireguardPubkey: pkeys[0].PublicKey().String(),
429 },
Serge Bazanski93d593b2023-03-28 16:43:47 +0200430 })
431 if err != nil {
432 t.Fatalf("Failed to unconfigure peer: %v", err)
433 }
Serge Bazanski60461b22023-10-26 19:16:59 +0200434 err = wg.unconfigurePeer(&apb.Node{
435 Clusternet: &cpb.NodeClusterNetworking{
436 WireguardPubkey: pkeys[0].PublicKey().String(),
437 },
Serge Bazanski93d593b2023-03-28 16:43:47 +0200438 })
439 if err != nil {
440 t.Fatalf("Failed to unconfigure peer a second time: %v", err)
441 }
442 wgDev, err = wgClient.Device(clusterNetDeviceName)
443 if err != nil {
444 t.Fatalf("Failed to connect to netlink's WireGuard config endpoint: %v", err)
445 }
446 if want, got := 1, len(wgDev.Peers); want != got {
447 t.Errorf("Wanted %d peer, got %d", want, got)
448 }
449}