blob: dd5e5df8c55f74e09a18287901148d0776572c29 [file] [log] [blame]
// Copyright The Monogon Project Authors.
// SPDX-License-Identifier: Apache-2.0
Lorenz Brunf042e6f2020-06-24 16:46:09 +02003
// Package clusternet implements a WireGuard-based overlay network for
// Kubernetes. It relies on controller-manager's IPAM to assign IP ranges to
// nodes and on Kubernetes' Node objects to distribute the Node IPs and public
// keys.
//
// It sets up a single WireGuard network interface and routes the entire
// ClusterCIDR into that network interface, relying on WireGuard's AllowedIPs
// mechanism to look up the correct peer node to send the traffic to. This
// means that the routing table doesn't change and doesn't have to be
// separately managed. When clusternet is started it annotates its WireGuard
// public key onto its node object.
// For each node object that's created or updated on the K8s apiserver it
// checks if a public key annotation is set and if so, a peer with that public
// key, its InternalIP as endpoint and the CIDR for that node as AllowedIPs is
// created.
Lorenz Brunf042e6f2020-06-24 16:46:09 +020019package clusternet
20
import (
	"context"
	"fmt"
	"net/netip"
	"time"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/fields"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/cache"

	"source.monogon.dev/go/logging"
	"source.monogon.dev/metropolis/node/core/network/ipam"
	"source.monogon.dev/osbase/event"
	"source.monogon.dev/osbase/supervisor"
)
36
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020037type Service struct {
Serge Bazanski79208522023-03-28 20:14:58 +020038 NodeName string
39 Kubernetes kubernetes.Interface
Lorenz Bruncb76c842025-08-11 12:54:28 +020040 Prefixes event.Value[*ipam.Prefixes]
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020041
Serge Bazanski3c5d0632024-09-12 10:49:12 +000042 logger logging.Leveled
Lorenz Brunf042e6f2020-06-24 16:46:09 +020043}
44
Serge Bazanski79208522023-03-28 20:14:58 +020045// ensureNode is called any time the node that this Service is running on gets
46// updated. It uses this data to update this node's prefixes in the Curator.
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020047func (s *Service) ensureNode(newNode *corev1.Node) error {
Serge Bazanski79208522023-03-28 20:14:58 +020048 if newNode.Name != s.NodeName {
49 // We only care about our own node
Lorenz Brunf042e6f2020-06-24 16:46:09 +020050 return nil
51 }
Serge Bazanski79208522023-03-28 20:14:58 +020052
Lorenz Bruncb76c842025-08-11 12:54:28 +020053 var prefixes ipam.Prefixes
Lorenz Brunf042e6f2020-06-24 16:46:09 +020054 for _, podNetStr := range newNode.Spec.PodCIDRs {
Serge Bazanski79208522023-03-28 20:14:58 +020055 prefix, err := netip.ParsePrefix(podNetStr)
Lorenz Brunf042e6f2020-06-24 16:46:09 +020056 if err != nil {
Serge Bazanskic7359672020-10-30 16:38:57 +010057 s.logger.Warningf("Node %s PodCIDR failed to parse, ignored: %v", newNode.Name, err)
Lorenz Brunf042e6f2020-06-24 16:46:09 +020058 continue
59 }
Serge Bazanski79208522023-03-28 20:14:58 +020060 prefixes = append(prefixes, prefix)
Lorenz Brunf042e6f2020-06-24 16:46:09 +020061 }
Lorenz Brunf042e6f2020-06-24 16:46:09 +020062
Serge Bazanski79208522023-03-28 20:14:58 +020063 s.logger.V(1).Infof("Updating locally originated prefixes: %+v", prefixes)
64 s.Prefixes.Set(&prefixes)
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020065 return nil
Lorenz Brunf042e6f2020-06-24 16:46:09 +020066}
67
68// Run runs the ClusterNet service. See package description for what it does.
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020069func (s *Service) Run(ctx context.Context) error {
70 logger := supervisor.Logger(ctx)
Lorenz Brunca24cfa2020-08-18 13:49:37 +020071 s.logger = logger
Lorenz Brunf042e6f2020-06-24 16:46:09 +020072
Serge Bazanski79208522023-03-28 20:14:58 +020073 // Make a 'shared' informer. It's shared by name, but we don't actually share it
74 // - instead we have to use it as the standard Informer API does not support
75 // error handling. And we want to use a dedicated informer because we want to
76 // only watch our own node.
77 lw := cache.NewListWatchFromClient(
78 s.Kubernetes.CoreV1().RESTClient(),
79 "nodes", "",
80 fields.OneTermEqualSelector("metadata.name", s.NodeName),
81 )
82 ni := cache.NewSharedInformer(lw, &corev1.Node{}, time.Second*5)
83 ni.AddEventHandler(cache.ResourceEventHandlerFuncs{
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020084 AddFunc: func(new interface{}) {
85 newNode, ok := new.(*corev1.Node)
86 if !ok {
Serge Bazanskic7359672020-10-30 16:38:57 +010087 logger.Errorf("Received non-node item %+v in node event handler", new)
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020088 return
89 }
90 if err := s.ensureNode(newNode); err != nil {
Serge Bazanskic7359672020-10-30 16:38:57 +010091 logger.Warningf("Failed to sync node: %v", err)
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020092 }
93 },
94 UpdateFunc: func(old, new interface{}) {
95 newNode, ok := new.(*corev1.Node)
96 if !ok {
Serge Bazanskic7359672020-10-30 16:38:57 +010097 logger.Errorf("Received non-node item %+v in node event handler", new)
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020098 return
99 }
100 if err := s.ensureNode(newNode); err != nil {
Serge Bazanskic7359672020-10-30 16:38:57 +0100101 logger.Warningf("Failed to sync node: %v", err)
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200102 }
103 },
Serge Bazanski79208522023-03-28 20:14:58 +0200104 })
105 ni.SetWatchErrorHandler(func(_ *cache.Reflector, err error) {
106 supervisor.Logger(ctx).Errorf("node informer watch error: %v", err)
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200107 })
108
109 supervisor.Signal(ctx, supervisor.SignalHealthy)
Serge Bazanski79208522023-03-28 20:14:58 +0200110 ni.Run(ctx.Done())
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200111 return ctx.Err()
Lorenz Brunf042e6f2020-06-24 16:46:09 +0200112}