blob: f4974eb9719f58b51fba7edddddebf169e61771a [file] [log] [blame]
// Copyright 2020 The Monogon Project Authors.
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package clusternet implements a WireGuard-based overlay network for
// Kubernetes. It relies on controller-manager's IPAM to assign IP ranges to
// nodes and on Kubernetes' Node objects to distribute the Node IPs and public
// keys.
//
// It sets up a single WireGuard network interface and routes the entire
// ClusterCIDR into that network interface, relying on WireGuard's AllowedIPs
// mechanism to look up the correct peer node to send the traffic to. This
// means that the routing table doesn't change and doesn't have to be
// separately managed. When clusternet is started it annotates its WireGuard
// public key onto its node object.
// For each node object that's created or updated on the K8s apiserver it
// checks whether a public key annotation is set and, if so, creates a peer
// with that public key, the node's InternalIP as endpoint and the CIDR for
// that node as AllowedIPs.
Lorenz Brunf042e6f2020-06-24 16:46:09 +020032package clusternet
33
34import (
35 "context"
Lorenz Brunf042e6f2020-06-24 16:46:09 +020036 "errors"
Serge Bazanski79208522023-03-28 20:14:58 +020037 "net/netip"
38 "time"
Lorenz Brunf042e6f2020-06-24 16:46:09 +020039
Lorenz Brunf042e6f2020-06-24 16:46:09 +020040 corev1 "k8s.io/api/core/v1"
Serge Bazanski79208522023-03-28 20:14:58 +020041 "k8s.io/apimachinery/pkg/fields"
Serge Bazanski77cb6c52020-12-19 00:09:22 +010042 "k8s.io/client-go/kubernetes"
Lorenz Brunf042e6f2020-06-24 16:46:09 +020043 "k8s.io/client-go/tools/cache"
44
Serge Bazanski79208522023-03-28 20:14:58 +020045 oclusternet "source.monogon.dev/metropolis/node/core/clusternet"
46 "source.monogon.dev/metropolis/pkg/event"
Serge Bazanski216fe7b2021-05-21 18:36:16 +020047 "source.monogon.dev/metropolis/pkg/logtree"
Serge Bazanski31370b02021-01-07 16:31:14 +010048 "source.monogon.dev/metropolis/pkg/supervisor"
Lorenz Brunf042e6f2020-06-24 16:46:09 +020049)
50
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020051type Service struct {
Serge Bazanski79208522023-03-28 20:14:58 +020052 NodeName string
53 Kubernetes kubernetes.Interface
54 Prefixes event.Value[*oclusternet.Prefixes]
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020055
Serge Bazanski79208522023-03-28 20:14:58 +020056 logger logtree.LeveledLogger
Lorenz Brunf042e6f2020-06-24 16:46:09 +020057}
58
Serge Bazanski79208522023-03-28 20:14:58 +020059// ensureNode is called any time the node that this Service is running on gets
60// updated. It uses this data to update this node's prefixes in the Curator.
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020061func (s *Service) ensureNode(newNode *corev1.Node) error {
Serge Bazanski79208522023-03-28 20:14:58 +020062 if newNode.Name != s.NodeName {
63 // We only care about our own node
Lorenz Brunf042e6f2020-06-24 16:46:09 +020064 return nil
65 }
Serge Bazanski79208522023-03-28 20:14:58 +020066
67 var internalIP netip.Addr
Lorenz Brunf042e6f2020-06-24 16:46:09 +020068 for _, addr := range newNode.Status.Addresses {
69 if addr.Type == corev1.NodeInternalIP {
Serge Bazanski79208522023-03-28 20:14:58 +020070 if internalIP.IsUnspecified() {
Serge Bazanskic7359672020-10-30 16:38:57 +010071 s.logger.Warningf("More than one NodeInternalIP specified, using the first one")
Lorenz Brunf042e6f2020-06-24 16:46:09 +020072 break
73 }
Serge Bazanski79208522023-03-28 20:14:58 +020074 ip, err := netip.ParseAddr(addr.Address)
75 if err != nil {
Serge Bazanskic7359672020-10-30 16:38:57 +010076 s.logger.Warningf("Failed to parse Internal IP %s", addr.Address)
Serge Bazanski79208522023-03-28 20:14:58 +020077 continue
Lorenz Brunf042e6f2020-06-24 16:46:09 +020078 }
Serge Bazanski79208522023-03-28 20:14:58 +020079 internalIP = ip
Lorenz Brunf042e6f2020-06-24 16:46:09 +020080 }
81 }
Serge Bazanski79208522023-03-28 20:14:58 +020082 if internalIP.IsUnspecified() {
Lorenz Brunf042e6f2020-06-24 16:46:09 +020083 return errors.New("node has no Internal IP")
84 }
Serge Bazanski79208522023-03-28 20:14:58 +020085
86 var prefixes oclusternet.Prefixes
Lorenz Brunf042e6f2020-06-24 16:46:09 +020087 for _, podNetStr := range newNode.Spec.PodCIDRs {
Serge Bazanski79208522023-03-28 20:14:58 +020088 prefix, err := netip.ParsePrefix(podNetStr)
Lorenz Brunf042e6f2020-06-24 16:46:09 +020089 if err != nil {
Serge Bazanskic7359672020-10-30 16:38:57 +010090 s.logger.Warningf("Node %s PodCIDR failed to parse, ignored: %v", newNode.Name, err)
Lorenz Brunf042e6f2020-06-24 16:46:09 +020091 continue
92 }
Serge Bazanski79208522023-03-28 20:14:58 +020093 prefixes = append(prefixes, prefix)
Lorenz Brunf042e6f2020-06-24 16:46:09 +020094 }
Serge Bazanski79208522023-03-28 20:14:58 +020095 prefixes = append(prefixes, netip.PrefixFrom(internalIP, 32))
Lorenz Brunf042e6f2020-06-24 16:46:09 +020096
Serge Bazanski79208522023-03-28 20:14:58 +020097 s.logger.V(1).Infof("Updating locally originated prefixes: %+v", prefixes)
98 s.Prefixes.Set(&prefixes)
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020099 return nil
Lorenz Brunf042e6f2020-06-24 16:46:09 +0200100}
101
102// Run runs the ClusterNet service. See package description for what it does.
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200103func (s *Service) Run(ctx context.Context) error {
104 logger := supervisor.Logger(ctx)
Lorenz Brunca24cfa2020-08-18 13:49:37 +0200105 s.logger = logger
Lorenz Brunf042e6f2020-06-24 16:46:09 +0200106
Serge Bazanski79208522023-03-28 20:14:58 +0200107 // Make a 'shared' informer. It's shared by name, but we don't actually share it
108 // - instead we have to use it as the standard Informer API does not support
109 // error handling. And we want to use a dedicated informer because we want to
110 // only watch our own node.
111 lw := cache.NewListWatchFromClient(
112 s.Kubernetes.CoreV1().RESTClient(),
113 "nodes", "",
114 fields.OneTermEqualSelector("metadata.name", s.NodeName),
115 )
116 ni := cache.NewSharedInformer(lw, &corev1.Node{}, time.Second*5)
117 ni.AddEventHandler(cache.ResourceEventHandlerFuncs{
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200118 AddFunc: func(new interface{}) {
119 newNode, ok := new.(*corev1.Node)
120 if !ok {
Serge Bazanskic7359672020-10-30 16:38:57 +0100121 logger.Errorf("Received non-node item %+v in node event handler", new)
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200122 return
123 }
124 if err := s.ensureNode(newNode); err != nil {
Serge Bazanskic7359672020-10-30 16:38:57 +0100125 logger.Warningf("Failed to sync node: %v", err)
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200126 }
127 },
128 UpdateFunc: func(old, new interface{}) {
129 newNode, ok := new.(*corev1.Node)
130 if !ok {
Serge Bazanskic7359672020-10-30 16:38:57 +0100131 logger.Errorf("Received non-node item %+v in node event handler", new)
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200132 return
133 }
134 if err := s.ensureNode(newNode); err != nil {
Serge Bazanskic7359672020-10-30 16:38:57 +0100135 logger.Warningf("Failed to sync node: %v", err)
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200136 }
137 },
Serge Bazanski79208522023-03-28 20:14:58 +0200138 })
139 ni.SetWatchErrorHandler(func(_ *cache.Reflector, err error) {
140 supervisor.Logger(ctx).Errorf("node informer watch error: %v", err)
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200141 })
142
143 supervisor.Signal(ctx, supervisor.SignalHealthy)
Serge Bazanski79208522023-03-28 20:14:58 +0200144 ni.Run(ctx.Done())
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200145 return ctx.Err()
Lorenz Brunf042e6f2020-06-24 16:46:09 +0200146}