Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 1 | // Copyright 2020 The Monogon Project Authors. |
| 2 | // |
| 3 | // SPDX-License-Identifier: Apache-2.0 |
| 4 | // |
| 5 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | // you may not use this file except in compliance with the License. |
| 7 | // You may obtain a copy of the License at |
| 8 | // |
| 9 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | // |
| 11 | // Unless required by applicable law or agreed to in writing, software |
| 12 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | // See the License for the specific language governing permissions and |
| 15 | // limitations under the License. |
| 16 | |
Serge Bazanski | 216fe7b | 2021-05-21 18:36:16 +0200 | [diff] [blame] | 17 | // Package clusternet implements a WireGuard-based overlay network for |
| 18 | // Kubernetes. It relies on controller-manager's IPAM to assign IP ranges to |
| 19 | // nodes and on Kubernetes' Node objects to distribute the Node IPs and public |
| 20 | // keys. |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 21 | // |
Serge Bazanski | 216fe7b | 2021-05-21 18:36:16 +0200 | [diff] [blame] | 22 | // It sets up a single WireGuard network interface and routes the entire |
| 23 | // ClusterCIDR into that network interface, relying on WireGuard's AllowedIPs |
| 24 | // mechanism to look up the correct peer node to send the traffic to. This |
| 25 | // means that the routing table doesn't change and doesn't have to be |
| 26 | // separately managed. When clusternet is started it annotates its WireGuard |
| 27 | // public key onto its node object. |
| 28 | // For each node object that's created or updated on the K8s apiserver it |
| 29 | // checks if a public key annotation is set and if yes a peer with that public |
| 30 | // key, its InternalIP as endpoint and the CIDR for that node as AllowedIPs is |
| 31 | // created. |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 32 | package clusternet |
| 33 | |
| 34 | import ( |
| 35 | "context" |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 36 | "errors" |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame^] | 37 | "net/netip" |
| 38 | "time" |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 39 | |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 40 | corev1 "k8s.io/api/core/v1" |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame^] | 41 | "k8s.io/apimachinery/pkg/fields" |
Serge Bazanski | 77cb6c5 | 2020-12-19 00:09:22 +0100 | [diff] [blame] | 42 | "k8s.io/client-go/kubernetes" |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 43 | "k8s.io/client-go/tools/cache" |
| 44 | |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame^] | 45 | oclusternet "source.monogon.dev/metropolis/node/core/clusternet" |
| 46 | "source.monogon.dev/metropolis/pkg/event" |
Serge Bazanski | 216fe7b | 2021-05-21 18:36:16 +0200 | [diff] [blame] | 47 | "source.monogon.dev/metropolis/pkg/logtree" |
Serge Bazanski | 31370b0 | 2021-01-07 16:31:14 +0100 | [diff] [blame] | 48 | "source.monogon.dev/metropolis/pkg/supervisor" |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 49 | ) |
| 50 | |
Serge Bazanski | c2c7ad9 | 2020-07-13 17:20:09 +0200 | [diff] [blame] | 51 | type Service struct { |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame^] | 52 | NodeName string |
| 53 | Kubernetes kubernetes.Interface |
| 54 | Prefixes event.Value[*oclusternet.Prefixes] |
Serge Bazanski | c2c7ad9 | 2020-07-13 17:20:09 +0200 | [diff] [blame] | 55 | |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame^] | 56 | logger logtree.LeveledLogger |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 57 | } |
| 58 | |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame^] | 59 | // ensureNode is called any time the node that this Service is running on gets |
| 60 | // updated. It uses this data to update this node's prefixes in the Curator. |
Serge Bazanski | c2c7ad9 | 2020-07-13 17:20:09 +0200 | [diff] [blame] | 61 | func (s *Service) ensureNode(newNode *corev1.Node) error { |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame^] | 62 | if newNode.Name != s.NodeName { |
| 63 | // We only care about our own node |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 64 | return nil |
| 65 | } |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame^] | 66 | |
| 67 | var internalIP netip.Addr |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 68 | for _, addr := range newNode.Status.Addresses { |
| 69 | if addr.Type == corev1.NodeInternalIP { |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame^] | 70 | if internalIP.IsUnspecified() { |
Serge Bazanski | c735967 | 2020-10-30 16:38:57 +0100 | [diff] [blame] | 71 | s.logger.Warningf("More than one NodeInternalIP specified, using the first one") |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 72 | break |
| 73 | } |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame^] | 74 | ip, err := netip.ParseAddr(addr.Address) |
| 75 | if err != nil { |
Serge Bazanski | c735967 | 2020-10-30 16:38:57 +0100 | [diff] [blame] | 76 | s.logger.Warningf("Failed to parse Internal IP %s", addr.Address) |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame^] | 77 | continue |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 78 | } |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame^] | 79 | internalIP = ip |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 80 | } |
| 81 | } |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame^] | 82 | if internalIP.IsUnspecified() { |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 83 | return errors.New("node has no Internal IP") |
| 84 | } |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame^] | 85 | |
| 86 | var prefixes oclusternet.Prefixes |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 87 | for _, podNetStr := range newNode.Spec.PodCIDRs { |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame^] | 88 | prefix, err := netip.ParsePrefix(podNetStr) |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 89 | if err != nil { |
Serge Bazanski | c735967 | 2020-10-30 16:38:57 +0100 | [diff] [blame] | 90 | s.logger.Warningf("Node %s PodCIDR failed to parse, ignored: %v", newNode.Name, err) |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 91 | continue |
| 92 | } |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame^] | 93 | prefixes = append(prefixes, prefix) |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 94 | } |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame^] | 95 | prefixes = append(prefixes, netip.PrefixFrom(internalIP, 32)) |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 96 | |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame^] | 97 | s.logger.V(1).Infof("Updating locally originated prefixes: %+v", prefixes) |
| 98 | s.Prefixes.Set(&prefixes) |
Serge Bazanski | c2c7ad9 | 2020-07-13 17:20:09 +0200 | [diff] [blame] | 99 | return nil |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 100 | } |
| 101 | |
| 102 | // Run runs the ClusterNet service. See package description for what it does. |
Serge Bazanski | c2c7ad9 | 2020-07-13 17:20:09 +0200 | [diff] [blame] | 103 | func (s *Service) Run(ctx context.Context) error { |
| 104 | logger := supervisor.Logger(ctx) |
Lorenz Brun | ca24cfa | 2020-08-18 13:49:37 +0200 | [diff] [blame] | 105 | s.logger = logger |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 106 | |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame^] | 107 | // Make a 'shared' informer. It's shared by name, but we don't actually share it |
| 108 | // - instead we have to use it as the standard Informer API does not support |
| 109 | // error handling. And we want to use a dedicated informer because we want to |
| 110 | // only watch our own node. |
| 111 | lw := cache.NewListWatchFromClient( |
| 112 | s.Kubernetes.CoreV1().RESTClient(), |
| 113 | "nodes", "", |
| 114 | fields.OneTermEqualSelector("metadata.name", s.NodeName), |
| 115 | ) |
| 116 | ni := cache.NewSharedInformer(lw, &corev1.Node{}, time.Second*5) |
| 117 | ni.AddEventHandler(cache.ResourceEventHandlerFuncs{ |
Serge Bazanski | c2c7ad9 | 2020-07-13 17:20:09 +0200 | [diff] [blame] | 118 | AddFunc: func(new interface{}) { |
| 119 | newNode, ok := new.(*corev1.Node) |
| 120 | if !ok { |
Serge Bazanski | c735967 | 2020-10-30 16:38:57 +0100 | [diff] [blame] | 121 | logger.Errorf("Received non-node item %+v in node event handler", new) |
Serge Bazanski | c2c7ad9 | 2020-07-13 17:20:09 +0200 | [diff] [blame] | 122 | return |
| 123 | } |
| 124 | if err := s.ensureNode(newNode); err != nil { |
Serge Bazanski | c735967 | 2020-10-30 16:38:57 +0100 | [diff] [blame] | 125 | logger.Warningf("Failed to sync node: %v", err) |
Serge Bazanski | c2c7ad9 | 2020-07-13 17:20:09 +0200 | [diff] [blame] | 126 | } |
| 127 | }, |
| 128 | UpdateFunc: func(old, new interface{}) { |
| 129 | newNode, ok := new.(*corev1.Node) |
| 130 | if !ok { |
Serge Bazanski | c735967 | 2020-10-30 16:38:57 +0100 | [diff] [blame] | 131 | logger.Errorf("Received non-node item %+v in node event handler", new) |
Serge Bazanski | c2c7ad9 | 2020-07-13 17:20:09 +0200 | [diff] [blame] | 132 | return |
| 133 | } |
| 134 | if err := s.ensureNode(newNode); err != nil { |
Serge Bazanski | c735967 | 2020-10-30 16:38:57 +0100 | [diff] [blame] | 135 | logger.Warningf("Failed to sync node: %v", err) |
Serge Bazanski | c2c7ad9 | 2020-07-13 17:20:09 +0200 | [diff] [blame] | 136 | } |
| 137 | }, |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame^] | 138 | }) |
| 139 | ni.SetWatchErrorHandler(func(_ *cache.Reflector, err error) { |
| 140 | supervisor.Logger(ctx).Errorf("node informer watch error: %v", err) |
Serge Bazanski | c2c7ad9 | 2020-07-13 17:20:09 +0200 | [diff] [blame] | 141 | }) |
| 142 | |
| 143 | supervisor.Signal(ctx, supervisor.SignalHealthy) |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame^] | 144 | ni.Run(ctx.Done()) |
Serge Bazanski | c2c7ad9 | 2020-07-13 17:20:09 +0200 | [diff] [blame] | 145 | return ctx.Err() |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 146 | } |