Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 1 | // Copyright 2020 The Monogon Project Authors. |
| 2 | // |
| 3 | // SPDX-License-Identifier: Apache-2.0 |
| 4 | // |
| 5 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | // you may not use this file except in compliance with the License. |
| 7 | // You may obtain a copy of the License at |
| 8 | // |
| 9 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | // |
| 11 | // Unless required by applicable law or agreed to in writing, software |
| 12 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | // See the License for the specific language governing permissions and |
| 15 | // limitations under the License. |
| 16 | |
Serge Bazanski | 216fe7b | 2021-05-21 18:36:16 +0200 | [diff] [blame] | 17 | // Package clusternet implements a WireGuard-based overlay network for |
| 18 | // Kubernetes. It relies on controller-manager's IPAM to assign IP ranges to |
| 19 | // nodes and on Kubernetes' Node objects to distribute the Node IPs and public |
| 20 | // keys. |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 21 | // |
Serge Bazanski | 216fe7b | 2021-05-21 18:36:16 +0200 | [diff] [blame] | 22 | // It sets up a single WireGuard network interface and routes the entire |
| 23 | // ClusterCIDR into that network interface, relying on WireGuard's AllowedIPs |
| 24 | // mechanism to look up the correct peer node to send the traffic to. This |
| 25 | // means that the routing table doesn't change and doesn't have to be |
| 26 | // separately managed. When clusternet is started it annotates its WireGuard |
| 27 | // public key onto its node object. |
| 28 | // For each node object that's created or updated on the K8s apiserver it |
| 29 | // checks if a public key annotation is set and if yes a peer with that public |
| 30 | // key, its InternalIP as endpoint and the CIDR for that node as AllowedIPs is |
| 31 | // created. |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 32 | package clusternet |
| 33 | |
| 34 | import ( |
| 35 | "context" |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame] | 36 | "net/netip" |
| 37 | "time" |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 38 | |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 39 | corev1 "k8s.io/api/core/v1" |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame] | 40 | "k8s.io/apimachinery/pkg/fields" |
Serge Bazanski | 77cb6c5 | 2020-12-19 00:09:22 +0100 | [diff] [blame] | 41 | "k8s.io/client-go/kubernetes" |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 42 | "k8s.io/client-go/tools/cache" |
| 43 | |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame] | 44 | oclusternet "source.monogon.dev/metropolis/node/core/clusternet" |
| 45 | "source.monogon.dev/metropolis/pkg/event" |
Serge Bazanski | 216fe7b | 2021-05-21 18:36:16 +0200 | [diff] [blame] | 46 | "source.monogon.dev/metropolis/pkg/logtree" |
Serge Bazanski | 31370b0 | 2021-01-07 16:31:14 +0100 | [diff] [blame] | 47 | "source.monogon.dev/metropolis/pkg/supervisor" |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 48 | ) |
| 49 | |
Serge Bazanski | c2c7ad9 | 2020-07-13 17:20:09 +0200 | [diff] [blame] | 50 | type Service struct { |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame] | 51 | NodeName string |
| 52 | Kubernetes kubernetes.Interface |
| 53 | Prefixes event.Value[*oclusternet.Prefixes] |
Serge Bazanski | c2c7ad9 | 2020-07-13 17:20:09 +0200 | [diff] [blame] | 54 | |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame] | 55 | logger logtree.LeveledLogger |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 56 | } |
| 57 | |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame] | 58 | // ensureNode is called any time the node that this Service is running on gets |
| 59 | // updated. It uses this data to update this node's prefixes in the Curator. |
Serge Bazanski | c2c7ad9 | 2020-07-13 17:20:09 +0200 | [diff] [blame] | 60 | func (s *Service) ensureNode(newNode *corev1.Node) error { |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame] | 61 | if newNode.Name != s.NodeName { |
| 62 | // We only care about our own node |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 63 | return nil |
| 64 | } |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame] | 65 | |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame] | 66 | var prefixes oclusternet.Prefixes |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 67 | for _, podNetStr := range newNode.Spec.PodCIDRs { |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame] | 68 | prefix, err := netip.ParsePrefix(podNetStr) |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 69 | if err != nil { |
Serge Bazanski | c735967 | 2020-10-30 16:38:57 +0100 | [diff] [blame] | 70 | s.logger.Warningf("Node %s PodCIDR failed to parse, ignored: %v", newNode.Name, err) |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 71 | continue |
| 72 | } |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame] | 73 | prefixes = append(prefixes, prefix) |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 74 | } |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 75 | |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame] | 76 | s.logger.V(1).Infof("Updating locally originated prefixes: %+v", prefixes) |
| 77 | s.Prefixes.Set(&prefixes) |
Serge Bazanski | c2c7ad9 | 2020-07-13 17:20:09 +0200 | [diff] [blame] | 78 | return nil |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 79 | } |
| 80 | |
| 81 | // Run runs the ClusterNet service. See package description for what it does. |
Serge Bazanski | c2c7ad9 | 2020-07-13 17:20:09 +0200 | [diff] [blame] | 82 | func (s *Service) Run(ctx context.Context) error { |
| 83 | logger := supervisor.Logger(ctx) |
Lorenz Brun | ca24cfa | 2020-08-18 13:49:37 +0200 | [diff] [blame] | 84 | s.logger = logger |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 85 | |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame] | 86 | // Make a 'shared' informer. It's shared by name, but we don't actually share it |
| 87 | // - instead we have to use it as the standard Informer API does not support |
| 88 | // error handling. And we want to use a dedicated informer because we want to |
| 89 | // only watch our own node. |
| 90 | lw := cache.NewListWatchFromClient( |
| 91 | s.Kubernetes.CoreV1().RESTClient(), |
| 92 | "nodes", "", |
| 93 | fields.OneTermEqualSelector("metadata.name", s.NodeName), |
| 94 | ) |
| 95 | ni := cache.NewSharedInformer(lw, &corev1.Node{}, time.Second*5) |
| 96 | ni.AddEventHandler(cache.ResourceEventHandlerFuncs{ |
Serge Bazanski | c2c7ad9 | 2020-07-13 17:20:09 +0200 | [diff] [blame] | 97 | AddFunc: func(new interface{}) { |
| 98 | newNode, ok := new.(*corev1.Node) |
| 99 | if !ok { |
Serge Bazanski | c735967 | 2020-10-30 16:38:57 +0100 | [diff] [blame] | 100 | logger.Errorf("Received non-node item %+v in node event handler", new) |
Serge Bazanski | c2c7ad9 | 2020-07-13 17:20:09 +0200 | [diff] [blame] | 101 | return |
| 102 | } |
| 103 | if err := s.ensureNode(newNode); err != nil { |
Serge Bazanski | c735967 | 2020-10-30 16:38:57 +0100 | [diff] [blame] | 104 | logger.Warningf("Failed to sync node: %v", err) |
Serge Bazanski | c2c7ad9 | 2020-07-13 17:20:09 +0200 | [diff] [blame] | 105 | } |
| 106 | }, |
| 107 | UpdateFunc: func(old, new interface{}) { |
| 108 | newNode, ok := new.(*corev1.Node) |
| 109 | if !ok { |
Serge Bazanski | c735967 | 2020-10-30 16:38:57 +0100 | [diff] [blame] | 110 | logger.Errorf("Received non-node item %+v in node event handler", new) |
Serge Bazanski | c2c7ad9 | 2020-07-13 17:20:09 +0200 | [diff] [blame] | 111 | return |
| 112 | } |
| 113 | if err := s.ensureNode(newNode); err != nil { |
Serge Bazanski | c735967 | 2020-10-30 16:38:57 +0100 | [diff] [blame] | 114 | logger.Warningf("Failed to sync node: %v", err) |
Serge Bazanski | c2c7ad9 | 2020-07-13 17:20:09 +0200 | [diff] [blame] | 115 | } |
| 116 | }, |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame] | 117 | }) |
| 118 | ni.SetWatchErrorHandler(func(_ *cache.Reflector, err error) { |
| 119 | supervisor.Logger(ctx).Errorf("node informer watch error: %v", err) |
Serge Bazanski | c2c7ad9 | 2020-07-13 17:20:09 +0200 | [diff] [blame] | 120 | }) |
| 121 | |
| 122 | supervisor.Signal(ctx, supervisor.SignalHealthy) |
Serge Bazanski | 7920852 | 2023-03-28 20:14:58 +0200 | [diff] [blame] | 123 | ni.Run(ctx.Done()) |
Serge Bazanski | c2c7ad9 | 2020-07-13 17:20:09 +0200 | [diff] [blame] | 124 | return ctx.Err() |
Lorenz Brun | f042e6f | 2020-06-24 16:46:09 +0200 | [diff] [blame] | 125 | } |