| // Copyright 2020 The Monogon Project Authors. |
| // |
| // SPDX-License-Identifier: Apache-2.0 |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| // Package clusternet implements a WireGuard-based overlay network for Kubernetes. It relies on controller-manager's |
| // IPAM to assign IP ranges to nodes and on Kubernetes' Node objects to distribute the Node IPs and public keys. |
| // |
| // It sets up a single WireGuard network interface and routes the entire ClusterCIDR into that network interface, |
| // relying on WireGuard's AllowedIPs mechanism to look up the correct peer node to send the traffic to. This means |
| // that the routing table doesn't change and doesn't have to be separately managed. When clusternet is started |
| // it annotates its WireGuard public key onto its node object. |
| // For each node object that's created or updated on the K8s apiserver it checks if a public key annotation is set and |
| // if yes a peer with that public key, its InternalIP as endpoint and the CIDR for that node as AllowedIPs is created. |
| package clusternet |
| |
| import ( |
| "context" |
| "encoding/json" |
| "errors" |
| "fmt" |
| "net" |
| "os" |
| |
| "k8s.io/client-go/informers" |
| |
| "k8s.io/client-go/kubernetes" |
| |
| "github.com/vishvananda/netlink" |
| "go.uber.org/zap" |
| "golang.zx2c4.com/wireguard/wgctrl" |
| "golang.zx2c4.com/wireguard/wgctrl/wgtypes" |
| corev1 "k8s.io/api/core/v1" |
| metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" |
| "k8s.io/apimachinery/pkg/types" |
| "k8s.io/client-go/tools/cache" |
| |
| "git.monogon.dev/source/nexantic.git/core/internal/common" |
| "git.monogon.dev/source/nexantic.git/core/internal/common/supervisor" |
| "git.monogon.dev/source/nexantic.git/core/internal/localstorage" |
| "git.monogon.dev/source/nexantic.git/core/pkg/jsonpatch" |
| ) |
| |
// clusterNetDeviceName is the name of the WireGuard network interface that
// this package creates and configures.
const clusterNetDeviceName = "clusternet"

// publicKeyAnnotation is the Node annotation key under which each node's
// WireGuard public key is published and from which peers' keys are read.
const publicKeyAnnotation = "node.smalltown.nexantic.com/wg-pubkey"
| |
// Service holds the configuration and runtime state of the cluster network
// overlay. The exported fields are read by the methods below; the unexported
// fields are runtime state used while handling node events.
type Service struct {
	// NodeName is the name of the local node. Node objects with this name are
	// skipped when managing peers, and it is the Node object that gets
	// annotated with the local public key.
	NodeName string
	// Kubernetes is the client used to patch the local Node object.
	Kubernetes kubernetes.Interface
	// ClusterNet is the network that gets routed into the WireGuard interface.
	ClusterNet net.IPNet
	// InformerFactory provides the Node informer whose events drive peer
	// creation, update and removal.
	InformerFactory informers.SharedInformerFactory
	// DataDirectory provides access to the persisted WireGuard private key
	// (its Key member is read and written by ensureOnDiskKey).
	DataDirectory *localstorage.DataKubernetesClusterNetworkingDirectory

	// wgClient is the WireGuard control client used by ensureNode and
	// removeNode to configure peers; it must be non-nil before they are called.
	wgClient *wgctrl.Client
	// privKey is the local WireGuard private key, populated by ensureOnDiskKey.
	privKey wgtypes.Key
	// logger is used by ensureNode for warnings and debug output; it must be
	// non-nil before ensureNode is called.
	logger *zap.Logger
}
| |
| // ensureNode creates/updates the corresponding WireGuard peer entry for the given node objet |
| func (s *Service) ensureNode(newNode *corev1.Node) error { |
| if newNode.Name == s.NodeName { |
| // Node doesn't need to connect to itself |
| return nil |
| } |
| pubKeyRaw := newNode.Annotations[publicKeyAnnotation] |
| if pubKeyRaw == "" { |
| return nil |
| } |
| pubKey, err := wgtypes.ParseKey(pubKeyRaw) |
| if err != nil { |
| return fmt.Errorf("failed to parse public-key annotation: %w", err) |
| } |
| var internalIP net.IP |
| for _, addr := range newNode.Status.Addresses { |
| if addr.Type == corev1.NodeInternalIP { |
| if internalIP != nil { |
| s.logger.Warn("More than one NodeInternalIP specified, using the first one") |
| break |
| } |
| internalIP = net.ParseIP(addr.Address) |
| if internalIP == nil { |
| s.logger.Warn("failed to parse Internal IP") |
| } |
| } |
| } |
| if internalIP == nil { |
| return errors.New("node has no Internal IP") |
| } |
| var allowedIPs []net.IPNet |
| for _, podNetStr := range newNode.Spec.PodCIDRs { |
| _, podNet, err := net.ParseCIDR(podNetStr) |
| if err != nil { |
| s.logger.Warn("Node PodCIDR failed to parse, ignored", zap.Error(err), zap.String("node", newNode.Name)) |
| continue |
| } |
| allowedIPs = append(allowedIPs, *podNet) |
| } |
| s.logger.Debug("Adding/Updating WireGuard peer node", zap.String("node", newNode.Name), |
| zap.String("endpointIP", internalIP.String()), zap.Any("allowedIPs", allowedIPs)) |
| // WireGuard's kernel side has create/update semantics on peers by default. So we can just add the peer multiple |
| // times to update it. |
| err = s.wgClient.ConfigureDevice(clusterNetDeviceName, wgtypes.Config{ |
| Peers: []wgtypes.PeerConfig{{ |
| PublicKey: pubKey, |
| Endpoint: &net.UDPAddr{Port: common.WireGuardPort, IP: internalIP}, |
| ReplaceAllowedIPs: true, |
| AllowedIPs: allowedIPs, |
| }}, |
| }) |
| if err != nil { |
| return fmt.Errorf("failed to add WireGuard peer node: %w", err) |
| } |
| return nil |
| } |
| |
| // removeNode removes the corresponding WireGuard peer entry for the given node object |
| func (s *Service) removeNode(oldNode *corev1.Node) error { |
| if oldNode.Name == s.NodeName { |
| // Node doesn't need to connect to itself |
| return nil |
| } |
| pubKeyRaw := oldNode.Annotations[publicKeyAnnotation] |
| if pubKeyRaw == "" { |
| return nil |
| } |
| pubKey, err := wgtypes.ParseKey(pubKeyRaw) |
| if err != nil { |
| return fmt.Errorf("node public-key annotation not decodable: %w", err) |
| } |
| err = s.wgClient.ConfigureDevice(clusterNetDeviceName, wgtypes.Config{ |
| Peers: []wgtypes.PeerConfig{{ |
| PublicKey: pubKey, |
| Remove: true, |
| }}, |
| }) |
| if err != nil { |
| return fmt.Errorf("failed to remove WireGuard peer node: %w", err) |
| } |
| return nil |
| } |
| |
| // ensureOnDiskKey loads the private key from disk or (if none exists) generates one and persists it. |
| func (s *Service) ensureOnDiskKey() error { |
| keyRaw, err := s.DataDirectory.Key.Read() |
| if os.IsNotExist(err) { |
| key, err := wgtypes.GeneratePrivateKey() |
| if err != nil { |
| return fmt.Errorf("failed to generate private key: %w", err) |
| } |
| if err := s.DataDirectory.Key.Write([]byte(key.String()), 0600); err != nil { |
| return fmt.Errorf("failed to store newly generated key: %w", err) |
| } |
| |
| s.privKey = key |
| return nil |
| } else if err != nil { |
| return fmt.Errorf("failed to load on-disk key: %w", err) |
| } |
| |
| key, err := wgtypes.ParseKey(string(keyRaw)) |
| if err != nil { |
| return fmt.Errorf("invalid private key in file: %w", err) |
| } |
| s.privKey = key |
| return nil |
| } |
| |
| // annotateThisNode annotates the node (as defined by NodeName) with the wireguard public key of this node. |
| func (s *Service) annotateThisNode(ctx context.Context) error { |
| patch := []jsonpatch.JsonPatchOp{{ |
| Operation: "add", |
| Path: "/metadata/annotations/" + jsonpatch.EncodeJSONRefToken(publicKeyAnnotation), |
| Value: s.privKey.PublicKey().String(), |
| }} |
| |
| patchRaw, err := json.Marshal(patch) |
| if err != nil { |
| return fmt.Errorf("failed to encode JSONPatch: %w", err) |
| } |
| |
| if _, err := s.Kubernetes.CoreV1().Nodes().Patch(ctx, s.NodeName, types.JSONPatchType, patchRaw, metav1.PatchOptions{}); err != nil { |
| return fmt.Errorf("failed to patch resource: %w", err) |
| } |
| |
| return nil |
| } |
| |
| // Run runs the ClusterNet service. See package description for what it does. |
| func (s *Service) Run(ctx context.Context) error { |
| logger := supervisor.Logger(ctx) |
| |
| wgClient, err := wgctrl.New() |
| if err != nil { |
| return fmt.Errorf("failed to connect to netlink's WireGuard config endpoint: %w", err) |
| } |
| |
| wgInterface := &Wireguard{LinkAttrs: netlink.LinkAttrs{Name: clusterNetDeviceName, Flags: net.FlagUp}} |
| if err := netlink.LinkAdd(wgInterface); err != nil { |
| return fmt.Errorf("failed to add WireGuard network interfacee: %w", err) |
| } |
| defer netlink.LinkDel(wgInterface) |
| |
| listenPort := common.WireGuardPort |
| if err := wgClient.ConfigureDevice(clusterNetDeviceName, wgtypes.Config{ |
| PrivateKey: &s.privKey, |
| ListenPort: &listenPort, |
| }); err != nil { |
| return fmt.Errorf("failed to set up WireGuard interface: %w", err) |
| } |
| |
| if err := netlink.RouteAdd(&netlink.Route{ |
| Dst: &s.ClusterNet, |
| LinkIndex: wgInterface.Index, |
| }); err != nil && !os.IsExist(err) { |
| return fmt.Errorf("failed to add cluster net route to Wireguard interface: %w", err) |
| } |
| |
| if err := s.annotateThisNode(ctx); err != nil { |
| return fmt.Errorf("when annotating this node with public key: %w", err) |
| } |
| |
| nodeInformer := s.InformerFactory.Core().V1().Nodes() |
| nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ |
| AddFunc: func(new interface{}) { |
| newNode, ok := new.(*corev1.Node) |
| if !ok { |
| logger.Error("Received non-node item in node event handler", zap.Reflect("item", new)) |
| return |
| } |
| if err := s.ensureNode(newNode); err != nil { |
| logger.Warn("Failed to sync node", zap.Error(err)) |
| } |
| }, |
| UpdateFunc: func(old, new interface{}) { |
| newNode, ok := new.(*corev1.Node) |
| if !ok { |
| logger.Error("Received non-node item in node event handler", zap.Reflect("item", new)) |
| return |
| } |
| if err := s.ensureNode(newNode); err != nil { |
| logger.Warn("Failed to sync node", zap.Error(err)) |
| } |
| }, |
| DeleteFunc: func(old interface{}) { |
| oldNode, ok := old.(*corev1.Node) |
| if !ok { |
| logger.Error("Received non-node item in node event handler", zap.Reflect("item", oldNode)) |
| return |
| } |
| if err := s.removeNode(oldNode); err != nil { |
| logger.Warn("Failed to sync node", zap.Error(err)) |
| } |
| }, |
| }) |
| |
| supervisor.Signal(ctx, supervisor.SignalHealthy) |
| nodeInformer.Informer().Run(ctx.Done()) |
| return ctx.Err() |
| } |