core -> metropolis

Smalltown is now called Metropolis!

This is the first commit in a series of cleanup commits that prepare us
for an open source release. This one just moves some Bazel packages around
to follow a stricter directory layout.

All of Metropolis now lives in `//metropolis`.

All of Metropolis Node code now lives in `//metropolis/node`.

All of the main /init now lives in `//m/n/core`.

All of the Kubernetes functionality/glue now lives in `//m/n/kubernetes`.

Next steps:
     - hunt down all references to Smalltown and replace them appropriately
     - narrow down visibility rules
     - document new code organization
     - move `//build/toolchain` to `//monogon/build/toolchain`
     - do another cleanup pass between `//golibs` and
       `//monogon/node/{core,common}`.
     - remove `//delta` and `//anubis`

Fixes T799.

Test Plan: Just a very large refactor. CI should help us out here.

Bug: T799

X-Origin-Diff: phab/D667
GitOrigin-RevId: 6029b8d4edc42325d50042596b639e8b122d0ded
diff --git a/metropolis/node/kubernetes/clusternet/BUILD.bazel b/metropolis/node/kubernetes/clusternet/BUILD.bazel
new file mode 100644
index 0000000..9e9cc01
--- /dev/null
+++ b/metropolis/node/kubernetes/clusternet/BUILD.bazel
@@ -0,0 +1,27 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+go_library(
+    name = "go_default_library",
+    srcs = [
+        "clusternet.go",
+        "netlink_compat.go",
+    ],
+    importpath = "git.monogon.dev/source/nexantic.git/metropolis/node/kubernetes/clusternet",
+    visibility = ["//metropolis/node/kubernetes:__subpackages__"],  # only packages under //metropolis/node/kubernetes may depend on this
+    deps = [
+        "//metropolis/node:go_default_library",
+        "//metropolis/node/common/jsonpatch:go_default_library",
+        "//metropolis/node/common/supervisor:go_default_library",
+        "//metropolis/node/core/localstorage:go_default_library",
+        "//metropolis/node/core/logtree:go_default_library",
+        "@com_github_vishvananda_netlink//:go_default_library",
+        "@com_zx2c4_golang_wireguard_wgctrl//:go_default_library",
+        "@com_zx2c4_golang_wireguard_wgctrl//wgtypes:go_default_library",
+        "@io_k8s_api//core/v1:go_default_library",
+        "@io_k8s_apimachinery//pkg/apis/meta/v1:go_default_library",
+        "@io_k8s_apimachinery//pkg/types:go_default_library",
+        "@io_k8s_client_go//informers:go_default_library",
+        "@io_k8s_client_go//kubernetes:go_default_library",
+        "@io_k8s_client_go//tools/cache:go_default_library",
+    ],
+)
diff --git a/metropolis/node/kubernetes/clusternet/clusternet.go b/metropolis/node/kubernetes/clusternet/clusternet.go
new file mode 100644
index 0000000..d8dc7ad
--- /dev/null
+++ b/metropolis/node/kubernetes/clusternet/clusternet.go
@@ -0,0 +1,276 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package clusternet implements a WireGuard-based overlay network for Kubernetes. It relies on controller-manager's
+// IPAM to assign IP ranges to nodes and on Kubernetes' Node objects to distribute the Node IPs and public keys.
+//
+// It sets up a single WireGuard network interface and routes the entire ClusterCIDR into that network interface,
+// relying on WireGuard's AllowedIPs mechanism to look up the correct peer node to send the traffic to. This means
+// that the routing table doesn't change and doesn't have to be separately managed. When clusternet is started
+// it annotates its WireGuard public key onto its node object.
+// For each node object that's created or updated on the K8s apiserver it checks if a public key annotation is set and,
+// if so, configures a peer with that public key, its InternalIP as endpoint and its pod CIDRs and InternalIP as AllowedIPs.
+package clusternet
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"net"
+	"os"
+
+	"github.com/vishvananda/netlink"
+	"golang.zx2c4.com/wireguard/wgctrl"
+	"golang.zx2c4.com/wireguard/wgctrl/wgtypes"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/client-go/informers"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/tools/cache"
+
+	common "git.monogon.dev/source/nexantic.git/metropolis/node"
+	"git.monogon.dev/source/nexantic.git/metropolis/node/common/jsonpatch"
+	"git.monogon.dev/source/nexantic.git/metropolis/node/common/supervisor"
+	"git.monogon.dev/source/nexantic.git/metropolis/node/core/localstorage"
+	"git.monogon.dev/source/nexantic.git/metropolis/node/core/logtree"
+)
+
+const (
+	clusterNetDeviceName = "clusternet"                            // name of the WireGuard link that Run creates and configures
+	publicKeyAnnotation  = "node.smalltown.nexantic.com/wg-pubkey" // Node annotation holding a node's WireGuard public key
+)
+
+type Service struct {
+	NodeName        string                                                 // name of this node's Node object; it is annotated and never peered
+	Kubernetes      kubernetes.Interface                                   // client used to patch this node's annotations
+	ClusterNet      net.IPNet                                              // network that Run routes into the WireGuard interface
+	InformerFactory informers.SharedInformerFactory                        // provides the Node informer that drives peer updates
+	DataDirectory   *localstorage.DataKubernetesClusterNetworkingDirectory // its Key file persists the private key
+
+	wgClient *wgctrl.Client        // WireGuard control client, created in Run
+	privKey  wgtypes.Key           // this node's private key, set by ensureOnDiskKey
+	logger   logtree.LeveledLogger // logger obtained from the supervisor in Run
+}
+
+// ensureNode creates/updates the corresponding WireGuard peer entry for the given node object.
+func (s *Service) ensureNode(newNode *corev1.Node) error {
+	if newNode.Name == s.NodeName {
+		// Node doesn't need to connect to itself
+		return nil
+	}
+	pubKeyRaw := newNode.Annotations[publicKeyAnnotation]
+	if pubKeyRaw == "" {
+		return nil // node carries no public key annotation, so there is no peer to configure
+	}
+	pubKey, err := wgtypes.ParseKey(pubKeyRaw)
+	if err != nil {
+		return fmt.Errorf("failed to parse public-key annotation: %w", err)
+	}
+	var internalIP net.IP
+	for _, addr := range newNode.Status.Addresses {
+		if addr.Type == corev1.NodeInternalIP {
+			if internalIP != nil {
+				s.logger.Warningf("More than one NodeInternalIP specified, using the first one")
+				break
+			}
+			internalIP = net.ParseIP(addr.Address)
+			if internalIP == nil {
+				s.logger.Warningf("Failed to parse Internal IP %s", addr.Address) // a later InternalIP entry may still be used
+			}
+		}
+	}
+	if internalIP == nil {
+		return errors.New("node has no Internal IP")
+	}
+	var allowedIPs []net.IPNet
+	for _, podNetStr := range newNode.Spec.PodCIDRs {
+		_, podNet, err := net.ParseCIDR(podNetStr)
+		if err != nil {
+			s.logger.Warningf("Node %s PodCIDR failed to parse, ignored: %v", newNode.Name, err)
+			continue
+		}
+		allowedIPs = append(allowedIPs, *podNet)
+	}
+	allowedIPs = append(allowedIPs, net.IPNet{IP: internalIP, Mask: net.CIDRMask(32, 32)}) // NOTE(review): /32 mask assumes an IPv4 InternalIP — confirm
+	s.logger.V(1).Infof("Adding/Updating WireGuard peer node %s, endpoint %s, allowedIPs %+v", newNode.Name, internalIP.String(), allowedIPs)
+	// WireGuard's kernel side has create/update semantics on peers by default. So we can just add the peer multiple
+	// times to update it.
+	err = s.wgClient.ConfigureDevice(clusterNetDeviceName, wgtypes.Config{
+		Peers: []wgtypes.PeerConfig{{
+			PublicKey:         pubKey,
+			Endpoint:          &net.UDPAddr{Port: common.WireGuardPort, IP: internalIP},
+			ReplaceAllowedIPs: true, // the list computed above supersedes whatever an earlier update configured
+			AllowedIPs:        allowedIPs,
+		}},
+	})
+	if err != nil {
+		return fmt.Errorf("failed to add WireGuard peer node: %w", err)
+	}
+	return nil
+}
+
+// removeNode removes the corresponding WireGuard peer entry for the given node object. Peers are identified by public key.
+func (s *Service) removeNode(oldNode *corev1.Node) error {
+	if oldNode.Name == s.NodeName {
+		// Node doesn't need to connect to itself
+		return nil
+	}
+	pubKeyRaw := oldNode.Annotations[publicKeyAnnotation]
+	if pubKeyRaw == "" {
+		return nil // no annotated key to identify a peer by, nothing to remove
+	}
+	pubKey, err := wgtypes.ParseKey(pubKeyRaw)
+	if err != nil {
+		return fmt.Errorf("node public-key annotation not decodable: %w", err)
+	}
+	err = s.wgClient.ConfigureDevice(clusterNetDeviceName, wgtypes.Config{
+		Peers: []wgtypes.PeerConfig{{
+			PublicKey: pubKey,
+			Remove:    true, // ask for the peer with this public key to be removed from the device
+		}},
+	})
+	if err != nil {
+		return fmt.Errorf("failed to remove WireGuard peer node: %w", err)
+	}
+	return nil
+}
+
+// ensureOnDiskKey sets s.privKey to the on-disk private key, generating and persisting a new one if none exists yet.
+func (s *Service) ensureOnDiskKey() error {
+	keyRaw, err := s.DataDirectory.Key.Read()
+	if errors.Is(err, os.ErrNotExist) { // unlike os.IsNotExist, this also recognizes wrapped errors
+		key, err := wgtypes.GeneratePrivateKey()
+		if err != nil {
+			return fmt.Errorf("failed to generate private key: %w", err)
+		}
+		if err := s.DataDirectory.Key.Write([]byte(key.String()), 0600); err != nil {
+			return fmt.Errorf("failed to store newly generated key: %w", err)
+		}
+
+		s.privKey = key
+		return nil
+	} else if err != nil {
+		return fmt.Errorf("failed to load on-disk key: %w", err)
+	}
+
+	key, err := wgtypes.ParseKey(string(keyRaw)) // the file holds the key in the form written via key.String() above
+	if err != nil {
+		return fmt.Errorf("invalid private key in file: %w", err)
+	}
+	s.privKey = key
+	return nil
+}
+
+// annotateThisNode annotates the node (as defined by NodeName) with the WireGuard public key of this node.
+func (s *Service) annotateThisNode(ctx context.Context) error {
+	patch := []jsonpatch.JsonPatchOp{{
+		Operation: "add", // NOTE(review): per RFC 6902 "add" needs /metadata/annotations to exist already — confirm
+		Path:      "/metadata/annotations/" + jsonpatch.EncodeJSONRefToken(publicKeyAnnotation),
+		Value:     s.privKey.PublicKey().String(), // public half of s.privKey, so ensureOnDiskKey must have run before
+	}}
+
+	patchRaw, err := json.Marshal(patch)
+	if err != nil {
+		return fmt.Errorf("failed to encode JSONPatch: %w", err)
+	}
+
+	if _, err := s.Kubernetes.CoreV1().Nodes().Patch(ctx, s.NodeName, types.JSONPatchType, patchRaw, metav1.PatchOptions{}); err != nil {
+		return fmt.Errorf("failed to patch resource: %w", err)
+	}
+
+	return nil
+}
+
+// Run runs the ClusterNet service. See package description for what it does. It sets up the WireGuard interface and
+// the ClusterNet route, annotates this node with its public key and then processes Node events until ctx is done, at
+// which point ctx.Err() is returned. Setup failures are returned as errors; the interface is deleted on return.
+func (s *Service) Run(ctx context.Context) error {
+	logger := supervisor.Logger(ctx)
+	s.logger = logger
+
+	wgClient, err := wgctrl.New()
+	if err != nil {
+		return fmt.Errorf("failed to connect to netlink's WireGuard config endpoint: %w", err)
+	}
+	s.wgClient = wgClient
+	defer wgClient.Close() // release the client's resources on every return path instead of leaking them
+
+	if err := s.ensureOnDiskKey(); err != nil {
+		return fmt.Errorf("failed to ensure on-disk key: %w", err)
+	}
+
+	wgInterface := &Wireguard{LinkAttrs: netlink.LinkAttrs{Name: clusterNetDeviceName, Flags: net.FlagUp}}
+	if err := netlink.LinkAdd(wgInterface); err != nil {
+		return fmt.Errorf("failed to add WireGuard network interfacee: %w", err)
+	}
+	defer netlink.LinkDel(wgInterface)
+
+	listenPort := common.WireGuardPort
+	if err := wgClient.ConfigureDevice(clusterNetDeviceName, wgtypes.Config{
+		PrivateKey: &s.privKey,
+		ListenPort: &listenPort,
+	}); err != nil {
+		return fmt.Errorf("failed to set up WireGuard interface: %w", err)
+	}
+
+	if err := netlink.RouteAdd(&netlink.Route{
+		Dst:       &s.ClusterNet,
+		LinkIndex: wgInterface.Index,
+	}); err != nil && !os.IsExist(err) {
+		return fmt.Errorf("failed to add cluster net route to Wireguard interface: %w", err)
+	}
+
+	if err := s.annotateThisNode(ctx); err != nil {
+		return fmt.Errorf("when annotating this node with public key: %w", err)
+	}
+
+	// syncNode backs both the add and update handlers, which only differ in how the current object is passed.
+	syncNode := func(obj interface{}) {
+		node, ok := obj.(*corev1.Node)
+		if !ok {
+			logger.Errorf("Received non-node item %+v in node event handler", obj)
+			return
+		}
+		if err := s.ensureNode(node); err != nil {
+			logger.Warningf("Failed to sync node: %v", err)
+		}
+	}
+	nodeInformer := s.InformerFactory.Core().V1().Nodes()
+	nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
+		AddFunc:    syncNode,
+		UpdateFunc: func(_, obj interface{}) { syncNode(obj) },
+		DeleteFunc: func(old interface{}) {
+			// A missed delete event is delivered as a tombstone wrapping the last known state of the object.
+			if tombstone, ok := old.(cache.DeletedFinalStateUnknown); ok {
+				old = tombstone.Obj
+			}
+			oldNode, ok := old.(*corev1.Node)
+			if !ok {
+				logger.Errorf("Received non-node item %+v in node event handler", old)
+				return
+			}
+			if err := s.removeNode(oldNode); err != nil {
+				logger.Warningf("Failed to sync node: %v", err)
+			}
+		},
+	})
+
+	supervisor.Signal(ctx, supervisor.SignalHealthy)
+	nodeInformer.Informer().Run(ctx.Done())
+	return ctx.Err()
+}
diff --git a/metropolis/node/kubernetes/clusternet/netlink_compat.go b/metropolis/node/kubernetes/clusternet/netlink_compat.go
new file mode 100644
index 0000000..a90cc47
--- /dev/null
+++ b/metropolis/node/kubernetes/clusternet/netlink_compat.go
@@ -0,0 +1,33 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Can be removed once https://github.com/vishvananda/netlink/pull/464 lands
+package clusternet
+
+import "github.com/vishvananda/netlink"
+
+// Wireguard represents links of type "wireguard", see https://www.wireguard.com/
+type Wireguard struct {
+	netlink.LinkAttrs // embedded generic link attributes; exposed through Attrs
+}
+
+func (wg *Wireguard) Attrs() *netlink.LinkAttrs {
+	return &wg.LinkAttrs // pointer to the embedded attributes, not a copy
+}
+
+func (wg *Wireguard) Type() string {
+	return "wireguard" // link type name reported for this kind of link
+}