m/node: implement container networking ourselves

This change gets rid of the CNI mechanism for configuring container
networking in favour of a split approach where the network service is
extended by a gRPC workload network service which handles all of the
work as well as a library which exposes just enough of go-cni's
interface to be a drop-in replacement in containerd, which then talks
to the workload network service.

This is a rather unconventional approach to doing things as CNI itself
is a pluggable interface. The reason for doing it this way is that the
binary executing interface of CNI has a huge spec which is also horrible
to convert into decent Go types and being a binary-calling interface has
inherent lifecycle, complexity and image size disadvantages. The part of
CNI that is actually used by containerd is tiny and its arguments are
well-specified and have decent Go types. It also avoids the whole CNI
caching mechanic which adds further unnecessary complexity.

The reason for the split service model instead of implementing
everything in cniproxy is to allow for more complex logic and Monogon
control plane interfacing from the workload network service. Also this
will allow offloading the actual service to things like DPUs.

Right now there is some ugliness left to make this self-contained. Two
obvious examples are the piping through of the pod network event value
and the exclusion of the first (non-network) IP from the IP allocator.
These will eventually go away but are necessary to get this to work as a
standalone change.

Change-Id: I46c604b7dfd58da9e6ddd0a29241680d25a2a745
Reviewed-on: https://review.monogon.dev/c/monogon/+/4496
Reviewed-by: Jan Schär <jan@monogon.tech>
Tested-by: Jenkins CI
diff --git a/metropolis/node/kubernetes/containerd/cniproxy/BUILD.bazel b/metropolis/node/kubernetes/containerd/cniproxy/BUILD.bazel
new file mode 100644
index 0000000..a53db4d
--- /dev/null
+++ b/metropolis/node/kubernetes/containerd/cniproxy/BUILD.bazel
@@ -0,0 +1,18 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+go_library(
+    name = "cniproxy",
+    srcs = [
+        "cniproxy.go",
+        "cnitypes.go",
+        "opts.go",
+    ],
+    importpath = "source.monogon.dev/metropolis/node/kubernetes/containerd/cniproxy",
+    visibility = ["//visibility:public"],
+    deps = [
+        "//metropolis/node/core/network/workloads/spec",
+        "@com_github_containernetworking_cni//pkg/types",
+        "@org_golang_google_grpc//:grpc",
+        "@org_golang_google_grpc//credentials/insecure",
+    ],
+)
diff --git a/metropolis/node/kubernetes/containerd/cniproxy/cniproxy.go b/metropolis/node/kubernetes/containerd/cniproxy/cniproxy.go
new file mode 100644
index 0000000..eabcfd3
--- /dev/null
+++ b/metropolis/node/kubernetes/containerd/cniproxy/cniproxy.go
@@ -0,0 +1,161 @@
+// Copyright The Monogon Project Authors.
+// SPDX-License-Identifier: Apache-2.0
+
+// Package cni implements an adapter between the go-cni interface and the
+// Monogon gRPC Workload Attachment interface. As we do not intend to actually
+// implement a CNI-compliant plugin, it makes more sense to cut out as much
+// unnecessary logic as possible and take over at the containerd API boundary.
+package cni
+
+import (
+	"context"
+	"fmt"
+	"net"
+
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/credentials/insecure"
+
+	wlapi "source.monogon.dev/metropolis/node/core/network/workloads/spec"
+)
+
+// New returns a CNI that forwards to the workload network service reached
+// via the unix socket /ephemeral/workloadnet.sock. All opts are ignored.
+func New(_ ...Opt) (CNI, error) {
+	conn, err := grpc.NewClient("unix:/ephemeral/workloadnet.sock", grpc.WithTransportCredentials(insecure.NewCredentials()))
+	if err != nil {
+		return nil, fmt.Errorf("while creating workload network client: %w", err)
+	}
+	// conn is not closed here; the returned adapter's client keeps using it.
+	return &adapter{client: wlapi.NewWorkloadNetworkingClient(conn)}, nil
+}
+
+type NamespaceOpts func(n *Namespace) error
+
+// Namespace differs significantly from upstream as we do not have the actual
+// underlying CNI interface and thus we do not need to transform the data into
+// JSON keys.
+type Namespace struct {
+	labels      map[string]string // set by WithLabels; Setup and Remove read "K8S_POD_UID"
+	annotations map[string]string // set by WithCapability for "io.kubernetes.cri.pod-annotations"
+	portMapping []PortMapping     // set by WithCapabilityPortMap
+	bandwidth   BandWidth         // set by WithCapabilityBandWidth
+	dns         DNS               // set by WithCapabilityDNS
+	cgroupPath  string            // set by WithCapabilityCgroupPath
+}
+
+func WithLabels(labels map[string]string) NamespaceOpts {
+	return func(n *Namespace) error {
+		n.labels = labels // stored by reference, not copied
+		return nil
+	}
+}
+
+func WithCapability(name string, capability interface{}) NamespaceOpts {
+	return func(n *Namespace) error {
+		if a, ok := capability.(map[string]string); ok && name == "io.kubernetes.cri.pod-annotations" {
+			n.annotations = a // every other name, and any non-map value, is ignored instead of panicking
+		}
+		return nil
+	}
+}
+
+func WithCapabilityPortMap(portMapping []PortMapping) NamespaceOpts {
+	return func(n *Namespace) error {
+		n.portMapping = portMapping // stored only; Setup and Remove never read it
+		return nil
+	}
+}
+
+func WithCapabilityBandWidth(bandWidth BandWidth) NamespaceOpts {
+	return func(n *Namespace) error {
+		n.bandwidth = bandWidth // stored only; Setup and Remove never read it
+		return nil
+	}
+}
+
+func WithCapabilityDNS(dns DNS) NamespaceOpts {
+	return func(n *Namespace) error {
+		n.dns = dns // stored only; Setup and Remove never read it
+		return nil
+	}
+}
+
+func WithCapabilityCgroupPath(cgroupPath string) NamespaceOpts {
+	return func(n *Namespace) error {
+		n.cgroupPath = cgroupPath // stored only; Setup and Remove never read it
+		return nil
+	}
+}
+
+type adapter struct {
+	client wlapi.WorkloadNetworkingClient // gRPC client used for Attach, Detach and Status
+}
+
+// Setup attaches the netns at path as eth0 for the workload named by the
+// K8S_POD_UID label and returns the service-reported IPs. id is unused.
+func (s *adapter) Setup(ctx context.Context, id string, path string, opts ...NamespaceOpts) (*Result, error) {
+	var n Namespace
+	for _, opt := range opts {
+		if err := opt(&n); err != nil {
+			return nil, fmt.Errorf("while applying namespace option: %w", err)
+		}
+	}
+	res, err := s.client.Attach(ctx, &wlapi.AttachRequest{
+		WorkloadId: n.labels["K8S_POD_UID"],
+		Netns: &wlapi.NetNSAttachment{
+			NetnsPath: path,
+			IfName:    "eth0",
+		},
+	})
+	if err != nil {
+		return nil, fmt.Errorf("while requesting workload network attachment: %w", err)
+	}
+	// Provide IP to containerd/CRI, rest is ignored anyways.
+	var ipConfigs []*IPConfig
+	for _, ip := range res.Ip {
+		ipConfigs = append(ipConfigs, &IPConfig{IP: net.IP(ip)})
+	}
+	return &Result{
+		Interfaces: map[string]*Config{"eth0": {IPConfigs: ipConfigs}},
+	}, nil
+}
+
+func (s *adapter) SetupSerially(ctx context.Context, id string, path string, opts ...NamespaceOpts) (*Result, error) {
+	// We do not support multiple plugins, so there is no distinction between
+	// serial and parallel setup. Just forward the call.
+	return s.Setup(ctx, id, path, opts...)
+}
+
+// Remove asks the workload network service to detach eth0 in the netns at
+// path for the workload named by the K8S_POD_UID label. id is unused.
+func (s *adapter) Remove(ctx context.Context, id string, path string, opts ...NamespaceOpts) error {
+	var n Namespace
+	for _, opt := range opts {
+		if err := opt(&n); err != nil {
+			return fmt.Errorf("while applying namespace option: %w", err)
+		}
+	}
+	_, err := s.client.Detach(ctx, &wlapi.DetachRequest{
+		WorkloadId: n.labels["K8S_POD_UID"],
+		Netns:      &wlapi.NetNSAttachment{NetnsPath: path, IfName: "eth0"},
+	})
+	return err
+}
+
+func (s *adapter) Check(ctx context.Context, id string, path string, opts ...NamespaceOpts) error {
+	return nil // stub: no check is performed, all arguments are ignored
+}
+
+func (s *adapter) Load(opts ...Opt) error {
+	// Stub: we do not actually have any CNI config, so opts are ignored.
+	return nil
+}
+
+func (s *adapter) Status() error {
+	_, err := s.client.Status(context.Background(), &wlapi.StatusRequest{}) // Status takes no ctx, so the call carries no deadline
+	return err
+}
+
+func (s *adapter) GetConfig() *ConfigResult {
+	return &ConfigResult{} // always empty: ConfigResult carries no fields
+}
diff --git a/metropolis/node/kubernetes/containerd/cniproxy/cnitypes.go b/metropolis/node/kubernetes/containerd/cniproxy/cnitypes.go
new file mode 100644
index 0000000..27bac36
--- /dev/null
+++ b/metropolis/node/kubernetes/containerd/cniproxy/cnitypes.go
@@ -0,0 +1,81 @@
+// Copyright The Monogon Project Authors.
+// Copyright The containerd Authors.
+// SPDX-License-Identifier: Apache-2.0
+
+package cni
+
+// This file contains types mostly or entirely lifted from go-cni but copied
+// here to allow API compatibility. Redefining these is not viable as their
+// references to other types would point to go-cni's types.
+
+import (
+	"context"
+	"net"
+
+	"github.com/containernetworking/cni/pkg/types"
+)
+
+type CNI interface {
+	// Setup sets up the network for the namespace.
+	Setup(ctx context.Context, id string, path string, opts ...NamespaceOpts) (*Result, error)
+	// SetupSerially sets up each of the network interfaces for the namespace in serial.
+	SetupSerially(ctx context.Context, id string, path string, opts ...NamespaceOpts) (*Result, error)
+	// Remove tears down the network of the namespace.
+	Remove(ctx context.Context, id string, path string, opts ...NamespaceOpts) error
+	// Check checks if the network is still in the desired state.
+	Check(ctx context.Context, id string, path string, opts ...NamespaceOpts) error
+	// Load loads the CNI network config.
+	Load(opts ...Opt) error
+	// Status checks the status of the CNI initialization.
+	Status() error
+	// GetConfig returns a copy of the CNI plugin configurations as parsed by CNI.
+	GetConfig() *ConfigResult
+}
+
+type PortMapping struct {
+	HostPort      int32
+	ContainerPort int32
+	Protocol      string
+	HostIP        string
+}
+
+// BandWidth defines the ingress/egress rate and burst limits
+type BandWidth struct {
+	IngressRate  uint64
+	IngressBurst uint64
+	EgressRate   uint64
+	EgressBurst  uint64
+}
+
+// DNS defines the dns config
+type DNS struct {
+	// List of DNS servers of the cluster.
+	Servers []string
+	// List of DNS search domains of the cluster.
+	Searches []string
+	// List of DNS options.
+	Options []string
+}
+
+type IPConfig struct {
+	IP      net.IP
+	Gateway net.IP
+}
+
+type Config struct {
+	IPConfigs  []*IPConfig
+	Mac        string
+	Sandbox    string
+	PciID      string
+	SocketPath string
+}
+
+type Result struct {
+	Interfaces map[string]*Config
+	DNS        []types.DNS
+	Routes     []*types.Route
+}
+
+// ConfigResult is not used by containerd and it's a complex type, leave it
+// for now.
+type ConfigResult struct{}
diff --git a/metropolis/node/kubernetes/containerd/cniproxy/opts.go b/metropolis/node/kubernetes/containerd/cniproxy/opts.go
new file mode 100644
index 0000000..0316276
--- /dev/null
+++ b/metropolis/node/kubernetes/containerd/cniproxy/opts.go
@@ -0,0 +1,58 @@
+// Copyright The Monogon Project Authors.
+// SPDX-License-Identifier: Apache-2.0
+
+package cni
+
+// Opt doesn't do anything as all configuration is ignored.
+type Opt func() error
+
+func noopOpt() error {
+	return nil
+}
+
+func WithConf(bytes []byte) Opt {
+	return noopOpt
+}
+
+func WithConfFile(fileName string) Opt {
+	return noopOpt
+}
+
+func WithConfIndex(bytes []byte, index int) Opt {
+	return noopOpt
+}
+
+func WithConfListBytes(bytes []byte) Opt {
+	return noopOpt
+}
+
+func WithConfListFile(fileName string) Opt {
+	return noopOpt
+}
+
+func WithInterfacePrefix(prefix string) Opt {
+	return noopOpt
+}
+
+func WithMinNetworkCount(count int) Opt {
+	return noopOpt
+}
+
+func WithPluginConfDir(dir string) Opt {
+	return noopOpt
+}
+
+func WithPluginDir(dirs []string) Opt {
+	return noopOpt
+}
+
+func WithPluginMaxConfNum(max int) Opt {
+	return noopOpt
+}
+
+func WithDefaultConf() error {
+	return nil
+}
+func WithLoNetwork() error {
+	return nil
+}