m/node: implement container networking ourselves

This change gets rid of the CNI mechanism for configuring container
networking in favour of a split approach: the network service is
extended by a gRPC workload network service which handles all of the
actual work, and a small library exposes just enough of go-cni's
interface to be a drop-in replacement in containerd, which then talks
to the workload network service.
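
For orientation, the workload network service surface consumed by the
library looks roughly as follows. This is a sketch inferred from the
adapter code in this change; the actual generated types live in
metropolis/node/core/network/workloads/spec:

    // Sketch of the gRPC client interface consumed by cniproxy.
    type WorkloadNetworkingClient interface {
        // Attach plugs a workload's network namespace into the node
        // network and returns the assigned IP address(es).
        Attach(ctx context.Context, in *AttachRequest,
            opts ...grpc.CallOption) (*AttachResponse, error)
        // Detach reverses a previous Attach for the workload.
        Detach(ctx context.Context, in *DetachRequest,
            opts ...grpc.CallOption) (*DetachResponse, error)
        // Status reports whether the service is ready to attach
        // workloads.
        Status(ctx context.Context, in *StatusRequest,
            opts ...grpc.CallOption) (*StatusResponse, error)
    }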

This is a rather unconventional approach to doing things, as CNI itself
is a pluggable interface. The reason for doing it this way is that
CNI's binary-execution interface has a huge spec which is horrible to
convert into decent Go types, and calling out to binaries comes with
inherent lifecycle, complexity and image size disadvantages. The part
of CNI that is actually used by containerd is tiny, and its arguments
are well-specified and have decent Go types. Taking over at that
boundary also avoids the whole CNI caching mechanism, which adds
further unnecessary complexity.
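
Concretely, the surface the drop-in library has to provide is just the
handful of methods containerd calls on go-cni, as implemented by the
adapter in this change (interface assembled here for illustration; the
real type definitions live alongside cniproxy.go):

    type CNI interface {
        Setup(ctx context.Context, id, path string,
            opts ...NamespaceOpts) (*Result, error)
        SetupSerially(ctx context.Context, id, path string,
            opts ...NamespaceOpts) (*Result, error)
        Remove(ctx context.Context, id, path string,
            opts ...NamespaceOpts) error
        Check(ctx context.Context, id, path string,
            opts ...NamespaceOpts) error
        Load(opts ...Opt) error
        Status() error
        GetConfig() *ConfigResult
    }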

The reason for the split service model, instead of implementing
everything in cniproxy, is to allow for more complex logic and Monogon
control plane interfacing in the workload network service. It will also
allow offloading the actual service to hardware like DPUs.

Right now some ugliness is left in to keep this change self-contained.
Two obvious examples are the piping through of the pod network event
value and the exclusion of the first (non-network) IP from the IP
allocator. These will eventually go away, but are necessary to make
this work as a standalone change.

Change-Id: I46c604b7dfd58da9e6ddd0a29241680d25a2a745
Reviewed-on: https://review.monogon.dev/c/monogon/+/4496
Reviewed-by: Jan Schär <jan@monogon.tech>
Tested-by: Jenkins CI
diff --git a/metropolis/node/kubernetes/containerd/cniproxy/cniproxy.go b/metropolis/node/kubernetes/containerd/cniproxy/cniproxy.go
new file mode 100644
index 0000000..eabcfd3
--- /dev/null
+++ b/metropolis/node/kubernetes/containerd/cniproxy/cniproxy.go
@@ -0,0 +1,161 @@
+// Copyright The Monogon Project Authors.
+// SPDX-License-Identifier: Apache-2.0
+
+// Package cni implements an adapter between the go-cni interface and
+// the Monogon gRPC Workload Attachment interface. As we do not intend to
+// actually implement a CNI-compliant plugin, it makes more sense to cut
+// out as much unnecessary logic as possible and take over at the
+// containerd API boundary.
+package cni
+
+import (
+	"context"
+	"fmt"
+	"net"
+
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/credentials/insecure"
+
+	wlapi "source.monogon.dev/metropolis/node/core/network/workloads/spec"
+)
+
+// New returns a CNI implementation backed by the node's workload network
+// service, reached over its local UNIX domain socket. The Opt arguments
+// are ignored as there is no CNI configuration to load.
+func New(_ ...Opt) (CNI, error) {
+	conn, err := grpc.NewClient("unix:/ephemeral/workloadnet.sock", grpc.WithTransportCredentials(insecure.NewCredentials()))
+	if err != nil {
+		return nil, fmt.Errorf("while creating workload network client: %w", err)
+	}
+	wlClient := wlapi.NewWorkloadNetworkingClient(conn)
+	return &adapter{
+		client: wlClient,
+	}, nil
+}
+
+type NamespaceOpts func(n *Namespace) error
+
+// Namespace differs significantly from its upstream go-cni counterpart: as
+// we do not have the actual underlying CNI interface, we do not need to
+// transform the data into JSON keys.
+type Namespace struct {
+	labels      map[string]string
+	annotations map[string]string
+	portMapping []PortMapping
+	bandwidth   BandWidth
+	dns         DNS
+	cgroupPath  string
+}
+
+func WithLabels(labels map[string]string) NamespaceOpts {
+	return func(n *Namespace) error {
+		n.labels = labels
+		return nil
+	}
+}
+
+// WithCapability captures the pod annotations capability; all other
+// capabilities containerd passes have dedicated options below.
+func WithCapability(name string, capability interface{}) NamespaceOpts {
+	return func(n *Namespace) error {
+		if name == "io.kubernetes.cri.pod-annotations" {
+			// Guard the assertion so a malformed capability value cannot
+			// panic the adapter.
+			if annotations, ok := capability.(map[string]string); ok {
+				n.annotations = annotations
+			}
+		}
+		return nil
+	}
+}
+
+func WithCapabilityPortMap(portMapping []PortMapping) NamespaceOpts {
+	return func(c *Namespace) error {
+		c.portMapping = portMapping
+		return nil
+	}
+}
+
+func WithCapabilityBandWidth(bandWidth BandWidth) NamespaceOpts {
+	return func(c *Namespace) error {
+		c.bandwidth = bandWidth
+		return nil
+	}
+}
+
+func WithCapabilityDNS(dns DNS) NamespaceOpts {
+	return func(c *Namespace) error {
+		c.dns = dns
+		return nil
+	}
+}
+
+func WithCapabilityCgroupPath(cgroupPath string) NamespaceOpts {
+	return func(c *Namespace) error {
+		c.cgroupPath = cgroupPath
+		return nil
+	}
+}
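+
+// The capability options above exist for go-cni interface compatibility.
+// Only the labels and pod annotations are currently consumed by the
+// adapter; port mappings, bandwidth limits, DNS settings and the cgroup
+// path are captured but ignored.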
+
+type adapter struct {
+	client wlapi.WorkloadNetworkingClient
+}
+
+// Setup requests a workload network attachment for the given sandbox and
+// translates the response back into go-cni result types.
+func (s *adapter) Setup(ctx context.Context, id string, path string, opts ...NamespaceOpts) (*Result, error) {
+	var n Namespace
+	for _, opt := range opts {
+		if err := opt(&n); err != nil {
+			return nil, fmt.Errorf("while applying namespace option: %w", err)
+		}
+	}
+	res, err := s.client.Attach(ctx, &wlapi.AttachRequest{
+		WorkloadId: n.labels["K8S_POD_UID"],
+		Netns: &wlapi.NetNSAttachment{
+			NetnsPath: path,
+			IfName:    "eth0",
+		},
+	})
+	if err != nil {
+		return nil, fmt.Errorf("while requesting workload network attachment: %w", err)
+	}
+	// Provide the IP(s) to containerd/CRI; the rest is ignored anyway.
+	var ipConfigs []*IPConfig
+	for _, ip := range res.Ip {
+		ipConfigs = append(ipConfigs, &IPConfig{IP: net.IP(ip)})
+	}
+	return &Result{
+		Interfaces: map[string]*Config{
+			"eth0": {
+				IPConfigs: ipConfigs,
+			},
+		},
+	}, nil
+}
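+
+// Illustrative only: from containerd's side, a sandbox setup through this
+// adapter looks roughly like (names sketched, not the exact call site):
+//
+//	cni, err := New()
+//	if err != nil { /* handle */ }
+//	res, err := cni.Setup(ctx, sandboxID, netnsPath,
+//		WithLabels(map[string]string{"K8S_POD_UID": podUID}))
+//
+// with the pod UID label serving as the workload identifier.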
+
+func (s *adapter) SetupSerially(ctx context.Context, id string, path string, opts ...NamespaceOpts) (*Result, error) {
+	// We do not support multiple plugins, so the distinction between serial
+	// and parallel setup does not exist. Just forward the call.
+	return s.Setup(ctx, id, path, opts...)
+}
+
+// Remove detaches the workload's network namespace from the node network.
+func (s *adapter) Remove(ctx context.Context, id string, path string, opts ...NamespaceOpts) error {
+	var n Namespace
+	for _, opt := range opts {
+		if err := opt(&n); err != nil {
+			return fmt.Errorf("while applying namespace option: %w", err)
+		}
+	}
+
+	_, err := s.client.Detach(ctx, &wlapi.DetachRequest{
+		WorkloadId: n.labels["K8S_POD_UID"],
+		Netns: &wlapi.NetNSAttachment{
+			NetnsPath: path,
+			IfName:    "eth0",
+		},
+	})
+	return err
+}
+
+func (s *adapter) Check(ctx context.Context, id string, path string, opts ...NamespaceOpts) error {
+	// Stub: attachments are not re-checked after setup.
+	return nil
+}
+
+func (s *adapter) Load(opts ...Opt) error {
+	// Stub, we do not actually have any CNI config.
+	return nil
+}
+
+func (s *adapter) Status() error {
+	// Forward to the workload network service, which knows whether it is
+	// ready to attach workloads.
+	_, err := s.client.Status(context.Background(), &wlapi.StatusRequest{})
+	return err
+}
+
+func (s *adapter) GetConfig() *ConfigResult {
+	// Stub, we do not have any CNI config to report.
+	return &ConfigResult{}
+}