m/node: implement container networking ourselves
This change gets rid of the CNI mechanism for configuring container
networking in favour of a split approach where the network service is
extended by a gRPC workload network service which handles all of the
work as well as a library which exposes just enough of go-cni's
interface to be a drop-in replacement in containerd, which then talks
to the workload network service.
This is a rather unconventional approach to doing things as CNI itself
is a pluggable interface. The reason for doing it this way is that the
binary executing interface of CNI has a huge spec which is also horrible
to convert into decent Go types and being a binary-calling interface has
inherent lifecycle, complexity and image size disadvantages. The part of
CNI that is actually used by containerd is tiny and its arguments are
well-specified and have decent Go types. It also avoids the whole CNI
caching mechanic which adds further unnecessary complexity.
The reason for the split service model instead of implementing
everything in cniproxy is to allow for more complex logic and Monogon
control plane interfacing from the workload network service. Also this
will allow offloading the actual service to things like DPUs.
Right now there is some ugliness left to make this self-contained. Two
obvious examples are the piping through of the pod network event value
and the exclusion of the first (non-network) IP from the IP allocator.
These will eventually go away but are necessary to get this to work as a
standalone change.
Change-Id: I46c604b7dfd58da9e6ddd0a29241680d25a2a745
Reviewed-on: https://review.monogon.dev/c/monogon/+/4496
Reviewed-by: Jan Schär <jan@monogon.tech>
Tested-by: Jenkins CI
diff --git a/third_party/com_github_containerd_containerd_v2/cni-adapter.patch b/third_party/com_github_containerd_containerd_v2/cni-adapter.patch
new file mode 100644
index 0000000..cee8f0b
--- /dev/null
+++ b/third_party/com_github_containerd_containerd_v2/cni-adapter.patch
@@ -0,0 +1,195 @@
+From 9bd63f7ca7d4a248f31600bb09fe1c828d40115a Mon Sep 17 00:00:00 2001
+From: Lorenz Brun <lorenz@monogon.tech>
+Date: Wed, 6 Aug 2025 04:07:56 +0200
+Subject: [PATCH] Use our CNI adapter instead of go-cni
+
+We're not really using CNI here, just straight-up intercepting the Go
+calls and redirecting them to us.
+---
+ cmd/ctr/commands/run/run.go | 2 +-
+ cmd/ctr/commands/tasks/kill.go | 2 +-
+ internal/cri/server/cni_conf_syncer.go | 2 +-
+ internal/cri/server/sandbox_run.go | 2 +-
+ internal/cri/server/sandbox_run_test.go | 2 +-
+ internal/cri/server/service.go | 2 +-
+ internal/cri/server/service_linux.go | 2 +-
+ internal/cri/server/service_other.go | 2 +-
+ internal/cri/server/service_test.go | 2 +-
+ internal/cri/server/service_windows.go | 2 +-
+ internal/cri/store/sandbox/metadata.go | 2 +-
+ internal/cri/testing/fake_cni_plugin.go | 2 +-
+ internal/cri/types/sandbox_info.go | 2 +-
+ 13 files changed, 13 insertions(+), 13 deletions(-)
+
+diff --git a/cmd/ctr/commands/run/run.go b/cmd/ctr/commands/run/run.go
+index 365f77746..298f004f2 100644
+--- a/cmd/ctr/commands/run/run.go
++++ b/cmd/ctr/commands/run/run.go
+@@ -24,7 +24,7 @@ import (
+ "strings"
+
+ "github.com/containerd/console"
+- gocni "github.com/containerd/go-cni"
++ gocni "source.monogon.dev/metropolis/node/kubernetes/containerd/cniproxy"
+ specs "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/urfave/cli/v2"
+
+diff --git a/cmd/ctr/commands/tasks/kill.go b/cmd/ctr/commands/tasks/kill.go
+index 78b5584e2..d03a2aa48 100644
+--- a/cmd/ctr/commands/tasks/kill.go
++++ b/cmd/ctr/commands/tasks/kill.go
+@@ -23,7 +23,7 @@ import (
+
+ containerd "github.com/containerd/containerd/v2/client"
+ "github.com/containerd/containerd/v2/cmd/ctr/commands"
+- gocni "github.com/containerd/go-cni"
++ gocni "source.monogon.dev/metropolis/node/kubernetes/containerd/cniproxy"
+ "github.com/containerd/log"
+ "github.com/containerd/typeurl/v2"
+ "github.com/moby/sys/signal"
+diff --git a/internal/cri/server/cni_conf_syncer.go b/internal/cri/server/cni_conf_syncer.go
+index 822e7e54a..646286c4e 100644
+--- a/internal/cri/server/cni_conf_syncer.go
++++ b/internal/cri/server/cni_conf_syncer.go
+@@ -22,7 +22,7 @@ import (
+ "path/filepath"
+ "sync"
+
+- "github.com/containerd/go-cni"
++ "source.monogon.dev/metropolis/node/kubernetes/containerd/cniproxy"
+ "github.com/containerd/log"
+ "github.com/fsnotify/fsnotify"
+ )
+diff --git a/internal/cri/server/sandbox_run.go b/internal/cri/server/sandbox_run.go
+index 3142efb72..1e26479f6 100644
+--- a/internal/cri/server/sandbox_run.go
++++ b/internal/cri/server/sandbox_run.go
+@@ -26,7 +26,7 @@ import (
+ "strings"
+ "time"
+
+- "github.com/containerd/go-cni"
++ "source.monogon.dev/metropolis/node/kubernetes/containerd/cniproxy"
+ "github.com/containerd/log"
+ "github.com/containerd/typeurl/v2"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
+diff --git a/internal/cri/server/sandbox_run_test.go b/internal/cri/server/sandbox_run_test.go
+index 3b3b01187..7c643f7c7 100644
+--- a/internal/cri/server/sandbox_run_test.go
++++ b/internal/cri/server/sandbox_run_test.go
+@@ -21,7 +21,7 @@ import (
+ "net"
+ "testing"
+
+- "github.com/containerd/go-cni"
++ "source.monogon.dev/metropolis/node/kubernetes/containerd/cniproxy"
+ "github.com/stretchr/testify/assert"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
+ )
+diff --git a/internal/cri/server/service.go b/internal/cri/server/service.go
+index 8b65b1465..4ffe73acc 100644
+--- a/internal/cri/server/service.go
++++ b/internal/cri/server/service.go
+@@ -26,7 +26,7 @@ import (
+ "sync/atomic"
+ "time"
+
+- "github.com/containerd/go-cni"
++ "source.monogon.dev/metropolis/node/kubernetes/containerd/cniproxy"
+ "github.com/containerd/log"
+ "github.com/containerd/typeurl/v2"
+ imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+diff --git a/internal/cri/server/service_linux.go b/internal/cri/server/service_linux.go
+index a115e5f67..c9451e41e 100644
+--- a/internal/cri/server/service_linux.go
++++ b/internal/cri/server/service_linux.go
+@@ -25,7 +25,7 @@ import (
+
+ "github.com/containerd/containerd/v2/pkg/cap"
+ "github.com/containerd/containerd/v2/pkg/kernelversion"
+- "github.com/containerd/go-cni"
++ "source.monogon.dev/metropolis/node/kubernetes/containerd/cniproxy"
+ "github.com/containerd/log"
+ )
+
+diff --git a/internal/cri/server/service_other.go b/internal/cri/server/service_other.go
+index 70e050f11..4b027730a 100644
+--- a/internal/cri/server/service_other.go
++++ b/internal/cri/server/service_other.go
+@@ -19,7 +19,7 @@
+ package server
+
+ import (
+- "github.com/containerd/go-cni"
++ "source.monogon.dev/metropolis/node/kubernetes/containerd/cniproxy"
+ )
+
+ // initPlatform handles initialization of the CRI service for non-windows
+diff --git a/internal/cri/server/service_test.go b/internal/cri/server/service_test.go
+index b6d9ecfb3..7dc40781d 100644
+--- a/internal/cri/server/service_test.go
++++ b/internal/cri/server/service_test.go
+@@ -20,7 +20,7 @@ import (
+ "context"
+
+ "github.com/containerd/errdefs"
+- "github.com/containerd/go-cni"
++ "source.monogon.dev/metropolis/node/kubernetes/containerd/cniproxy"
+ "github.com/containerd/platforms"
+ imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+
+diff --git a/internal/cri/server/service_windows.go b/internal/cri/server/service_windows.go
+index 1bf71413f..0ebae652d 100644
+--- a/internal/cri/server/service_windows.go
++++ b/internal/cri/server/service_windows.go
+@@ -19,7 +19,7 @@ package server
+ import (
+ "fmt"
+
+- "github.com/containerd/go-cni"
++ "source.monogon.dev/metropolis/node/kubernetes/containerd/cniproxy"
+ )
+
+ // windowsNetworkAttachCount is the minimum number of networks the PodSandbox
+diff --git a/internal/cri/store/sandbox/metadata.go b/internal/cri/store/sandbox/metadata.go
+index 20fe2f1d1..8a2d22aa5 100644
+--- a/internal/cri/store/sandbox/metadata.go
++++ b/internal/cri/store/sandbox/metadata.go
+@@ -20,7 +20,7 @@ import (
+ "encoding/json"
+ "fmt"
+
+- cni "github.com/containerd/go-cni"
++ cni "source.monogon.dev/metropolis/node/kubernetes/containerd/cniproxy"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
+ )
+
+diff --git a/internal/cri/testing/fake_cni_plugin.go b/internal/cri/testing/fake_cni_plugin.go
+index fcc060f1c..cac2d5173 100644
+--- a/internal/cri/testing/fake_cni_plugin.go
++++ b/internal/cri/testing/fake_cni_plugin.go
+@@ -19,7 +19,7 @@ package testing
+ import (
+ "context"
+
+- cni "github.com/containerd/go-cni"
++ cni "source.monogon.dev/metropolis/node/kubernetes/containerd/cniproxy"
+ )
+
+ // FakeCNIPlugin is a fake plugin used for test.
+diff --git a/internal/cri/types/sandbox_info.go b/internal/cri/types/sandbox_info.go
+index 49acadae8..fbe6af925 100644
+--- a/internal/cri/types/sandbox_info.go
++++ b/internal/cri/types/sandbox_info.go
+@@ -17,7 +17,7 @@
+ package types
+
+ import (
+- "github.com/containerd/go-cni"
++ "source.monogon.dev/metropolis/node/kubernetes/containerd/cniproxy"
+ "github.com/opencontainers/runtime-spec/specs-go"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
+
+--
+2.49.0
+
diff --git a/third_party/com_github_containernetworking_cni/BUILD.bazel b/third_party/com_github_containernetworking_cni/BUILD.bazel
deleted file mode 100644
index e69de29..0000000
--- a/third_party/com_github_containernetworking_cni/BUILD.bazel
+++ /dev/null
diff --git a/third_party/com_github_containernetworking_cni/cni-fix-cachepath.patch b/third_party/com_github_containernetworking_cni/cni-fix-cachepath.patch
deleted file mode 100644
index 06cf210..0000000
--- a/third_party/com_github_containernetworking_cni/cni-fix-cachepath.patch
+++ /dev/null
@@ -1,29 +0,0 @@
-From 3646de78ed303e1c84c78b676859df9c2db33863 Mon Sep 17 00:00:00 2001
-From: Lorenz Brun <lorenz@brun.one>
-Date: Mon, 25 Jan 2021 18:20:01 +0100
-Subject: [PATCH] Point CacheDir to the correct location for Metropolis
-
-This is arguably an ugly hack, but they hardcoded it and the fastest way to
-access anything resembling a config is through three different repos:
-containernetworking/cni -> containerd/go-cni -> containerd/cri ->
-containerd/containerd.
----
- libcni/api.go | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/libcni/api.go b/libcni/api.go
-index 5a43219..5d71426 100644
---- a/libcni/api.go
-+++ b/libcni/api.go
-@@ -38,7 +38,7 @@ import (
- )
-
- var (
-- CacheDir = "/var/lib/cni"
-+ CacheDir = "/ephemeral/containerd/cni-cache"
- // slightly awkward wording to preserve anyone matching on error strings
- ErrorCheckNotSupp = fmt.Errorf("does not support the CHECK command")
- )
---
-2.44.1
-
diff --git a/third_party/com_github_containernetworking_plugins/BUILD.bazel b/third_party/com_github_containernetworking_plugins/BUILD.bazel
deleted file mode 100644
index e69de29..0000000
--- a/third_party/com_github_containernetworking_plugins/BUILD.bazel
+++ /dev/null
diff --git a/third_party/com_github_containernetworking_plugins/cniplugins-add-linkgroup.patch b/third_party/com_github_containernetworking_plugins/cniplugins-add-linkgroup.patch
deleted file mode 100644
index ec83ddd..0000000
--- a/third_party/com_github_containernetworking_plugins/cniplugins-add-linkgroup.patch
+++ /dev/null
@@ -1,55 +0,0 @@
-From a2c65ec075a9376e3b8e9fb72a96db36a613a1d0 Mon Sep 17 00:00:00 2001
-From: Lorenz Brun <lorenz@brun.one>
-Date: Thu, 2 Jan 2025 00:36:05 +0100
-Subject: [PATCH] Add linkGroup setting
-
----
- plugins/main/ptp/ptp.go | 11 +++++++++--
- 1 file changed, 9 insertions(+), 2 deletions(-)
-
-diff --git a/plugins/main/ptp/ptp.go b/plugins/main/ptp/ptp.go
-index 9c88d901..e0b283c5 100644
---- a/plugins/main/ptp/ptp.go
-+++ b/plugins/main/ptp/ptp.go
-@@ -47,6 +47,7 @@ type NetConf struct {
- IPMasq bool `json:"ipMasq"`
- IPMasqBackend *string `json:"ipMasqBackend,omitempty"`
- MTU int `json:"mtu"`
-+ LinkGroup int `json:"linkGroup,omitempty"`
- }
-
- func setupContainerVeth(netns ns.NetNS, ifName string, mtu int, pr *current.Result) (*current.Interface, *current.Interface, error) {
-@@ -146,7 +147,7 @@ func setupContainerVeth(netns ns.NetNS, ifName string, mtu int, pr *current.Resu
- return hostInterface, containerInterface, nil
- }
-
--func setupHostVeth(vethName string, result *current.Result) error {
-+func setupHostVeth(vethName string, group int, result *current.Result) error {
- // hostVeth moved namespaces and may have a new ifindex
- veth, err := netlinksafe.LinkByName(vethName)
- if err != nil {
-@@ -178,6 +179,12 @@ func setupHostVeth(vethName string, result *current.Result) error {
- }
- }
-
-+ if group != 0 {
-+ if err := netlink.LinkSetGroup(veth, group); err != nil {
-+ return fmt.Errorf("failed to set link group for if %q: %v", vethName, err)
-+ }
-+ }
-+
- return nil
- }
-
-@@ -225,7 +232,7 @@ func cmdAdd(args *skel.CmdArgs) error {
- return err
- }
-
-- if err = setupHostVeth(hostInterface.Name, result); err != nil {
-+ if err = setupHostVeth(hostInterface.Name, conf.LinkGroup, result); err != nil {
- return err
- }
-
---
-2.47.1
-