Add service proxy
This adds a service proxy based on nfproxy, along with the changes to service IP allocation needed to make it work.
Also adds masquerading (SNAT) of traffic leaving through the external interface, which provides outbound network connectivity.
Test Plan:
Currently manually tested by creating an alpine pod and running 'apk add curl && curl -k https://192.168.188.1:443/'.
Will be covered later by CTS.
Bug: T810
X-Origin-Diff: phab/D580
GitOrigin-RevId: cace863fd8c2f045560f8abf84c40cc77bc275d4
diff --git a/build/fietsje/main.go b/build/fietsje/main.go
index f227d9b..c62d6c7 100644
--- a/build/fietsje/main.go
+++ b/build/fietsje/main.go
@@ -110,6 +110,13 @@
"github.com/mdlayher/genetlink",
)
+ p.collect(
+ "github.com/sbezverk/nfproxy", "7fac5f39824e7f34228b08ba8b7640770ca6a9f4",
+ patches("nfproxy.patch"),
+ ).use(
+ "github.com/sbezverk/nftableslib",
+ )
+
// First generate the repositories starlark rule into memory. This is because rendering will lock all unlocked
// dependencies, which might take a while. If a use were to interrupt it now, they would end up with an incomplete
// repositories.bzl and would have to restore from git.
diff --git a/core/internal/kubernetes/BUILD.bazel b/core/internal/kubernetes/BUILD.bazel
index 3bcbe6a..6b5d652 100644
--- a/core/internal/kubernetes/BUILD.bazel
+++ b/core/internal/kubernetes/BUILD.bazel
@@ -17,6 +17,7 @@
"//core/internal/common:go_default_library",
"//core/internal/common/supervisor:go_default_library",
"//core/internal/kubernetes/clusternet:go_default_library",
+ "//core/internal/kubernetes/nfproxy:go_default_library",
"//core/internal/kubernetes/pki:go_default_library",
"//core/internal/kubernetes/reconciler:go_default_library",
"//core/internal/localstorage:go_default_library",
diff --git a/core/internal/kubernetes/clusternet/clusternet.go b/core/internal/kubernetes/clusternet/clusternet.go
index 5c42bb8..e41ba8a 100644
--- a/core/internal/kubernetes/clusternet/clusternet.go
+++ b/core/internal/kubernetes/clusternet/clusternet.go
@@ -108,6 +108,7 @@
}
allowedIPs = append(allowedIPs, *podNet)
}
+ allowedIPs = append(allowedIPs, net.IPNet{IP: internalIP, Mask: net.CIDRMask(32, 32)})
s.logger.Debug("Adding/Updating WireGuard peer node", zap.String("node", newNode.Name),
zap.String("endpointIP", internalIP.String()), zap.Any("allowedIPs", allowedIPs))
// WireGuard's kernel side has create/update semantics on peers by default. So we can just add the peer multiple
diff --git a/core/internal/kubernetes/nfproxy/BUILD.bazel b/core/internal/kubernetes/nfproxy/BUILD.bazel
new file mode 100644
index 0000000..4bc7ab7
--- /dev/null
+++ b/core/internal/kubernetes/nfproxy/BUILD.bazel
@@ -0,0 +1,22 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+go_library(
+ name = "go_default_library",
+ srcs = ["nfproxy.go"],
+ importpath = "git.monogon.dev/source/nexantic.git/core/internal/kubernetes/nfproxy",
+ visibility = ["//core:__subpackages__"],  # only packages below //core may depend on this library
+ deps = [
+ "//core/internal/common/supervisor:go_default_library",
+ "@com_github_sbezverk_nfproxy//pkg/controller:go_default_library",
+ "@com_github_sbezverk_nfproxy//pkg/nftables:go_default_library",
+ "@com_github_sbezverk_nfproxy//pkg/proxy:go_default_library",
+ "@io_k8s_api//core/v1:go_default_library",
+ "@io_k8s_apimachinery//pkg/apis/meta/v1:go_default_library",
+ "@io_k8s_apimachinery//pkg/labels:go_default_library",
+ "@io_k8s_apimachinery//pkg/selection:go_default_library",
+ "@io_k8s_client_go//informers:go_default_library",
+ "@io_k8s_client_go//kubernetes:go_default_library",
+ "@io_k8s_client_go//kubernetes/scheme:go_default_library",
+ "@io_k8s_client_go//tools/record:go_default_library",
+ ],
+)
diff --git a/core/internal/kubernetes/nfproxy/nfproxy.go b/core/internal/kubernetes/nfproxy/nfproxy.go
new file mode 100644
index 0000000..25962bf
--- /dev/null
+++ b/core/internal/kubernetes/nfproxy/nfproxy.go
@@ -0,0 +1,104 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package nfproxy is a Kubernetes Service IP proxy based exclusively on the Linux nftables interface.
+// It uses netfilter's NAT capabilities to accept traffic on service IPs and DNAT it to the respective endpoint.
+package nfproxy
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "net"
+ "os"
+ "time"
+
+ "git.monogon.dev/source/nexantic.git/core/internal/common/supervisor"
+
+ "github.com/sbezverk/nfproxy/pkg/controller"
+ "github.com/sbezverk/nfproxy/pkg/nftables"
+ "github.com/sbezverk/nfproxy/pkg/proxy"
+ v1 "k8s.io/api/core/v1"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/labels"
+ "k8s.io/apimachinery/pkg/selection"
+ kubeinformers "k8s.io/client-go/informers"
+ "k8s.io/client-go/kubernetes"
+ "k8s.io/client-go/kubernetes/scheme"
+ "k8s.io/client-go/tools/record"
+)
+
+type Service struct { // Service holds the configuration of the nfproxy-based Kubernetes service proxy started by Run.
+ // ClusterCIDR is the cluster network: traffic in it is assumed to originate inside the cluster and will not be SNATed.
+ ClusterCIDR net.IPNet
+ // ClientSet is a Kubernetes ClientSet with read access to endpoints and services; Run watches Services and EndpointSlices with it.
+ ClientSet kubernetes.Interface
+}
+
+func (s *Service) Run(ctx context.Context) error { // Run initializes nftables for ClusterCIDR, then starts the Service and EndpointSlice controllers.
+ var ipv4ClusterCIDR string
+ var ipv6ClusterCIDR string
+ if s.ClusterCIDR.IP.To4() == nil && s.ClusterCIDR.IP.To16() != nil { // no 4-byte form but a valid 16-byte form: an IPv6 network
+ ipv6ClusterCIDR = s.ClusterCIDR.String()
+ } else if s.ClusterCIDR.IP.To4() != nil {
+ ipv4ClusterCIDR = s.ClusterCIDR.String()
+ } else {
+ return errors.New("invalid ClusterCIDR")
+ }
+ nfti, err := nftables.InitNFTables(ipv4ClusterCIDR, ipv6ClusterCIDR) // exactly one of the two CIDR strings is non-empty at this point
+ if err != nil {
+ return fmt.Errorf("failed to initialize nftables with error: %w", err)
+ }
+
+ // Create an event recorder, tagged with the local hostname, for the proxy to report events into K8s.
+ hostname, err := os.Hostname()
+ if err != nil {
+ return fmt.Errorf("failed to get local host name with error: %w", err)
+ }
+ eventBroadcaster := record.NewBroadcaster() // NOTE(review): no event sink is attached to this broadcaster in this function — confirm events actually reach K8s
+ recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "nfproxy", Host: hostname})
+
+ // Create the proxy controller with endpoint slices enabled (presumably the trailing true — confirm against nfproxy), see
+ // https://kubernetes.io/docs/concepts/services-networking/endpoint-slices/
+ nfproxy := proxy.NewProxy(nfti, hostname, recorder, true)
+
+ // Create a special informer factory which doesn't track headless services: its list options only match objects lacking the v1.IsHeadlessService label.
+ noHeadlessEndpoints, err := labels.NewRequirement(v1.IsHeadlessService, selection.DoesNotExist, nil)
+ if err != nil {
+ return fmt.Errorf("failed to create Requirement for noHeadlessEndpoints: %w", err)
+ }
+ labelSelector := labels.NewSelector()
+ labelSelector = labelSelector.Add(*noHeadlessEndpoints)
+
+ kubeInformerFactory := kubeinformers.NewSharedInformerFactoryWithOptions(s.ClientSet, time.Minute*5, // second argument is the default resync period
+ kubeinformers.WithTweakListOptions(func(options *metav1.ListOptions) {
+ options.LabelSelector = labelSelector.String()
+ }))
+
+ svcController := controller.NewServiceController(nfproxy, s.ClientSet, kubeInformerFactory.Core().V1().Services())
+ ep := controller.NewEndpointSliceController(nfproxy, s.ClientSet, kubeInformerFactory.Discovery().V1beta1().EndpointSlices())
+ kubeInformerFactory.Start(ctx.Done()) // the requested informers are stopped when ctx is cancelled
+
+ if err = svcController.Start(ctx.Done()); err != nil {
+ return fmt.Errorf("error running Service controller: %w", err)
+ }
+ if err = ep.Start(ctx.Done()); err != nil {
+ return fmt.Errorf("error running endpoint controller: %w", err)
+ }
+ supervisor.Signal(ctx, supervisor.SignalHealthy) // presumably reports successful startup to the supervisor — TODO confirm
+ supervisor.Signal(ctx, supervisor.SignalDone) // presumably marks the nil return below as intentional while the controllers keep running — TODO confirm
+ return nil
+}
diff --git a/core/internal/kubernetes/pki/kubernetes.go b/core/internal/kubernetes/pki/kubernetes.go
index 48ce6e9..0de8f6d 100644
--- a/core/internal/kubernetes/pki/kubernetes.go
+++ b/core/internal/kubernetes/pki/kubernetes.go
@@ -103,7 +103,7 @@
"kubernetes.default.svc.cluster.local",
"localhost",
},
- []net.IP{{127, 0, 0, 1}}, // TODO(q3k): add service network internal apiserver address
+ []net.IP{{10, 0, 255, 1}, {127, 0, 0, 1}}, // TODO(q3k): add service network internal apiserver address
))
make(IdCA, KubeletClient, Client("smalltown:apiserver-kubelet-client", nil))
make(IdCA, ControllerManagerClient, Client("system:kube-controller-manager", nil))
diff --git a/core/internal/kubernetes/service.go b/core/internal/kubernetes/service.go
index 2396066..a22b6b9 100644
--- a/core/internal/kubernetes/service.go
+++ b/core/internal/kubernetes/service.go
@@ -33,6 +33,7 @@
"git.monogon.dev/source/nexantic.git/core/internal/common/supervisor"
"git.monogon.dev/source/nexantic.git/core/internal/kubernetes/clusternet"
+ "git.monogon.dev/source/nexantic.git/core/internal/kubernetes/nfproxy"
"git.monogon.dev/source/nexantic.git/core/internal/kubernetes/pki"
"git.monogon.dev/source/nexantic.git/core/internal/kubernetes/reconciler"
"git.monogon.dev/source/nexantic.git/core/internal/localstorage"
@@ -156,6 +157,11 @@
DataDirectory: &s.c.Root.Data.Kubernetes.ClusterNetworking,
}
+ nfproxy := nfproxy.Service{
+ ClusterCIDR: s.c.ClusterNet,
+ ClientSet: clientSet,
+ }
+
for _, sub := range []struct {
name string
runnable supervisor.Runnable
@@ -168,6 +174,7 @@
{"csi-plugin", csiPlugin.Run},
{"csi-provisioner", csiProvisioner.Run},
{"clusternet", clusternet.Run},
+ {"nfproxy", nfproxy.Run},
} {
err := supervisor.Run(ctx, sub.name, sub.runnable)
if err != nil {
diff --git a/core/internal/network/BUILD.bazel b/core/internal/network/BUILD.bazel
index 9eefc1b..ad7de74 100644
--- a/core/internal/network/BUILD.bazel
+++ b/core/internal/network/BUILD.bazel
@@ -8,6 +8,8 @@
deps = [
"//core/internal/common/supervisor:go_default_library",
"//core/internal/network/dhcp:go_default_library",
+ "@com_github_google_nftables//:go_default_library",
+ "@com_github_google_nftables//expr:go_default_library",
"@com_github_vishvananda_netlink//:go_default_library",
"@org_golang_x_sys//unix:go_default_library",
"@org_uber_go_zap//:go_default_library",
diff --git a/core/internal/network/main.go b/core/internal/network/main.go
index ac9ce46..c92b21a 100644
--- a/core/internal/network/main.go
+++ b/core/internal/network/main.go
@@ -23,6 +23,9 @@
"net"
"os"
+ "github.com/google/nftables"
+ "github.com/google/nftables/expr"
+
"github.com/vishvananda/netlink"
"go.uber.org/zap"
"golang.org/x/sys/unix"
@@ -97,6 +100,13 @@
return nil
}
+// nfifname converts an interface name into a 16-byte slice padded with zeroes (the form nftables interface-name comparisons use).
+func nfifname(n string) []byte {
+ b := make([]byte, 16) // make zero-fills, so every byte not overwritten below stays NUL
+ copy(b, []byte(n+"\x00")) // copy stops at len(b): names of 16 bytes or more are truncated and end up without a terminating NUL
+ return b
+}
+
func (s *Service) useInterface(ctx context.Context, iface netlink.Link) error {
err := supervisor.Run(ctx, "dhcp", s.dhcp.Run(iface))
if err != nil {
@@ -115,6 +125,40 @@
s.logger.Warn("failed to add routes", zap.Error(err))
}
+ c := nftables.Conn{}
+
+ nat := c.AddTable(&nftables.Table{
+ Family: nftables.TableFamilyIPv4,
+ Name: "nat",
+ })
+
+ postrouting := c.AddChain(&nftables.Chain{
+ Name: "postrouting",
+ Hooknum: nftables.ChainHookPostrouting,
+ Priority: nftables.ChainPriorityNATSource,
+ Table: nat,
+ Type: nftables.ChainTypeNAT,
+ })
+
+ // Masquerade/SNAT all traffic going out of the external interface
+ c.AddRule(&nftables.Rule{
+ Table: nat,
+ Chain: postrouting,
+ Exprs: []expr.Any{
+ &expr.Meta{Key: expr.MetaKeyOIFNAME, Register: 1},
+ &expr.Cmp{
+ Op: expr.CmpOpEq,
+ Register: 1,
+ Data: nfifname(iface.Attrs().Name),
+ },
+ &expr.Masq{},
+ },
+ })
+
+ if err := c.Flush(); err != nil {
+ panic(err)
+ }
+
return nil
}
diff --git a/nogo_config.json b/nogo_config.json
index f3f47ff..19e74f5 100644
--- a/nogo_config.json
+++ b/nogo_config.json
@@ -76,7 +76,8 @@
"exclude_files": {
"external/io_k8s_kubernetes/": "third_party",
"external/runc/vendor/github.com/vishvananda/netlink": "third_party",
- "external/com_github_google_gvisor/": "third_party"
+ "external/com_github_google_gvisor/": "third_party",
+ "external/com_github_sbezverk_nfproxy/": "third_party"
}
},
"structtag": {
diff --git a/third_party/go/patches/nfproxy.patch b/third_party/go/patches/nfproxy.patch
new file mode 100644
index 0000000..307d3d4
--- /dev/null
+++ b/third_party/go/patches/nfproxy.patch
@@ -0,0 +1,36 @@
+Copyright 2020 The Monogon Project Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+
+Fix nfproxy compatibility with our version of Kubernetes/utilproxy
+--- com_github_sbezverk_nfproxy.orig/pkg/proxy/proxy_service.go 2020-07-16 14:24:06.901176302 +0200
++++ com_github_sbezverk_nfproxy/pkg/proxy/proxy_service.go 2020-07-16 14:08:34.118927035 +0200
+@@ -22,7 +22,6 @@
+ utilnftables "github.com/google/nftables"
+ "github.com/sbezverk/nfproxy/pkg/nftables"
+ v1 "k8s.io/api/core/v1"
+- "k8s.io/apimachinery/pkg/types"
+ "k8s.io/klog"
+ utilproxy "k8s.io/kubernetes/pkg/proxy/util"
+ utilnet "k8s.io/utils/net"
+@@ -44,8 +43,7 @@
+ stickySeconds := int(*svc.Spec.SessionAffinityConfig.ClientIP.TimeoutSeconds)
+ klog.V(5).Infof("Service %s/%s has SessionAffinity set for %d seconds", svc.Namespace, svc.Name, stickySeconds)
+ }
+- svcName := types.NamespacedName{Namespace: svc.Namespace, Name: svc.Name}
+- if utilproxy.ShouldSkipService(svcName, svc) {
++ if utilproxy.ShouldSkipService(svc) {
+ return
+ }
+ for i := range svc.Spec.Ports {
diff --git a/third_party/go/repositories.bzl b/third_party/go/repositories.bzl
index c4e21c6..042aadb 100644
--- a/third_party/go/repositories.bzl
+++ b/third_party/go/repositories.bzl
@@ -1258,6 +1258,22 @@
sum = "h1:0U2s5loxrTy6/VgfVoLuVLFJcURKLH49ie0zSch7gh4=",
)
go_repository(
+ name = "com_github_sbezverk_nfproxy",
+ importpath = "github.com/sbezverk/nfproxy",
+ version = "v0.0.0-20200514180651-7fac5f39824e",
+ sum = "h1:fJ2lHQ7ZUjmgJbvVQ509ioBmrGHcbvlwfjUieExw/dU=",
+ patches = [
+ "//third_party/go/patches:nfproxy.patch",
+ ],
+ patch_args = ["-p1"],
+ )
+ go_repository(
+ name = "com_github_sbezverk_nftableslib",
+ importpath = "github.com/sbezverk/nftableslib",
+ version = "v0.0.0-20200402150358-c20bed91f482",
+ sum = "h1:k7gEZ/EwJhHDTRXFUZQlE4/p1cmoha7zL7PWCDG3ZHQ=",
+ )
+ go_repository(
name = "com_github_seccomp_libseccomp_golang",
importpath = "github.com/seccomp/libseccomp-golang",
version = "v0.9.1",
diff --git a/third_party/go/shelf.pb.text b/third_party/go/shelf.pb.text
index 85372be..9d77503 100644
--- a/third_party/go/shelf.pb.text
+++ b/third_party/go/shelf.pb.text
@@ -1959,6 +1959,20 @@
semver: "v0.2.1-0.20190427202633-1595213edefa"
>
entry: <
+ import_path: "github.com/sbezverk/nfproxy"
+ version: "7fac5f39824e7f34228b08ba8b7640770ca6a9f4"
+ bazel_name: "com_github_sbezverk_nfproxy"
+ sum: "h1:fJ2lHQ7ZUjmgJbvVQ509ioBmrGHcbvlwfjUieExw/dU="
+ semver: "v0.0.0-20200514180651-7fac5f39824e"
+>
+entry: <
+ import_path: "github.com/sbezverk/nftableslib"
+ version: "v0.0.0-20200402150358-c20bed91f482"
+ bazel_name: "com_github_sbezverk_nftableslib"
+ sum: "h1:k7gEZ/EwJhHDTRXFUZQlE4/p1cmoha7zL7PWCDG3ZHQ="
+ semver: "v0.0.0-20200402150358-c20bed91f482"
+>
+entry: <
import_path: "github.com/seccomp/libseccomp-golang"
version: "689e3c1541a84461afc49c1c87352a6cedf72e9c"
bazel_name: "com_github_seccomp_libseccomp_golang"
diff --git a/third_party/linux/linux-smalltown.config b/third_party/linux/linux-smalltown.config
index b4f2df1..e409a05 100644
--- a/third_party/linux/linux-smalltown.config
+++ b/third_party/linux/linux-smalltown.config
@@ -974,6 +974,8 @@
CONFIG_NFT_REJECT=y
CONFIG_NFT_REJECT_INET=y
CONFIG_NFT_HASH=y
+CONFIG_NFT_FIB=y
+# CONFIG_NFT_FIB_INET is not set
CONFIG_NFT_SOCKET=y
CONFIG_NFT_OSF=y
CONFIG_NFT_TPROXY=y
@@ -981,6 +983,7 @@
CONFIG_NF_DUP_NETDEV=y
CONFIG_NFT_DUP_NETDEV=y
CONFIG_NFT_FWD_NETDEV=y
+# CONFIG_NFT_FIB_NETDEV is not set
CONFIG_NF_FLOW_TABLE_INET=y
CONFIG_NF_FLOW_TABLE=y
# CONFIG_NETFILTER_XTABLES is not set
@@ -997,11 +1000,11 @@
CONFIG_NF_TPROXY_IPV4=y
CONFIG_NF_TABLES_IPV4=y
CONFIG_NFT_REJECT_IPV4=y
-# CONFIG_NFT_DUP_IPV4 is not set
-# CONFIG_NFT_FIB_IPV4 is not set
+CONFIG_NFT_DUP_IPV4=y
+CONFIG_NFT_FIB_IPV4=y
# CONFIG_NF_TABLES_ARP is not set
# CONFIG_NF_FLOW_TABLE_IPV4 is not set
-# CONFIG_NF_DUP_IPV4 is not set
+CONFIG_NF_DUP_IPV4=y
# CONFIG_NF_LOG_ARP is not set
# CONFIG_NF_LOG_IPV4 is not set
CONFIG_NF_REJECT_IPV4=y
@@ -1016,10 +1019,10 @@
CONFIG_NF_TPROXY_IPV6=y
CONFIG_NF_TABLES_IPV6=y
CONFIG_NFT_REJECT_IPV6=y
-# CONFIG_NFT_DUP_IPV6 is not set
-# CONFIG_NFT_FIB_IPV6 is not set
+CONFIG_NFT_DUP_IPV6=y
+CONFIG_NFT_FIB_IPV6=y
# CONFIG_NF_FLOW_TABLE_IPV6 is not set
-# CONFIG_NF_DUP_IPV6 is not set
+CONFIG_NF_DUP_IPV6=y
CONFIG_NF_REJECT_IPV6=y
# CONFIG_NF_LOG_IPV6 is not set
# CONFIG_IP6_NF_IPTABLES is not set