m/node: clean up DNS service
The primary change in here is that CoreDNS now only listens on the
loopback interface by default.
This fixes #217 as it cannot be accessed from the outside anymore.
Since the containers do not share the host network namespace, they can
now no longer access the DNS service. This is solved by introducing a
new Network Service API to add listener IPs and using a link-local IP,
169.254.77.53 for the container DNS.
While at it, I cleaned up various parts of the DNS code.
Change-Id: Id7b618f62690032db335e8478b9de84410c210a1
Reviewed-on: https://review.monogon.dev/c/monogon/+/1759
Tested-by: Jenkins CI
Reviewed-by: Serge Bazanski <serge@monogon.tech>
diff --git a/metropolis/node/core/main.go b/metropolis/node/core/main.go
index 13d735f..dc50269 100644
--- a/metropolis/node/core/main.go
+++ b/metropolis/node/core/main.go
@@ -20,12 +20,14 @@
"context"
"fmt"
"io"
+ "net"
"os"
"strings"
"time"
"golang.org/x/sys/unix"
+ "source.monogon.dev/metropolis/node"
"source.monogon.dev/metropolis/node/core/cluster"
"source.monogon.dev/metropolis/node/core/localstorage"
"source.monogon.dev/metropolis/node/core/localstorage/declarative"
@@ -115,6 +117,7 @@
networkSvc := network.New(nil)
networkSvc.DHCPVendorClassID = "dev.monogon.metropolis.node.v1"
+ networkSvc.ExtraDNSListenerIPs = []net.IP{node.ContainerDNSIP}
timeSvc := timesvc.New()
// This function initializes a headless Delve if this is a debug build or
diff --git a/metropolis/node/core/network/dns/coredns.go b/metropolis/node/core/network/dns/coredns.go
index f865403..c539f44 100644
--- a/metropolis/node/core/network/dns/coredns.go
+++ b/metropolis/node/core/network/dns/coredns.go
@@ -21,6 +21,7 @@
"bytes"
"context"
"fmt"
+ "net"
"os"
"os/exec"
"strings"
@@ -45,6 +46,8 @@
`
type Service struct {
+ // Extra IPs the DNS service listens on and serves requests from.
+ ExtraListenerIPs []net.IP
directiveRegistration chan *ExtraDirective
directives map[string]ExtraDirective
cmd *exec.Cmd
@@ -54,7 +57,8 @@
stateMu sync.Mutex
}
-// New creates a new CoreDNS service.
+// New creates a new CoreDNS service. By default it only listens on the loopback
+// IPs ::1/127.0.0.1, but extra listener IPs can be added in extraListenerIPs.
// The given channel can then be used to dynamically register and unregister
// directives in the configuaration.
// To register a new directive, send an ExtraDirective on the channel. To
@@ -70,6 +74,11 @@
func (s *Service) makeCorefile(fargs *fileargs.FileArgs) []byte {
corefile := bytes.Buffer{}
corefile.WriteString(corefileBase)
+ bindIPs := []string{"127.0.0.1", "::1"}
+ for _, ip := range s.ExtraListenerIPs {
+ bindIPs = append(bindIPs, ip.String())
+ }
+ fmt.Fprintf(&corefile, "\tbind %s\n", strings.Join(bindIPs, " "))
for _, dir := range s.directives {
resolvedDir := dir.directive
for fname, fcontent := range dir.files {
@@ -92,6 +101,16 @@
// Run runs the DNS service consisting of the CoreDNS process and the directive
// registration process
func (s *Service) Run(ctx context.Context) error {
+ s.stateMu.Lock()
+ if s.args == nil {
+ args, err := fileargs.New()
+ if err != nil {
+ s.stateMu.Unlock()
+ return fmt.Errorf("failed to create fileargs: %w", err)
+ }
+ s.args = args
+ }
+ s.stateMu.Unlock()
supervisor.Run(ctx, "coredns", s.runCoreDNS)
supervisor.Run(ctx, "registration", s.runRegistration)
supervisor.Signal(ctx, supervisor.SignalHealthy)
@@ -102,21 +121,13 @@
// runCoreDNS runs the CoreDNS proceess
func (s *Service) runCoreDNS(ctx context.Context) error {
s.stateMu.Lock()
- args, err := fileargs.New()
- if err != nil {
- s.stateMu.Unlock()
- return fmt.Errorf("failed to create fileargs: %w", err)
- }
- defer args.Close()
- s.args = args
-
s.cmd = exec.CommandContext(ctx, "/kubernetes/bin/coredns",
- args.FileOpt("-conf", "Corefile", s.makeCorefile(args)),
+ s.args.FileOpt("-conf", "Corefile", s.makeCorefile(s.args)),
)
- if args.Error() != nil {
+ if s.args.Error() != nil {
s.stateMu.Unlock()
- return fmt.Errorf("failed to use fileargs: %w", err)
+ return fmt.Errorf("failed to use fileargs: %w", s.args.Error())
}
s.stateMu.Unlock()
diff --git a/metropolis/node/core/network/dns/directives.go b/metropolis/node/core/network/dns/directives.go
index ff25ae8..a9570e8 100644
--- a/metropolis/node/core/network/dns/directives.go
+++ b/metropolis/node/core/network/dns/directives.go
@@ -66,11 +66,13 @@
// Service Discovery" compliant service under clusterDomain. The given
// kubeconfig needs at least read access to services, endpoints and
// endpointslices.
-// [1] https://github.com/kubernetes/dns/blob/master/docs/specification.md
+//
+// [1] https://github.com/kubernetes/dns/blob/master/docs/specification.md
func NewKubernetesDirective(clusterDomain string, kubeconfig []byte) *ExtraDirective {
+ var prefix string
return &ExtraDirective{
ID: "k8s-clusterdns",
- directive: fmt.Sprintf(kubernetesDirective, clusterDomain),
+ directive: prefix + fmt.Sprintf(kubernetesDirective, clusterDomain),
files: map[string][]byte{
"kubeconfig": kubeconfig,
},
diff --git a/metropolis/node/core/network/main.go b/metropolis/node/core/network/main.go
index 466c076..956bb02 100644
--- a/metropolis/node/core/network/main.go
+++ b/metropolis/node/core/network/main.go
@@ -49,6 +49,13 @@
// autoconfiguration.
StaticConfig *netpb.Net
+ // List of IPs which get configured onto the loopback interface and the
+ // integrated DNS server is serving on. Cannot be changed at runtime.
+ // This is a hack to work around CoreDNS not being able to change listeners
+ // on-the-fly without breaking everything. This will go away once its
+ // frontend got replaced by something which can do that.
+ ExtraDNSListenerIPs []net.IP
+
// Vendor Class identifier of the system
DHCPVendorClassID string
@@ -111,6 +118,30 @@
s.dnsReg <- d
}
+func singleIPtoNetlinkAddr(ip net.IP, label string) *netlink.Addr {
+ var mask net.IPMask
+ if ip.To4() == nil {
+ mask = net.CIDRMask(128, 128) // IPv6 /128
+ } else {
+ mask = net.CIDRMask(32, 32) // IPv4 /32
+ }
+ scope := netlink.SCOPE_UNIVERSE
+ if ip.IsLinkLocalUnicast() {
+ scope = netlink.SCOPE_LINK
+ }
+ if ip.IsLoopback() {
+ scope = netlink.SCOPE_HOST
+ }
+ return &netlink.Addr{
+ IPNet: &net.IPNet{
+ IP: ip,
+ Mask: mask,
+ },
+ Label: label,
+ Scope: int(scope),
+ }
+}
+
// nfifname converts an interface name into 16 bytes padded with zeroes (for
// nftables)
func nfifname(n string) []byte {
@@ -209,6 +240,7 @@
func (s *Service) Run(ctx context.Context) error {
logger := supervisor.Logger(ctx)
+ s.dnsSvc.ExtraListenerIPs = s.ExtraDNSListenerIPs
supervisor.Run(ctx, "dns", s.dnsSvc.Run)
earlySysctlOpts := sysctlOptions{
@@ -297,6 +329,11 @@
if err := netlink.LinkSetUp(link); err != nil {
logger.Errorf("Failed to bring up loopback interface: %v", err)
}
+ for _, addr := range s.ExtraDNSListenerIPs {
+ if err := netlink.AddrAdd(link, singleIPtoNetlinkAddr(addr, "")); err != nil {
+ logger.Errorf("Failed to assign extra loopback IP: %v", err)
+ }
+ }
}
}
if len(ethernetLinks) != 1 {
diff --git a/metropolis/node/core/network/static.go b/metropolis/node/core/network/static.go
index 4d04ff3..a9a5176 100644
--- a/metropolis/node/core/network/static.go
+++ b/metropolis/node/core/network/static.go
@@ -36,10 +36,21 @@
return err
}
- hostDevices, err := listHostDeviceIfaces()
+ hostDevices, loopbackLink, err := listHostDeviceIfaces()
if err != nil {
return err
}
+ if loopbackLink == nil {
+ return errors.New("No loopback interface present, weird/broken kernel?")
+ }
+ if err := netlink.LinkSetUp(loopbackLink); err != nil {
+ l.Error("Failed to enable loopback interface: %w", err)
+ }
+ for _, addr := range s.ExtraDNSListenerIPs {
+ if err := netlink.AddrAdd(loopbackLink, singleIPtoNetlinkAddr(addr, "")); err != nil {
+ l.Errorf("Failed to assign extra loopback IP: %v", err)
+ }
+ }
var hasIPv4Autoconfig bool
@@ -222,17 +233,23 @@
driver string
}
-func listHostDeviceIfaces() ([]deviceIfData, error) {
+func listHostDeviceIfaces() ([]deviceIfData, netlink.Link, error) {
links, err := netlink.LinkList()
if err != nil {
- return nil, fmt.Errorf("failed to list network links: %w", err)
+ return nil, nil, fmt.Errorf("failed to list network links: %w", err)
}
var hostDevices []deviceIfData
+ var loopbackLink netlink.Link
+
for _, link := range links {
- if link.Attrs().EncapType == "loopback" {
- netlink.LinkSetUp(link)
+ // Modern Linux versions always create a loopback device named "lo" with
+ // constant interface index 1 in every network namespace. Since Linux
+ // 3.6 there is a BUG_ON in the loopback driver, asserting that this is
+ // true for every loopback interface created.
+ if link.Attrs().Index == 1 {
+ loopbackLink = link
}
d, ok := link.(*netlink.Device)
if !ok {
@@ -248,7 +265,7 @@
driver: driver,
})
}
- return hostDevices, nil
+ return hostDevices, loopbackLink, nil
}
func deviceIfaceFromSpec(it *netpb.Interface_Device, hostDevices []deviceIfData, l logtree.LeveledLogger) (*netlink.Device, error) {