blob: 000ea1f899817cbbe8a05f330cb1306d64ed8aea [file] [log] [blame]
Tim Windelschmidt6d33a432025-02-04 14:34:25 +01001// Copyright The Monogon Project Authors.
2// SPDX-License-Identifier: Apache-2.0
3
Jan Schära48bd3c2024-07-29 17:22:18 +02004package kubernetes
5
6import (
7 "math/rand/v2"
8 "net"
9 "net/netip"
10
11 "github.com/miekg/dns"
12
13 netDNS "source.monogon.dev/osbase/net/dns"
14 "source.monogon.dev/osbase/net/dns/kubernetes/object"
15)
16
const (
	// defaultTTL is the TTL put on every record this package generates.
	defaultTTL = 5
	// DNSSchemaVersion is the version of the Kubernetes DNS specification
	// that is reported in the dns-version TXT record. See
	// https://github.com/kubernetes/dns/blob/master/docs/specification.md
	DNSSchemaVersion = "1.1.0"
)
23
24func (k *Kubernetes) HandleDNS(r *netDNS.Request) {
25 if netDNS.IsSubDomain(k.clusterDomain, r.QnameCanonical) {
26 r.SetAuthoritative()
27
28 subdomain := r.QnameCanonical[:len(r.QnameCanonical)-len(k.clusterDomain)]
29 subdomain, last := netDNS.SplitLastLabel(subdomain)
30 if last == "svc" {
31 k.handleService(r, subdomain)
32 } else if last == "" {
33 if r.Qtype == dns.TypeSOA || r.Qtype == dns.TypeANY {
34 r.Reply.Answer = append(r.Reply.Answer, k.makeSOA(r.Qname))
35 }
36 if r.Qtype == dns.TypeNS || r.Qtype == dns.TypeANY {
37 r.Reply.Answer = append(r.Reply.Answer, k.makeNS(r.Qname))
38 }
39 } else if last == "dns-version" && subdomain == "" {
40 if r.Qtype == dns.TypeTXT || r.Qtype == dns.TypeANY {
41 rr := new(dns.TXT)
42 rr.Hdr = dns.RR_Header{Name: r.Qname, Rrtype: dns.TypeTXT, Class: dns.ClassINET, Ttl: defaultTTL}
43 rr.Txt = []string{DNSSchemaVersion}
44 r.Reply.Answer = append(r.Reply.Answer, rr)
45 }
46 } else if last == "dns" && (subdomain == "" || subdomain == "ns.") {
47 // Name exists but has no records.
48 } else {
49 r.Reply.Rcode = dns.RcodeNameError
50 }
51
52 if r.Handled {
53 return
54 }
55 if len(r.Reply.Answer) == 0 {
56 zone := r.Qname[len(r.Qname)-len(k.clusterDomain):]
57 r.Reply.Ns = []dns.RR{k.makeSOA(zone)}
58 }
59 r.SendReply()
60 return
61 }
62
63 reverseIP, reverseBits, extra := netDNS.ParseReverse(r.QnameCanonical)
64 if reverseIP.IsValid() {
65 for _, ipRange := range k.ipRanges {
66 if !ipRange.Contains(reverseIP) || reverseBits < ipRange.Bits() {
67 continue
68 }
69
70 r.SetAuthoritative()
71
72 zoneBits := 0
73 if reverseIP.BitLen() == 32 {
74 zoneBits = (ipRange.Bits() + 7) & ^7
75 } else {
76 zoneBits = (ipRange.Bits() + 3) & ^3
77 }
78
79 if extra {
80 // Name with extra labels does not exist.
81 r.Reply.Rcode = dns.RcodeNameError
82 } else {
83 if reverseBits == reverseIP.BitLen() {
84 k.handleReverse(r, reverseIP)
85 }
86 if reverseBits == zoneBits {
87 if r.Qtype == dns.TypeSOA || r.Qtype == dns.TypeANY {
88 r.Reply.Answer = append(r.Reply.Answer, k.makeSOA(r.Qname))
89 }
90 if r.Qtype == dns.TypeNS || r.Qtype == dns.TypeANY {
91 r.Reply.Answer = append(r.Reply.Answer, k.makeNS(r.Qname))
92 }
93 }
94 }
95
96 if len(r.Reply.Answer) == 0 {
97 zoneDots := 0
98 if reverseIP.BitLen() == 32 {
99 zoneDots = 3 + zoneBits/8
100 } else {
101 zoneDots = 3 + zoneBits/4
102 }
103 zoneStart := len(r.Qname)
104 for zoneStart > 0 {
105 if r.Qname[zoneStart-1] == '.' {
106 zoneDots--
107 if zoneDots == 0 {
108 break
109 }
110 }
111 zoneStart--
112 }
113 zone := r.Qname[zoneStart:]
114 r.Reply.Ns = []dns.RR{k.makeSOA(zone)}
115 }
116 r.SendReply()
117 return
118 }
119 }
120}
121
122func (k *Kubernetes) handleService(r *netDNS.Request, subdomain string) {
123 if subdomain == "" {
124 // Name exists but has no records.
125 return
126 }
127
128 rest, namespace := netDNS.SplitLastLabel(subdomain)
129 if rest == "" {
130 // Name exists if the namespace exists, and has no records.
131 if !k.apiConn.NamespaceExists(namespace) {
132 k.notFound(r)
133 }
134 return
135 }
136
137 serviceSub, _ := netDNS.SplitLastLabel(rest)
138 rest, hostnameOrProto := netDNS.SplitLastLabel(serviceSub)
139
140 var proto string
141 var portName string
142 var hostname string
143 switch hostnameOrProto {
144 case "_tcp", "_udp", "_sctp":
145 proto = hostnameOrProto[1:]
146 rest, portName = netDNS.SplitLastLabel(rest)
147 if len(portName) >= 2 && portName[0] == '_' {
148 portName = portName[1:]
149 } else if portName != "" {
150 r.Reply.Rcode = dns.RcodeNameError
151 return
152 }
153 // If portName is empty, the name exists if the parent exists,
154 // but has no records.
155 default:
156 hostname = hostnameOrProto
157 }
158
159 if rest != "" {
160 // The query name has too many labels.
161 r.Reply.Rcode = dns.RcodeNameError
162 return
163 }
164
165 // serviceKey is "<service>.<ns>"
166 serviceKey := subdomain[len(serviceSub) : len(subdomain)-1]
167 service := k.apiConn.GetSvc(serviceKey)
168 if service == nil {
169 k.notFound(r)
170 return
171 }
172
173 // External service
174 if service.ExternalName != "" {
175 if serviceSub != "" {
176 // External services don't have subdomains.
177 r.Reply.Rcode = dns.RcodeNameError
178 return
179 }
180 if service.ExternalName == object.ExternalNameInvalid {
181 // The service has an invalid ExternalName, return an error.
182 r.AddExtendedError(dns.ExtendedErrorCodeInvalidData, "Kubernetes service has invalid externalName")
183 r.Reply.Rcode = dns.RcodeServerFailure
184 return
185 }
186 // We already ensure that ExternalName is valid and fully qualified
187 // when constructing the object.Service.
188 r.AddCNAME(service.ExternalName, defaultTTL)
189 return
190 }
191
192 // Headless service.
193 if service.Headless {
194 found := false
195 haveIP := make(map[string]struct{})
196 haveSRV := make(map[srvItem]struct{})
197 existingAnswer := len(r.Reply.Answer)
198 existingExtra := len(r.Reply.Extra)
199 for _, ep := range k.apiConn.EpIndex(serviceKey) {
200 if portName != "" {
201 // _<port>._<proto>.<service>.<ns>.svc.
202 var portNumber uint16
203 for _, p := range ep.Ports {
204 if p.Name == portName && p.Protocol == proto {
205 portNumber = p.Port
206 break
207 }
208 }
209 if portNumber == 0 {
210 continue
211 }
212 for _, addr := range ep.Addresses {
213 found = true
214 if r.Qtype == dns.TypeSRV || r.Qtype == dns.TypeANY {
215 targetName := addr.Hostname + r.Qname[len(serviceSub)-1:]
216 if !isDuplicateSRV(haveSRV, addr.Hostname, "", portNumber) {
217 addSRV(r, portNumber, targetName)
218 }
219 if !isDuplicateSRV(haveSRV, addr.Hostname, addr.IP, 0) {
220 addAddrExtra(r, targetName, net.IP(addr.IP))
221 }
222 }
223 }
224 } else {
225 // <service>.<ns>.svc. or <hostname>.<service>.<ns>.svc.
226 for _, addr := range ep.Addresses {
227 if hostname != "" && hostname != addr.Hostname {
228 continue
229 }
230 found = true
231 if proto != "" {
232 // _<proto>.<service>.<ns>.svc. has no records
233 // and exists if its parent exists.
234 break
235 }
236 if _, ok := haveIP[addr.IP]; !ok {
237 haveIP[addr.IP] = struct{}{}
238 addAddr(r, net.IP(addr.IP))
239 }
240 }
241 }
242 }
243 shuffleRRs(r.Reply.Answer[existingAnswer:])
244 shuffleRRs(r.Reply.Extra[existingExtra:])
245 if !found {
246 k.notFound(r)
247 }
248 return
249 }
250
251 if hostname != "" {
252 // Non-headless services don't have hostname records.
253 r.Reply.Rcode = dns.RcodeNameError
254 return
255 }
256
257 // ClusterIP service
258 if proto == "" {
259 // <service>.<ns>.svc. for ClusterIP service.
260 for _, ip := range service.ClusterIPs {
261 addAddr(r, net.IP(ip))
262 }
263 // The specification does not define what to return if the service has
264 // no (valid) clusterIP. We return an empty response with no error.
265 return
266 }
267
268 if portName == "" {
269 // _<proto>.<service>.<ns>.svc. exists but has no records.
270 return
271 }
272
273 // _<port>._<proto>.<service>.<ns>.svc. for ClusterIP service.
274 var portNumber uint16
275 for _, p := range service.Ports {
276 if p.Name == portName && p.Protocol == proto {
277 portNumber = p.Port
278 break
279 }
280 }
281 if portNumber == 0 {
282 r.Reply.Rcode = dns.RcodeNameError
283 return
284 }
285 if r.Qtype == dns.TypeSRV || r.Qtype == dns.TypeANY {
286 targetName := r.Qname[len(serviceSub):]
287 addSRV(r, portNumber, targetName)
288 for _, ip := range service.ClusterIPs {
289 addAddrExtra(r, targetName, net.IP(ip))
290 }
291 }
292}
293
294func (k *Kubernetes) handleReverse(r *netDNS.Request, ip netip.Addr) {
295 stringIP := string(ip.AsSlice())
296 found := false
297 for _, service := range k.apiConn.SvcIndexReverse(stringIP) {
298 found = true
299 if r.Qtype == dns.TypePTR || r.Qtype == dns.TypeANY {
300 rr := new(dns.PTR)
301 rr.Hdr = dns.RR_Header{Name: r.Qname, Rrtype: dns.TypePTR, Class: dns.ClassINET, Ttl: defaultTTL}
302 rr.Ptr = service.Name + "." + service.Namespace + ".svc." + k.clusterDomain
303 r.Reply.Answer = append(r.Reply.Answer, rr)
304 }
305 }
306 haveName := make(map[string]struct{})
307 for _, ep := range k.apiConn.EpIndexReverse(stringIP) {
308 for _, addr := range ep.Addresses {
309 if addr.IP == stringIP {
310 found = true
311 if r.Qtype == dns.TypePTR || r.Qtype == dns.TypeANY {
312 ptr := addr.Hostname + "." + ep.Index + ".svc." + k.clusterDomain
313 if _, ok := haveName[ptr]; ok {
314 continue
315 }
316 haveName[ptr] = struct{}{}
317 rr := new(dns.PTR)
318 rr.Hdr = dns.RR_Header{Name: r.Qname, Rrtype: dns.TypePTR, Class: dns.ClassINET, Ttl: defaultTTL}
319 rr.Ptr = ptr
320 r.Reply.Answer = append(r.Reply.Answer, rr)
321 }
322 }
323 }
324 }
325 if !found {
326 k.notFound(r)
327 }
328}
329
330func (k *Kubernetes) makeSOA(zone string) *dns.SOA {
331 header := dns.RR_Header{Name: zone, Rrtype: dns.TypeSOA, Class: dns.ClassINET, Ttl: defaultTTL}
332 return &dns.SOA{
333 Hdr: header,
334 Mbox: "nobody.invalid.",
335 Ns: k.nsDomain,
336 Serial: uint32(k.apiConn.Modified()),
337 Refresh: 7200,
338 Retry: 1800,
339 Expire: 86400,
340 Minttl: defaultTTL,
341 }
342}
343
344func (k *Kubernetes) makeNS(zone string) *dns.NS {
345 rr := new(dns.NS)
346 rr.Hdr = dns.RR_Header{Name: zone, Rrtype: dns.TypeNS, Class: dns.ClassINET, Ttl: defaultTTL}
347 rr.Ns = k.nsDomain
348 return rr
349}
350
351func addAddr(r *netDNS.Request, ip net.IP) {
352 if len(ip) == net.IPv4len && (r.Qtype == dns.TypeA || r.Qtype == dns.TypeANY) {
353 rr := new(dns.A)
354 rr.Hdr = dns.RR_Header{Name: r.Qname, Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: defaultTTL}
355 rr.A = ip
356 r.Reply.Answer = append(r.Reply.Answer, rr)
357 }
358 if len(ip) == net.IPv6len && (r.Qtype == dns.TypeAAAA || r.Qtype == dns.TypeANY) {
359 rr := new(dns.AAAA)
360 rr.Hdr = dns.RR_Header{Name: r.Qname, Rrtype: dns.TypeAAAA, Class: dns.ClassINET, Ttl: defaultTTL}
361 rr.AAAA = ip
362 r.Reply.Answer = append(r.Reply.Answer, rr)
363 }
364}
365
366func addAddrExtra(r *netDNS.Request, name string, ip net.IP) {
367 if len(ip) == net.IPv4len {
368 rr := new(dns.A)
369 rr.Hdr = dns.RR_Header{Name: name, Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: defaultTTL}
370 rr.A = ip
371 r.Reply.Extra = append(r.Reply.Extra, rr)
372 }
373 if len(ip) == net.IPv6len {
374 rr := new(dns.AAAA)
375 rr.Hdr = dns.RR_Header{Name: name, Rrtype: dns.TypeAAAA, Class: dns.ClassINET, Ttl: defaultTTL}
376 rr.AAAA = ip
377 r.Reply.Extra = append(r.Reply.Extra, rr)
378 }
379}
380
381func addSRV(r *netDNS.Request, portNumber uint16, targetName string) {
382 rr := new(dns.SRV)
383 rr.Hdr = dns.RR_Header{Name: r.Qname, Rrtype: dns.TypeSRV, Class: dns.ClassINET, Ttl: defaultTTL}
384 rr.Priority = 0
385 rr.Weight = 0
386 rr.Port = portNumber
387 rr.Target = targetName
388 r.Reply.Answer = append(r.Reply.Answer, rr)
389}
390
391// notFound should be called if a name was not found, but could exist
392// if there are Kubernetes object that are not yet available locally.
393func (k *Kubernetes) notFound(r *netDNS.Request) {
394 if !k.apiConn.HasSynced() {
395 // We don't know if the name exists or not, so return an error.
396 r.AddExtendedError(dns.ExtendedErrorCodeNotReady, "Kubernetes objects not yet synced")
397 r.Reply.Rcode = dns.RcodeServerFailure
398 } else {
399 r.Reply.Rcode = dns.RcodeNameError
400 }
401}
402
// srvItem is the map key used to detect records which were already added
// while answering an SRV query.
type srvItem struct {
	name, addr string
	port       uint16
}
408
409// isDuplicateSRV returns true if the (name, addr, port) combination already
410// exists in m, and adds it to m if not.
411func isDuplicateSRV(m map[srvItem]struct{}, name, addr string, port uint16) bool {
412 _, ok := m[srvItem{name, addr, port}]
413 if !ok {
414 m[srvItem{name, addr, port}] = struct{}{}
415 }
416 return ok
417}
418
419// shuffleRRs shuffles a slice of RRs for some load balancing.
420func shuffleRRs(rrs []dns.RR) {
421 rand.Shuffle(len(rrs), func(i, j int) {
422 rrs[i], rrs[j] = rrs[j], rrs[i]
423 })
424}