blob: e6adf8487de6ad9949e0bffb29a2387880b4f05e [file] [log] [blame]
Jan Schära48bd3c2024-07-29 17:22:18 +02001package kubernetes
2
3import (
4 "math/rand/v2"
5 "net"
6 "net/netip"
7
8 "github.com/miekg/dns"
9
10 netDNS "source.monogon.dev/osbase/net/dns"
11 "source.monogon.dev/osbase/net/dns/kubernetes/object"
12)
13
// DNSSchemaVersion is the version of the Kubernetes DNS schema that is
// reported in the dns-version TXT record. See
// https://github.com/kubernetes/dns/blob/master/docs/specification.md
const DNSSchemaVersion = "1.1.0"

// defaultTTL is the TTL set on every record built in this file.
const defaultTTL = 5
20
21func (k *Kubernetes) HandleDNS(r *netDNS.Request) {
22 if netDNS.IsSubDomain(k.clusterDomain, r.QnameCanonical) {
23 r.SetAuthoritative()
24
25 subdomain := r.QnameCanonical[:len(r.QnameCanonical)-len(k.clusterDomain)]
26 subdomain, last := netDNS.SplitLastLabel(subdomain)
27 if last == "svc" {
28 k.handleService(r, subdomain)
29 } else if last == "" {
30 if r.Qtype == dns.TypeSOA || r.Qtype == dns.TypeANY {
31 r.Reply.Answer = append(r.Reply.Answer, k.makeSOA(r.Qname))
32 }
33 if r.Qtype == dns.TypeNS || r.Qtype == dns.TypeANY {
34 r.Reply.Answer = append(r.Reply.Answer, k.makeNS(r.Qname))
35 }
36 } else if last == "dns-version" && subdomain == "" {
37 if r.Qtype == dns.TypeTXT || r.Qtype == dns.TypeANY {
38 rr := new(dns.TXT)
39 rr.Hdr = dns.RR_Header{Name: r.Qname, Rrtype: dns.TypeTXT, Class: dns.ClassINET, Ttl: defaultTTL}
40 rr.Txt = []string{DNSSchemaVersion}
41 r.Reply.Answer = append(r.Reply.Answer, rr)
42 }
43 } else if last == "dns" && (subdomain == "" || subdomain == "ns.") {
44 // Name exists but has no records.
45 } else {
46 r.Reply.Rcode = dns.RcodeNameError
47 }
48
49 if r.Handled {
50 return
51 }
52 if len(r.Reply.Answer) == 0 {
53 zone := r.Qname[len(r.Qname)-len(k.clusterDomain):]
54 r.Reply.Ns = []dns.RR{k.makeSOA(zone)}
55 }
56 r.SendReply()
57 return
58 }
59
60 reverseIP, reverseBits, extra := netDNS.ParseReverse(r.QnameCanonical)
61 if reverseIP.IsValid() {
62 for _, ipRange := range k.ipRanges {
63 if !ipRange.Contains(reverseIP) || reverseBits < ipRange.Bits() {
64 continue
65 }
66
67 r.SetAuthoritative()
68
69 zoneBits := 0
70 if reverseIP.BitLen() == 32 {
71 zoneBits = (ipRange.Bits() + 7) & ^7
72 } else {
73 zoneBits = (ipRange.Bits() + 3) & ^3
74 }
75
76 if extra {
77 // Name with extra labels does not exist.
78 r.Reply.Rcode = dns.RcodeNameError
79 } else {
80 if reverseBits == reverseIP.BitLen() {
81 k.handleReverse(r, reverseIP)
82 }
83 if reverseBits == zoneBits {
84 if r.Qtype == dns.TypeSOA || r.Qtype == dns.TypeANY {
85 r.Reply.Answer = append(r.Reply.Answer, k.makeSOA(r.Qname))
86 }
87 if r.Qtype == dns.TypeNS || r.Qtype == dns.TypeANY {
88 r.Reply.Answer = append(r.Reply.Answer, k.makeNS(r.Qname))
89 }
90 }
91 }
92
93 if len(r.Reply.Answer) == 0 {
94 zoneDots := 0
95 if reverseIP.BitLen() == 32 {
96 zoneDots = 3 + zoneBits/8
97 } else {
98 zoneDots = 3 + zoneBits/4
99 }
100 zoneStart := len(r.Qname)
101 for zoneStart > 0 {
102 if r.Qname[zoneStart-1] == '.' {
103 zoneDots--
104 if zoneDots == 0 {
105 break
106 }
107 }
108 zoneStart--
109 }
110 zone := r.Qname[zoneStart:]
111 r.Reply.Ns = []dns.RR{k.makeSOA(zone)}
112 }
113 r.SendReply()
114 return
115 }
116 }
117}
118
119func (k *Kubernetes) handleService(r *netDNS.Request, subdomain string) {
120 if subdomain == "" {
121 // Name exists but has no records.
122 return
123 }
124
125 rest, namespace := netDNS.SplitLastLabel(subdomain)
126 if rest == "" {
127 // Name exists if the namespace exists, and has no records.
128 if !k.apiConn.NamespaceExists(namespace) {
129 k.notFound(r)
130 }
131 return
132 }
133
134 serviceSub, _ := netDNS.SplitLastLabel(rest)
135 rest, hostnameOrProto := netDNS.SplitLastLabel(serviceSub)
136
137 var proto string
138 var portName string
139 var hostname string
140 switch hostnameOrProto {
141 case "_tcp", "_udp", "_sctp":
142 proto = hostnameOrProto[1:]
143 rest, portName = netDNS.SplitLastLabel(rest)
144 if len(portName) >= 2 && portName[0] == '_' {
145 portName = portName[1:]
146 } else if portName != "" {
147 r.Reply.Rcode = dns.RcodeNameError
148 return
149 }
150 // If portName is empty, the name exists if the parent exists,
151 // but has no records.
152 default:
153 hostname = hostnameOrProto
154 }
155
156 if rest != "" {
157 // The query name has too many labels.
158 r.Reply.Rcode = dns.RcodeNameError
159 return
160 }
161
162 // serviceKey is "<service>.<ns>"
163 serviceKey := subdomain[len(serviceSub) : len(subdomain)-1]
164 service := k.apiConn.GetSvc(serviceKey)
165 if service == nil {
166 k.notFound(r)
167 return
168 }
169
170 // External service
171 if service.ExternalName != "" {
172 if serviceSub != "" {
173 // External services don't have subdomains.
174 r.Reply.Rcode = dns.RcodeNameError
175 return
176 }
177 if service.ExternalName == object.ExternalNameInvalid {
178 // The service has an invalid ExternalName, return an error.
179 r.AddExtendedError(dns.ExtendedErrorCodeInvalidData, "Kubernetes service has invalid externalName")
180 r.Reply.Rcode = dns.RcodeServerFailure
181 return
182 }
183 // We already ensure that ExternalName is valid and fully qualified
184 // when constructing the object.Service.
185 r.AddCNAME(service.ExternalName, defaultTTL)
186 return
187 }
188
189 // Headless service.
190 if service.Headless {
191 found := false
192 haveIP := make(map[string]struct{})
193 haveSRV := make(map[srvItem]struct{})
194 existingAnswer := len(r.Reply.Answer)
195 existingExtra := len(r.Reply.Extra)
196 for _, ep := range k.apiConn.EpIndex(serviceKey) {
197 if portName != "" {
198 // _<port>._<proto>.<service>.<ns>.svc.
199 var portNumber uint16
200 for _, p := range ep.Ports {
201 if p.Name == portName && p.Protocol == proto {
202 portNumber = p.Port
203 break
204 }
205 }
206 if portNumber == 0 {
207 continue
208 }
209 for _, addr := range ep.Addresses {
210 found = true
211 if r.Qtype == dns.TypeSRV || r.Qtype == dns.TypeANY {
212 targetName := addr.Hostname + r.Qname[len(serviceSub)-1:]
213 if !isDuplicateSRV(haveSRV, addr.Hostname, "", portNumber) {
214 addSRV(r, portNumber, targetName)
215 }
216 if !isDuplicateSRV(haveSRV, addr.Hostname, addr.IP, 0) {
217 addAddrExtra(r, targetName, net.IP(addr.IP))
218 }
219 }
220 }
221 } else {
222 // <service>.<ns>.svc. or <hostname>.<service>.<ns>.svc.
223 for _, addr := range ep.Addresses {
224 if hostname != "" && hostname != addr.Hostname {
225 continue
226 }
227 found = true
228 if proto != "" {
229 // _<proto>.<service>.<ns>.svc. has no records
230 // and exists if its parent exists.
231 break
232 }
233 if _, ok := haveIP[addr.IP]; !ok {
234 haveIP[addr.IP] = struct{}{}
235 addAddr(r, net.IP(addr.IP))
236 }
237 }
238 }
239 }
240 shuffleRRs(r.Reply.Answer[existingAnswer:])
241 shuffleRRs(r.Reply.Extra[existingExtra:])
242 if !found {
243 k.notFound(r)
244 }
245 return
246 }
247
248 if hostname != "" {
249 // Non-headless services don't have hostname records.
250 r.Reply.Rcode = dns.RcodeNameError
251 return
252 }
253
254 // ClusterIP service
255 if proto == "" {
256 // <service>.<ns>.svc. for ClusterIP service.
257 for _, ip := range service.ClusterIPs {
258 addAddr(r, net.IP(ip))
259 }
260 // The specification does not define what to return if the service has
261 // no (valid) clusterIP. We return an empty response with no error.
262 return
263 }
264
265 if portName == "" {
266 // _<proto>.<service>.<ns>.svc. exists but has no records.
267 return
268 }
269
270 // _<port>._<proto>.<service>.<ns>.svc. for ClusterIP service.
271 var portNumber uint16
272 for _, p := range service.Ports {
273 if p.Name == portName && p.Protocol == proto {
274 portNumber = p.Port
275 break
276 }
277 }
278 if portNumber == 0 {
279 r.Reply.Rcode = dns.RcodeNameError
280 return
281 }
282 if r.Qtype == dns.TypeSRV || r.Qtype == dns.TypeANY {
283 targetName := r.Qname[len(serviceSub):]
284 addSRV(r, portNumber, targetName)
285 for _, ip := range service.ClusterIPs {
286 addAddrExtra(r, targetName, net.IP(ip))
287 }
288 }
289}
290
291func (k *Kubernetes) handleReverse(r *netDNS.Request, ip netip.Addr) {
292 stringIP := string(ip.AsSlice())
293 found := false
294 for _, service := range k.apiConn.SvcIndexReverse(stringIP) {
295 found = true
296 if r.Qtype == dns.TypePTR || r.Qtype == dns.TypeANY {
297 rr := new(dns.PTR)
298 rr.Hdr = dns.RR_Header{Name: r.Qname, Rrtype: dns.TypePTR, Class: dns.ClassINET, Ttl: defaultTTL}
299 rr.Ptr = service.Name + "." + service.Namespace + ".svc." + k.clusterDomain
300 r.Reply.Answer = append(r.Reply.Answer, rr)
301 }
302 }
303 haveName := make(map[string]struct{})
304 for _, ep := range k.apiConn.EpIndexReverse(stringIP) {
305 for _, addr := range ep.Addresses {
306 if addr.IP == stringIP {
307 found = true
308 if r.Qtype == dns.TypePTR || r.Qtype == dns.TypeANY {
309 ptr := addr.Hostname + "." + ep.Index + ".svc." + k.clusterDomain
310 if _, ok := haveName[ptr]; ok {
311 continue
312 }
313 haveName[ptr] = struct{}{}
314 rr := new(dns.PTR)
315 rr.Hdr = dns.RR_Header{Name: r.Qname, Rrtype: dns.TypePTR, Class: dns.ClassINET, Ttl: defaultTTL}
316 rr.Ptr = ptr
317 r.Reply.Answer = append(r.Reply.Answer, rr)
318 }
319 }
320 }
321 }
322 if !found {
323 k.notFound(r)
324 }
325}
326
327func (k *Kubernetes) makeSOA(zone string) *dns.SOA {
328 header := dns.RR_Header{Name: zone, Rrtype: dns.TypeSOA, Class: dns.ClassINET, Ttl: defaultTTL}
329 return &dns.SOA{
330 Hdr: header,
331 Mbox: "nobody.invalid.",
332 Ns: k.nsDomain,
333 Serial: uint32(k.apiConn.Modified()),
334 Refresh: 7200,
335 Retry: 1800,
336 Expire: 86400,
337 Minttl: defaultTTL,
338 }
339}
340
341func (k *Kubernetes) makeNS(zone string) *dns.NS {
342 rr := new(dns.NS)
343 rr.Hdr = dns.RR_Header{Name: zone, Rrtype: dns.TypeNS, Class: dns.ClassINET, Ttl: defaultTTL}
344 rr.Ns = k.nsDomain
345 return rr
346}
347
348func addAddr(r *netDNS.Request, ip net.IP) {
349 if len(ip) == net.IPv4len && (r.Qtype == dns.TypeA || r.Qtype == dns.TypeANY) {
350 rr := new(dns.A)
351 rr.Hdr = dns.RR_Header{Name: r.Qname, Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: defaultTTL}
352 rr.A = ip
353 r.Reply.Answer = append(r.Reply.Answer, rr)
354 }
355 if len(ip) == net.IPv6len && (r.Qtype == dns.TypeAAAA || r.Qtype == dns.TypeANY) {
356 rr := new(dns.AAAA)
357 rr.Hdr = dns.RR_Header{Name: r.Qname, Rrtype: dns.TypeAAAA, Class: dns.ClassINET, Ttl: defaultTTL}
358 rr.AAAA = ip
359 r.Reply.Answer = append(r.Reply.Answer, rr)
360 }
361}
362
363func addAddrExtra(r *netDNS.Request, name string, ip net.IP) {
364 if len(ip) == net.IPv4len {
365 rr := new(dns.A)
366 rr.Hdr = dns.RR_Header{Name: name, Rrtype: dns.TypeA, Class: dns.ClassINET, Ttl: defaultTTL}
367 rr.A = ip
368 r.Reply.Extra = append(r.Reply.Extra, rr)
369 }
370 if len(ip) == net.IPv6len {
371 rr := new(dns.AAAA)
372 rr.Hdr = dns.RR_Header{Name: name, Rrtype: dns.TypeAAAA, Class: dns.ClassINET, Ttl: defaultTTL}
373 rr.AAAA = ip
374 r.Reply.Extra = append(r.Reply.Extra, rr)
375 }
376}
377
378func addSRV(r *netDNS.Request, portNumber uint16, targetName string) {
379 rr := new(dns.SRV)
380 rr.Hdr = dns.RR_Header{Name: r.Qname, Rrtype: dns.TypeSRV, Class: dns.ClassINET, Ttl: defaultTTL}
381 rr.Priority = 0
382 rr.Weight = 0
383 rr.Port = portNumber
384 rr.Target = targetName
385 r.Reply.Answer = append(r.Reply.Answer, rr)
386}
387
388// notFound should be called if a name was not found, but could exist
389// if there are Kubernetes object that are not yet available locally.
390func (k *Kubernetes) notFound(r *netDNS.Request) {
391 if !k.apiConn.HasSynced() {
392 // We don't know if the name exists or not, so return an error.
393 r.AddExtendedError(dns.ExtendedErrorCodeNotReady, "Kubernetes objects not yet synced")
394 r.Reply.Rcode = dns.RcodeServerFailure
395 } else {
396 r.Reply.Rcode = dns.RcodeNameError
397 }
398}
399
// srvItem is the set key used to deduplicate records while building an SRV
// response.
type srvItem struct {
	name string
	addr string
	port uint16
}

// isDuplicateSRV reports whether the (name, addr, port) combination is
// already present in m. If it is not, the combination is recorded in m so
// that the next call with the same arguments returns true.
func isDuplicateSRV(m map[srvItem]struct{}, name, addr string, port uint16) bool {
	key := srvItem{name: name, addr: addr, port: port}
	if _, seen := m[key]; seen {
		return true
	}
	m[key] = struct{}{}
	return false
}
415
416// shuffleRRs shuffles a slice of RRs for some load balancing.
417func shuffleRRs(rrs []dns.RR) {
418 rand.Shuffle(len(rrs), func(i, j int) {
419 rrs[i], rrs[j] = rrs[j], rrs[i]
420 })
421}