blob: 71283b044d27dd2ae692338f1fc65d10c82480b7 [file] [log] [blame]
Tim Windelschmidt6d33a432025-02-04 14:34:25 +01001// Copyright The Monogon Project Authors.
2// SPDX-License-Identifier: Apache-2.0
3
Serge Bazanskicf23ebc2023-03-14 17:02:04 +01004package core
5
6import (
Serge Bazanski568c38c2024-02-05 14:40:39 +01007 "context"
Serge Bazanskicf23ebc2023-03-14 17:02:04 +01008 "crypto/ed25519"
9 "crypto/rand"
Serge Bazanski7eeef0f2024-02-05 14:40:15 +010010 "crypto/tls"
Serge Bazanskicf23ebc2023-03-14 17:02:04 +010011 "crypto/x509"
12 "encoding/pem"
13 "errors"
14 "fmt"
Serge Bazanski568c38c2024-02-05 14:40:39 +010015 "log"
Serge Bazanski1f8cad72023-03-20 16:58:10 +010016 "net"
17 "net/url"
Serge Bazanskicf23ebc2023-03-14 17:02:04 +010018 "os"
19 "path/filepath"
20
Serge Bazanski568c38c2024-02-05 14:40:39 +010021 "golang.org/x/net/proxy"
Serge Bazanskicf23ebc2023-03-14 17:02:04 +010022 clientauthentication "k8s.io/client-go/pkg/apis/clientauthentication/v1"
23 "k8s.io/client-go/tools/clientcmd"
24 clientapi "k8s.io/client-go/tools/clientcmd/api"
Serge Bazanski1f8cad72023-03-20 16:58:10 +010025
Serge Bazanskica8d9512024-09-12 14:20:57 +020026 "source.monogon.dev/go/logging"
Jan Schär0f8ce4c2025-09-04 13:27:50 +020027 "source.monogon.dev/metropolis/node/allocs"
Serge Bazanskicf23ebc2023-03-14 17:02:04 +010028)
29
const (
	// OwnerKeyFileName names the file, inside a metroctl config directory,
	// that holds the owner private key.
	OwnerKeyFileName = "owner-key.pem"
	// OwnerCertificateFileName names the file, inside a metroctl config
	// directory, that holds the owner certificate.
	OwnerCertificateFileName = "owner.pem"
	// CACertificateFileName names the file, inside a metroctl config
	// directory, that holds the cluster CA certificate.
	CACertificateFileName = "ca.pem"
)
41
var (
	// ErrNoCredentials is returned when the owner key or the owner
	// certificate is missing from the configuration directory being read.
	ErrNoCredentials = errors.New("owner certificate or key does not exist")

	// ErrNoCACertificate is returned when no cluster CA certificate is saved
	// in the configuration directory being read.
	ErrNoCACertificate = errors.New("no cluster CA certificate while secure connection was requested")
)
Serge Bazanski7eeef0f2024-02-05 14:40:15 +010049
// ownerKeyType is the PEM block type under which a Metropolis initial owner
// private key is stored.
const ownerKeyType = "METROPOLIS INITIAL OWNER PRIVATE KEY"
52
53// GetOrMakeOwnerKey returns the owner key for a given metroctl configuration
54// directory path, generating and saving it first if it doesn't exist.
55func GetOrMakeOwnerKey(path string) (ed25519.PrivateKey, error) {
56 existing, err := GetOwnerKey(path)
Tim Windelschmidtd5f851b2024-04-23 14:59:37 +020057 switch {
58 case err == nil:
Serge Bazanskicf23ebc2023-03-14 17:02:04 +010059 return existing, nil
Tim Windelschmidt513df182024-04-18 23:44:50 +020060 case errors.Is(err, ErrNoCredentials):
Serge Bazanskicf23ebc2023-03-14 17:02:04 +010061 default:
62 return nil, err
63 }
64
65 _, priv, err := ed25519.GenerateKey(rand.Reader)
66 if err != nil {
67 return nil, fmt.Errorf("when generating key: %w", err)
68 }
69 if err := WriteOwnerKey(path, priv); err != nil {
70 return nil, err
71 }
72 return priv, nil
73}
74
75// WriteOwnerKey saves a given raw ED25519 private key as the owner key at a
76// given metroctl configuration directory path.
77func WriteOwnerKey(path string, priv ed25519.PrivateKey) error {
78 pemPriv := pem.EncodeToMemory(&pem.Block{Type: ownerKeyType, Bytes: priv})
79 if err := os.WriteFile(filepath.Join(path, OwnerKeyFileName), pemPriv, 0600); err != nil {
80 return fmt.Errorf("when saving key: %w", err)
81 }
82 return nil
83}
84
Serge Bazanski7eeef0f2024-02-05 14:40:15 +010085// WriteCACertificate writes the given der-encoded X509 certificate to the given
86// metorctl configuration directory path.
87func WriteCACertificate(path string, der []byte) error {
88 pemCert := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: der})
89 if err := os.WriteFile(filepath.Join(path, CACertificateFileName), pemCert, 0600); err != nil {
90 return fmt.Errorf("when saving CA certificate: %w", err)
91 }
92 return nil
93}
94
Serge Bazanskicf23ebc2023-03-14 17:02:04 +010095// GetOwnerKey loads and returns a raw ED25519 private key from the saved owner
96// key in a given metroctl configuration directory path. If the owner key doesn't
Tim Windelschmidt513df182024-04-18 23:44:50 +020097// exist, ErrNoCredentials will be returned.
Serge Bazanskicf23ebc2023-03-14 17:02:04 +010098func GetOwnerKey(path string) (ed25519.PrivateKey, error) {
99 ownerPrivateKeyPEM, err := os.ReadFile(filepath.Join(path, OwnerKeyFileName))
100 if os.IsNotExist(err) {
Tim Windelschmidt513df182024-04-18 23:44:50 +0200101 return nil, ErrNoCredentials
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100102 } else if err != nil {
103 return nil, fmt.Errorf("failed to load owner private key: %w", err)
104 }
105 block, _ := pem.Decode(ownerPrivateKeyPEM)
106 if block == nil {
107 return nil, errors.New("owner-key.pem contains invalid PEM armoring")
108 }
109 if block.Type != ownerKeyType {
110 return nil, fmt.Errorf("owner-key.pem contains a PEM block that's not a %v", ownerKeyType)
111 }
112 if len(block.Bytes) != ed25519.PrivateKeySize {
113 return nil, errors.New("owner-key.pem contains a non-Ed25519 key")
114 }
115 return block.Bytes, nil
116}
117
118// WriteOwnerCertificate saves a given DER-encoded X509 certificate as the owner
119// key for a given metroctl configuration directory path.
120func WriteOwnerCertificate(path string, cert []byte) error {
121 ownerCertPEM := pem.Block{
122 Type: "CERTIFICATE",
123 Bytes: cert,
124 }
125 if err := os.WriteFile(filepath.Join(path, OwnerCertificateFileName), pem.EncodeToMemory(&ownerCertPEM), 0644); err != nil {
126 return err
127 }
128 return nil
129}
130
131// GetOwnerCredentials loads and returns a raw ED25519 private key alongside a
132// DER-encoded X509 certificate from the saved owner key and certificate in a
133// given metroctl configuration directory path. If either the key or certificate
Tim Windelschmidt513df182024-04-18 23:44:50 +0200134// doesn't exist, ErrNoCredentials will be returned.
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100135func GetOwnerCredentials(path string) (cert *x509.Certificate, key ed25519.PrivateKey, err error) {
136 key, err = GetOwnerKey(path)
137 if err != nil {
138 return nil, nil, err
139 }
140
141 ownerCertPEM, err := os.ReadFile(filepath.Join(path, OwnerCertificateFileName))
142 if os.IsNotExist(err) {
Tim Windelschmidt513df182024-04-18 23:44:50 +0200143 return nil, nil, ErrNoCredentials
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100144 } else if err != nil {
145 return nil, nil, fmt.Errorf("failed to load owner certificate: %w", err)
146 }
147 block, _ := pem.Decode(ownerCertPEM)
148 if block == nil {
149 return nil, nil, errors.New("owner.pem contains invalid PEM armoring")
150 }
151 if block.Type != "CERTIFICATE" {
152 return nil, nil, fmt.Errorf("owner.pem contains a PEM block that's not a CERTIFICATE")
153 }
154 cert, err = x509.ParseCertificate(block.Bytes)
155 if err != nil {
156 return nil, nil, fmt.Errorf("owner.pem contains an invalid X.509 certificate: %w", err)
157 }
158 return
159}
160
Serge Bazanski7eeef0f2024-02-05 14:40:15 +0100161// GetOwnerTLSCredentials returns a client TLS Certificate for authenticating to
162// the metropolis cluster, based on metroctl configuration at a given path.
163func GetOwnerTLSCredentials(path string) (*tls.Certificate, error) {
164 ocert, opkey, err := GetOwnerCredentials(path)
165 if err != nil {
166 return nil, err
167 }
168 return &tls.Certificate{
169 Certificate: [][]byte{ocert.Raw},
170 PrivateKey: opkey,
171 }, nil
172}
173
174// GetClusterCA returns the saved cluster CA certificate at the given metoctl
175// configuration path. This does not perform TOFU if the certificate is not
176// present.
177func GetClusterCA(path string) (cert *x509.Certificate, err error) {
178 caCertPEM, err := os.ReadFile(filepath.Join(path, CACertificateFileName))
179 if os.IsNotExist(err) {
Tim Windelschmidt513df182024-04-18 23:44:50 +0200180 return nil, ErrNoCACertificate
Serge Bazanski7eeef0f2024-02-05 14:40:15 +0100181 } else if err != nil {
182 return nil, fmt.Errorf("failed to load CA certificate: %w", err)
183 }
184 block, _ := pem.Decode(caCertPEM)
185 if block == nil {
186 return nil, errors.New("ca.pem contains invalid PEM armoring")
187 }
188 if block.Type != "CERTIFICATE" {
189 return nil, fmt.Errorf("ca.pem contains a PEM block that's not a CERTIFICATE")
190 }
191 cert, err = x509.ParseCertificate(block.Bytes)
192 if err != nil {
193 return nil, fmt.Errorf("ca.pem contains an invalid X.509 certificate: %w", err)
194 }
195 return
196}
197
Serge Bazanski1f8cad72023-03-20 16:58:10 +0100198// InstallKubeletConfig modifies the default kubelet kubeconfig of the host
199// system to be able to connect via a metroctl (and an associated ConnectOptions)
200// to a Kubernetes apiserver at IP address/hostname 'server'.
201//
202// The kubelet's kubeconfig changes will be limited to contexts/configs/... named
203// configName. The configName context will be made the default context only if
204// there is no other default context in the current subconfig.
205//
206// Kubeconfigs can only take a single Kubernetes server address, so this function
207// similarly only allows you to specify only a single server address.
Serge Bazanski568c38c2024-02-05 14:40:39 +0100208func InstallKubeletConfig(ctx context.Context, metroctlPath string, opts *ConnectOptions, configName, server string) error {
209 po := clientcmd.NewDefaultPathOptions()
210 config, err := po.GetStartingConfig()
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100211 if err != nil {
212 return fmt.Errorf("getting initial config failed: %w", err)
213 }
214
Serge Bazanski1f8cad72023-03-20 16:58:10 +0100215 args := []string{
216 "k8scredplugin",
217 }
218 args = append(args, opts.ToFlags()...)
219
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100220 config.AuthInfos[configName] = &clientapi.AuthInfo{
221 Exec: &clientapi.ExecConfig{
222 APIVersion: clientauthentication.SchemeGroupVersion.String(),
223 Command: metroctlPath,
Serge Bazanski1f8cad72023-03-20 16:58:10 +0100224 Args: args,
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100225 InstallHint: `Authenticating to Metropolis clusters requires metroctl to be present.
226Running metroctl takeownership creates this entry and either points to metroctl as a command in
227PATH if metroctl is in PATH at that time or to the absolute path to metroctl at that time.
228If you moved metroctl afterwards or want to switch to PATH resolution, edit $HOME/.kube/config and
229change users.metropolis.exec.command to the required path (or just metroctl if using PATH resolution).`,
230 InteractiveMode: clientapi.NeverExecInteractiveMode,
231 },
232 }
233
Serge Bazanski1f8cad72023-03-20 16:58:10 +0100234 var u url.URL
235 u.Scheme = "https"
Jan Schär0f8ce4c2025-09-04 13:27:50 +0200236 u.Host = net.JoinHostPort(server, allocs.PortKubernetesAPIWrapped.PortString())
Serge Bazanski7eeef0f2024-02-05 14:40:15 +0100237
Serge Bazanski568c38c2024-02-05 14:40:39 +0100238 // HACK: the Metropolis node certificates only contain the node ID as a SAN. This
239 // means that we can't use some 'global' identifier as the TLSServerName below
240 // that would be the same across all cluster nodes. Unfortunately the Kubeconfig
241 // system only allows for specifying a concrete name, not a regexp or some more
242 // complex validation mechanism for certs.
243 //
244 // The correct fix for this is to issue a new set of certs for the nodes to use,
245 // but that would require implementing a migration mechanism which we don't want
246 // to do as that entire system is getting replaced with SPIFFE based certificates
247 // very soon.
248 //
249 // To get around this, we thus pin the TLSServerName. This works because current
250 // production deployments only use a single node as the Kubernetes endpoint. To
251 // actually get the cert we connect here to the given server and retrieve its
252 // node ID.
253 //
254 // TODO(lorenz): replace as part of SPIFFE authn work
255
256 ca, err := GetClusterCAWithTOFU(ctx, opts)
257 if err != nil {
258 return fmt.Errorf("failed to retrieve CA certificate: %w", err)
259 }
260
261 pinnedNameC := make(chan string, 1)
262 connLower, err := opts.Dial("tcp", u.Host)
263 if err != nil {
264 return fmt.Errorf("failed to dial to retrieve server cert: %w", err)
265 }
266 conn := tls.Client(connLower, &tls.Config{
267 InsecureSkipVerify: true,
268 VerifyPeerCertificate: func(rawCerts [][]byte, verifiedChains [][]*x509.Certificate) error {
269 if ncerts := len(rawCerts); ncerts != 1 {
270 return fmt.Errorf("expected 1 server cert, got %d", ncerts)
271 }
272 cert, err := x509.ParseCertificate(rawCerts[0])
273 if err != nil {
274 return fmt.Errorf("parsing server certificate failed: %w", err)
275 }
276 if err := cert.CheckSignatureFrom(ca); err != nil {
277 return fmt.Errorf("server certificate verification failed: %w", err)
278 }
279 if nnames := len(cert.DNSNames); nnames != 1 {
280 return fmt.Errorf("expected 1 DNS SAN, got %q", cert.DNSNames)
281 }
282 pinnedNameC <- cert.DNSNames[0]
283 return nil
284 },
285 })
286 if err := conn.Handshake(); err != nil {
287 return fmt.Errorf("failed to connect to retrieve server cert: %w", err)
288 }
289 var pinnedName string
290 select {
291 case pinnedName = <-pinnedNameC:
292 case <-ctx.Done():
293 return ctx.Err()
294 }
295
296 log.Printf("Pinning Kubernetes server certificate to %q", pinnedName)
297
298 // Actually configure Kubernetes now.
299
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100300 config.Clusters[configName] = &clientapi.Cluster{
Serge Bazanski568c38c2024-02-05 14:40:39 +0100301 CertificateAuthorityData: pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: ca.Raw}),
302 TLSServerName: pinnedName,
303 Server: u.String(),
304 ProxyURL: opts.ProxyURL(),
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100305 }
306
307 config.Contexts[configName] = &clientapi.Context{
308 AuthInfo: configName,
309 Cluster: configName,
310 Namespace: "default",
311 }
312
313 // Only set us as the current context if no other exists. Changing that
314 // unprompted would be kind of rude.
315 if config.CurrentContext == "" {
316 config.CurrentContext = configName
317 }
318
Serge Bazanski568c38c2024-02-05 14:40:39 +0100319 if err := clientcmd.ModifyConfig(po, *config, true); err != nil {
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100320 return fmt.Errorf("modifying config failed: %w", err)
321 }
322 return nil
323}
Serge Bazanski1f8cad72023-03-20 16:58:10 +0100324
325// ConnectOptions define how to reach a Metropolis cluster from metroctl.
326//
327// This structure can be built directly. All unset fields mean 'default'. It can
328// then be used to generate the equivalent flags to passs to metroctl.
329//
330// Nil pointers to ConnectOptions are equivalent to an empty ConneectOptions when
331// methods on it are called.
332type ConnectOptions struct {
333 // ConfigPath is the path at which the metroctl configuration/credentials live.
334 // If not set, the default will be used.
335 ConfigPath string
336 // ProxyServer is a host:port pair that indicates the metropolis cluster should
337 // be reached via the given SOCKS5 proxy. If not set, the cluster can be reached
338 // directly from the host networking stack.
339 ProxyServer string
340 // Endpoints are the IP addresses/hostnames (without port part) of the Metropolis
341 // instances that metroctl should use to establish connectivity to a cluster.
342 // These instances should have the ControlPlane role set.
343 Endpoints []string
Serge Bazanski925ec3d2024-02-05 14:38:20 +0100344 // ResolverLogger can be set to enable verbose logging of the Metropolis RPC
345 // resolver layer.
Serge Bazanskica8d9512024-09-12 14:20:57 +0200346 ResolverLogger logging.Leveled
Serge Bazanski7eeef0f2024-02-05 14:40:15 +0100347 // TOFU overrides the trust-on-first-use behaviour for CA certificates for the
348 // connection. If not set, TerminalTOFU is used which will interactively ask the
349 // user to accept a CA certificate using os.Stdin/Stdout.
350 TOFU CertificateTOFU
Serge Bazanski1f8cad72023-03-20 16:58:10 +0100351}
352
353// ToFlags returns the metroctl flags corresponding to the options described by
354// this ConnectionOptions struct.
355func (c *ConnectOptions) ToFlags() []string {
356 var res []string
357
358 if c == nil {
359 return res
360 }
361
362 if c.ConfigPath != "" {
363 res = append(res, "--config", c.ConfigPath)
364 }
365 if c.ProxyServer != "" {
366 res = append(res, "--proxy", c.ProxyServer)
367 }
368 for _, ep := range c.Endpoints {
369 res = append(res, "--endpoints", ep)
370 }
371
372 return res
373}
374
375// ProxyURL returns a kubeconfig-compatible URL of the proxy server configured by
376// ConnectOptions, or an empty string if not set.
377func (c *ConnectOptions) ProxyURL() string {
378 if c == nil {
379 return ""
380 }
381 if c.ProxyServer == "" {
382 return ""
383 }
384 var u url.URL
385 u.Scheme = "socks5"
386 u.Host = c.ProxyServer
387 return u.String()
388}
Serge Bazanski568c38c2024-02-05 14:40:39 +0100389
390func (c *ConnectOptions) Dial(network, addr string) (net.Conn, error) {
391 if c.ProxyServer != "" {
392 socksDialer, err := proxy.SOCKS5("tcp", c.ProxyServer, nil, proxy.Direct)
393 if err != nil {
394 return nil, fmt.Errorf("failed to build a SOCKS dialer: %w", err)
395 }
396 return socksDialer.Dial(network, addr)
397 } else {
398 return net.Dial(network, addr)
399 }
400}