blob: d22dd9a4c062e61ad2b3f5aec87f9fed2ba9b8a6 [file] [log] [blame]
Serge Bazanskicf23ebc2023-03-14 17:02:04 +01001package core
2
3import (
Serge Bazanski568c38c2024-02-05 14:40:39 +01004 "context"
Serge Bazanskicf23ebc2023-03-14 17:02:04 +01005 "crypto/ed25519"
6 "crypto/rand"
Serge Bazanski7eeef0f2024-02-05 14:40:15 +01007 "crypto/tls"
Serge Bazanskicf23ebc2023-03-14 17:02:04 +01008 "crypto/x509"
9 "encoding/pem"
10 "errors"
11 "fmt"
Serge Bazanski568c38c2024-02-05 14:40:39 +010012 "log"
Serge Bazanski1f8cad72023-03-20 16:58:10 +010013 "net"
14 "net/url"
Serge Bazanskicf23ebc2023-03-14 17:02:04 +010015 "os"
16 "path/filepath"
17
Serge Bazanski568c38c2024-02-05 14:40:39 +010018 "golang.org/x/net/proxy"
Serge Bazanskicf23ebc2023-03-14 17:02:04 +010019 clientauthentication "k8s.io/client-go/pkg/apis/clientauthentication/v1"
20 "k8s.io/client-go/tools/clientcmd"
21 clientapi "k8s.io/client-go/tools/clientcmd/api"
Serge Bazanski1f8cad72023-03-20 16:58:10 +010022
Serge Bazanskica8d9512024-09-12 14:20:57 +020023 "source.monogon.dev/go/logging"
Serge Bazanski1f8cad72023-03-20 16:58:10 +010024 "source.monogon.dev/metropolis/node"
Serge Bazanskicf23ebc2023-03-14 17:02:04 +010025)
26
const (
	// OwnerKeyFileName is the name of the file holding the owner key inside a
	// metroctl config directory.
	OwnerKeyFileName = "owner-key.pem"
	// OwnerCertificateFileName is the name of the file holding the owner
	// certificate inside a metroctl config directory.
	OwnerCertificateFileName = "owner.pem"
	// CACertificateFileName is the name of the file holding the cluster CA
	// certificate inside a metroctl config directory.
	CACertificateFileName = "ca.pem"
)
38
var (
	// ErrNoCredentials is returned when the requested datum (eg. the owner key
	// or the owner certificate) is missing from the requested directory.
	ErrNoCredentials = errors.New("owner certificate or key does not exist")

	// ErrNoCACertificate is returned when no cluster CA certificate is saved in
	// the requested directory.
	ErrNoCACertificate = errors.New("no cluster CA certificate while secure connection was requested")
)
Serge Bazanski7eeef0f2024-02-05 14:40:15 +010046
// ownerKeyType is the PEM block type under which a Metropolis initial owner
// private key is stored.
const ownerKeyType = "METROPOLIS INITIAL OWNER PRIVATE KEY"
49
50// GetOrMakeOwnerKey returns the owner key for a given metroctl configuration
51// directory path, generating and saving it first if it doesn't exist.
52func GetOrMakeOwnerKey(path string) (ed25519.PrivateKey, error) {
53 existing, err := GetOwnerKey(path)
Tim Windelschmidtd5f851b2024-04-23 14:59:37 +020054 switch {
55 case err == nil:
Serge Bazanskicf23ebc2023-03-14 17:02:04 +010056 return existing, nil
Tim Windelschmidt513df182024-04-18 23:44:50 +020057 case errors.Is(err, ErrNoCredentials):
Serge Bazanskicf23ebc2023-03-14 17:02:04 +010058 default:
59 return nil, err
60 }
61
62 _, priv, err := ed25519.GenerateKey(rand.Reader)
63 if err != nil {
64 return nil, fmt.Errorf("when generating key: %w", err)
65 }
66 if err := WriteOwnerKey(path, priv); err != nil {
67 return nil, err
68 }
69 return priv, nil
70}
71
72// WriteOwnerKey saves a given raw ED25519 private key as the owner key at a
73// given metroctl configuration directory path.
74func WriteOwnerKey(path string, priv ed25519.PrivateKey) error {
75 pemPriv := pem.EncodeToMemory(&pem.Block{Type: ownerKeyType, Bytes: priv})
76 if err := os.WriteFile(filepath.Join(path, OwnerKeyFileName), pemPriv, 0600); err != nil {
77 return fmt.Errorf("when saving key: %w", err)
78 }
79 return nil
80}
81
Serge Bazanski7eeef0f2024-02-05 14:40:15 +010082// WriteCACertificate writes the given der-encoded X509 certificate to the given
83// metorctl configuration directory path.
84func WriteCACertificate(path string, der []byte) error {
85 pemCert := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: der})
86 if err := os.WriteFile(filepath.Join(path, CACertificateFileName), pemCert, 0600); err != nil {
87 return fmt.Errorf("when saving CA certificate: %w", err)
88 }
89 return nil
90}
91
Serge Bazanskicf23ebc2023-03-14 17:02:04 +010092// GetOwnerKey loads and returns a raw ED25519 private key from the saved owner
93// key in a given metroctl configuration directory path. If the owner key doesn't
Tim Windelschmidt513df182024-04-18 23:44:50 +020094// exist, ErrNoCredentials will be returned.
Serge Bazanskicf23ebc2023-03-14 17:02:04 +010095func GetOwnerKey(path string) (ed25519.PrivateKey, error) {
96 ownerPrivateKeyPEM, err := os.ReadFile(filepath.Join(path, OwnerKeyFileName))
97 if os.IsNotExist(err) {
Tim Windelschmidt513df182024-04-18 23:44:50 +020098 return nil, ErrNoCredentials
Serge Bazanskicf23ebc2023-03-14 17:02:04 +010099 } else if err != nil {
100 return nil, fmt.Errorf("failed to load owner private key: %w", err)
101 }
102 block, _ := pem.Decode(ownerPrivateKeyPEM)
103 if block == nil {
104 return nil, errors.New("owner-key.pem contains invalid PEM armoring")
105 }
106 if block.Type != ownerKeyType {
107 return nil, fmt.Errorf("owner-key.pem contains a PEM block that's not a %v", ownerKeyType)
108 }
109 if len(block.Bytes) != ed25519.PrivateKeySize {
110 return nil, errors.New("owner-key.pem contains a non-Ed25519 key")
111 }
112 return block.Bytes, nil
113}
114
115// WriteOwnerCertificate saves a given DER-encoded X509 certificate as the owner
116// key for a given metroctl configuration directory path.
117func WriteOwnerCertificate(path string, cert []byte) error {
118 ownerCertPEM := pem.Block{
119 Type: "CERTIFICATE",
120 Bytes: cert,
121 }
122 if err := os.WriteFile(filepath.Join(path, OwnerCertificateFileName), pem.EncodeToMemory(&ownerCertPEM), 0644); err != nil {
123 return err
124 }
125 return nil
126}
127
128// GetOwnerCredentials loads and returns a raw ED25519 private key alongside a
129// DER-encoded X509 certificate from the saved owner key and certificate in a
130// given metroctl configuration directory path. If either the key or certificate
Tim Windelschmidt513df182024-04-18 23:44:50 +0200131// doesn't exist, ErrNoCredentials will be returned.
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100132func GetOwnerCredentials(path string) (cert *x509.Certificate, key ed25519.PrivateKey, err error) {
133 key, err = GetOwnerKey(path)
134 if err != nil {
135 return nil, nil, err
136 }
137
138 ownerCertPEM, err := os.ReadFile(filepath.Join(path, OwnerCertificateFileName))
139 if os.IsNotExist(err) {
Tim Windelschmidt513df182024-04-18 23:44:50 +0200140 return nil, nil, ErrNoCredentials
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100141 } else if err != nil {
142 return nil, nil, fmt.Errorf("failed to load owner certificate: %w", err)
143 }
144 block, _ := pem.Decode(ownerCertPEM)
145 if block == nil {
146 return nil, nil, errors.New("owner.pem contains invalid PEM armoring")
147 }
148 if block.Type != "CERTIFICATE" {
149 return nil, nil, fmt.Errorf("owner.pem contains a PEM block that's not a CERTIFICATE")
150 }
151 cert, err = x509.ParseCertificate(block.Bytes)
152 if err != nil {
153 return nil, nil, fmt.Errorf("owner.pem contains an invalid X.509 certificate: %w", err)
154 }
155 return
156}
157
Serge Bazanski7eeef0f2024-02-05 14:40:15 +0100158// GetOwnerTLSCredentials returns a client TLS Certificate for authenticating to
159// the metropolis cluster, based on metroctl configuration at a given path.
160func GetOwnerTLSCredentials(path string) (*tls.Certificate, error) {
161 ocert, opkey, err := GetOwnerCredentials(path)
162 if err != nil {
163 return nil, err
164 }
165 return &tls.Certificate{
166 Certificate: [][]byte{ocert.Raw},
167 PrivateKey: opkey,
168 }, nil
169}
170
171// GetClusterCA returns the saved cluster CA certificate at the given metoctl
172// configuration path. This does not perform TOFU if the certificate is not
173// present.
174func GetClusterCA(path string) (cert *x509.Certificate, err error) {
175 caCertPEM, err := os.ReadFile(filepath.Join(path, CACertificateFileName))
176 if os.IsNotExist(err) {
Tim Windelschmidt513df182024-04-18 23:44:50 +0200177 return nil, ErrNoCACertificate
Serge Bazanski7eeef0f2024-02-05 14:40:15 +0100178 } else if err != nil {
179 return nil, fmt.Errorf("failed to load CA certificate: %w", err)
180 }
181 block, _ := pem.Decode(caCertPEM)
182 if block == nil {
183 return nil, errors.New("ca.pem contains invalid PEM armoring")
184 }
185 if block.Type != "CERTIFICATE" {
186 return nil, fmt.Errorf("ca.pem contains a PEM block that's not a CERTIFICATE")
187 }
188 cert, err = x509.ParseCertificate(block.Bytes)
189 if err != nil {
190 return nil, fmt.Errorf("ca.pem contains an invalid X.509 certificate: %w", err)
191 }
192 return
193}
194
Serge Bazanski1f8cad72023-03-20 16:58:10 +0100195// InstallKubeletConfig modifies the default kubelet kubeconfig of the host
196// system to be able to connect via a metroctl (and an associated ConnectOptions)
197// to a Kubernetes apiserver at IP address/hostname 'server'.
198//
199// The kubelet's kubeconfig changes will be limited to contexts/configs/... named
200// configName. The configName context will be made the default context only if
201// there is no other default context in the current subconfig.
202//
203// Kubeconfigs can only take a single Kubernetes server address, so this function
204// similarly only allows you to specify only a single server address.
Serge Bazanski568c38c2024-02-05 14:40:39 +0100205func InstallKubeletConfig(ctx context.Context, metroctlPath string, opts *ConnectOptions, configName, server string) error {
206 po := clientcmd.NewDefaultPathOptions()
207 config, err := po.GetStartingConfig()
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100208 if err != nil {
209 return fmt.Errorf("getting initial config failed: %w", err)
210 }
211
Serge Bazanski1f8cad72023-03-20 16:58:10 +0100212 args := []string{
213 "k8scredplugin",
214 }
215 args = append(args, opts.ToFlags()...)
216
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100217 config.AuthInfos[configName] = &clientapi.AuthInfo{
218 Exec: &clientapi.ExecConfig{
219 APIVersion: clientauthentication.SchemeGroupVersion.String(),
220 Command: metroctlPath,
Serge Bazanski1f8cad72023-03-20 16:58:10 +0100221 Args: args,
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100222 InstallHint: `Authenticating to Metropolis clusters requires metroctl to be present.
223Running metroctl takeownership creates this entry and either points to metroctl as a command in
224PATH if metroctl is in PATH at that time or to the absolute path to metroctl at that time.
225If you moved metroctl afterwards or want to switch to PATH resolution, edit $HOME/.kube/config and
226change users.metropolis.exec.command to the required path (or just metroctl if using PATH resolution).`,
227 InteractiveMode: clientapi.NeverExecInteractiveMode,
228 },
229 }
230
Serge Bazanski1f8cad72023-03-20 16:58:10 +0100231 var u url.URL
232 u.Scheme = "https"
233 u.Host = net.JoinHostPort(server, node.KubernetesAPIWrappedPort.PortString())
Serge Bazanski7eeef0f2024-02-05 14:40:15 +0100234
Serge Bazanski568c38c2024-02-05 14:40:39 +0100235 // HACK: the Metropolis node certificates only contain the node ID as a SAN. This
236 // means that we can't use some 'global' identifier as the TLSServerName below
237 // that would be the same across all cluster nodes. Unfortunately the Kubeconfig
238 // system only allows for specifying a concrete name, not a regexp or some more
239 // complex validation mechanism for certs.
240 //
241 // The correct fix for this is to issue a new set of certs for the nodes to use,
242 // but that would require implementing a migration mechanism which we don't want
243 // to do as that entire system is getting replaced with SPIFFE based certificates
244 // very soon.
245 //
246 // To get around this, we thus pin the TLSServerName. This works because current
247 // production deployments only use a single node as the Kubernetes endpoint. To
248 // actually get the cert we connect here to the given server and retrieve its
249 // node ID.
250 //
251 // TODO(lorenz): replace as part of SPIFFE authn work
252
253 ca, err := GetClusterCAWithTOFU(ctx, opts)
254 if err != nil {
255 return fmt.Errorf("failed to retrieve CA certificate: %w", err)
256 }
257
258 pinnedNameC := make(chan string, 1)
259 connLower, err := opts.Dial("tcp", u.Host)
260 if err != nil {
261 return fmt.Errorf("failed to dial to retrieve server cert: %w", err)
262 }
263 conn := tls.Client(connLower, &tls.Config{
264 InsecureSkipVerify: true,
265 VerifyPeerCertificate: func(rawCerts [][]byte, verifiedChains [][]*x509.Certificate) error {
266 if ncerts := len(rawCerts); ncerts != 1 {
267 return fmt.Errorf("expected 1 server cert, got %d", ncerts)
268 }
269 cert, err := x509.ParseCertificate(rawCerts[0])
270 if err != nil {
271 return fmt.Errorf("parsing server certificate failed: %w", err)
272 }
273 if err := cert.CheckSignatureFrom(ca); err != nil {
274 return fmt.Errorf("server certificate verification failed: %w", err)
275 }
276 if nnames := len(cert.DNSNames); nnames != 1 {
277 return fmt.Errorf("expected 1 DNS SAN, got %q", cert.DNSNames)
278 }
279 pinnedNameC <- cert.DNSNames[0]
280 return nil
281 },
282 })
283 if err := conn.Handshake(); err != nil {
284 return fmt.Errorf("failed to connect to retrieve server cert: %w", err)
285 }
286 var pinnedName string
287 select {
288 case pinnedName = <-pinnedNameC:
289 case <-ctx.Done():
290 return ctx.Err()
291 }
292
293 log.Printf("Pinning Kubernetes server certificate to %q", pinnedName)
294
295 // Actually configure Kubernetes now.
296
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100297 config.Clusters[configName] = &clientapi.Cluster{
Serge Bazanski568c38c2024-02-05 14:40:39 +0100298 CertificateAuthorityData: pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: ca.Raw}),
299 TLSServerName: pinnedName,
300 Server: u.String(),
301 ProxyURL: opts.ProxyURL(),
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100302 }
303
304 config.Contexts[configName] = &clientapi.Context{
305 AuthInfo: configName,
306 Cluster: configName,
307 Namespace: "default",
308 }
309
310 // Only set us as the current context if no other exists. Changing that
311 // unprompted would be kind of rude.
312 if config.CurrentContext == "" {
313 config.CurrentContext = configName
314 }
315
Serge Bazanski568c38c2024-02-05 14:40:39 +0100316 if err := clientcmd.ModifyConfig(po, *config, true); err != nil {
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100317 return fmt.Errorf("modifying config failed: %w", err)
318 }
319 return nil
320}
Serge Bazanski1f8cad72023-03-20 16:58:10 +0100321
322// ConnectOptions define how to reach a Metropolis cluster from metroctl.
323//
324// This structure can be built directly. All unset fields mean 'default'. It can
325// then be used to generate the equivalent flags to passs to metroctl.
326//
327// Nil pointers to ConnectOptions are equivalent to an empty ConneectOptions when
328// methods on it are called.
329type ConnectOptions struct {
330 // ConfigPath is the path at which the metroctl configuration/credentials live.
331 // If not set, the default will be used.
332 ConfigPath string
333 // ProxyServer is a host:port pair that indicates the metropolis cluster should
334 // be reached via the given SOCKS5 proxy. If not set, the cluster can be reached
335 // directly from the host networking stack.
336 ProxyServer string
337 // Endpoints are the IP addresses/hostnames (without port part) of the Metropolis
338 // instances that metroctl should use to establish connectivity to a cluster.
339 // These instances should have the ControlPlane role set.
340 Endpoints []string
Serge Bazanski925ec3d2024-02-05 14:38:20 +0100341 // ResolverLogger can be set to enable verbose logging of the Metropolis RPC
342 // resolver layer.
Serge Bazanskica8d9512024-09-12 14:20:57 +0200343 ResolverLogger logging.Leveled
Serge Bazanski7eeef0f2024-02-05 14:40:15 +0100344 // TOFU overrides the trust-on-first-use behaviour for CA certificates for the
345 // connection. If not set, TerminalTOFU is used which will interactively ask the
346 // user to accept a CA certificate using os.Stdin/Stdout.
347 TOFU CertificateTOFU
Serge Bazanski1f8cad72023-03-20 16:58:10 +0100348}
349
350// ToFlags returns the metroctl flags corresponding to the options described by
351// this ConnectionOptions struct.
352func (c *ConnectOptions) ToFlags() []string {
353 var res []string
354
355 if c == nil {
356 return res
357 }
358
359 if c.ConfigPath != "" {
360 res = append(res, "--config", c.ConfigPath)
361 }
362 if c.ProxyServer != "" {
363 res = append(res, "--proxy", c.ProxyServer)
364 }
365 for _, ep := range c.Endpoints {
366 res = append(res, "--endpoints", ep)
367 }
368
369 return res
370}
371
372// ProxyURL returns a kubeconfig-compatible URL of the proxy server configured by
373// ConnectOptions, or an empty string if not set.
374func (c *ConnectOptions) ProxyURL() string {
375 if c == nil {
376 return ""
377 }
378 if c.ProxyServer == "" {
379 return ""
380 }
381 var u url.URL
382 u.Scheme = "socks5"
383 u.Host = c.ProxyServer
384 return u.String()
385}
Serge Bazanski568c38c2024-02-05 14:40:39 +0100386
387func (c *ConnectOptions) Dial(network, addr string) (net.Conn, error) {
388 if c.ProxyServer != "" {
389 socksDialer, err := proxy.SOCKS5("tcp", c.ProxyServer, nil, proxy.Direct)
390 if err != nil {
391 return nil, fmt.Errorf("failed to build a SOCKS dialer: %w", err)
392 }
393 return socksDialer.Dial(network, addr)
394 } else {
395 return net.Dial(network, addr)
396 }
397}