blob: d1198835f8761bb3219fb80afbb15e25b5a0d8ee [file] [log] [blame]
Serge Bazanskicf23ebc2023-03-14 17:02:04 +01001package core
2
3import (
Serge Bazanski568c38c2024-02-05 14:40:39 +01004 "context"
Serge Bazanskicf23ebc2023-03-14 17:02:04 +01005 "crypto/ed25519"
6 "crypto/rand"
Serge Bazanski7eeef0f2024-02-05 14:40:15 +01007 "crypto/tls"
Serge Bazanskicf23ebc2023-03-14 17:02:04 +01008 "crypto/x509"
9 "encoding/pem"
10 "errors"
11 "fmt"
Serge Bazanski568c38c2024-02-05 14:40:39 +010012 "log"
Serge Bazanski1f8cad72023-03-20 16:58:10 +010013 "net"
14 "net/url"
Serge Bazanskicf23ebc2023-03-14 17:02:04 +010015 "os"
16 "path/filepath"
17
Serge Bazanski568c38c2024-02-05 14:40:39 +010018 "golang.org/x/net/proxy"
Serge Bazanskicf23ebc2023-03-14 17:02:04 +010019 clientauthentication "k8s.io/client-go/pkg/apis/clientauthentication/v1"
20 "k8s.io/client-go/tools/clientcmd"
21 clientapi "k8s.io/client-go/tools/clientcmd/api"
Serge Bazanski1f8cad72023-03-20 16:58:10 +010022
23 "source.monogon.dev/metropolis/node"
Serge Bazanskicf23ebc2023-03-14 17:02:04 +010024)
25
const (
	// OwnerKeyFileName is the name of the file that holds the owner private key
	// inside a metroctl config directory.
	OwnerKeyFileName = "owner-key.pem"
	// OwnerCertificateFileName is the name of the file that holds the owner
	// certificate inside a metroctl config directory.
	OwnerCertificateFileName = "owner.pem"
	// CACertificateFileName is the name of the file that holds the cluster CA
	// certificate inside a metroctl config directory.
	CACertificateFileName = "ca.pem"
)
37
var (
	// ErrNoCredentials is returned when the requested credential (eg. the owner
	// key or the owner certificate) is missing from the requested directory.
	ErrNoCredentials = errors.New("owner certificate or key does not exist")

	// ErrNoCACertificate is returned by GetClusterCA when no cluster CA
	// certificate file exists in the requested directory.
	ErrNoCACertificate = errors.New("no cluster CA certificate while secure connection was requested")
)
Serge Bazanski7eeef0f2024-02-05 14:40:15 +010045
// ownerKeyType is the PEM block type used when storing a Metropolis initial
// owner private key.
const ownerKeyType = "METROPOLIS INITIAL OWNER PRIVATE KEY"
48
49// GetOrMakeOwnerKey returns the owner key for a given metroctl configuration
50// directory path, generating and saving it first if it doesn't exist.
51func GetOrMakeOwnerKey(path string) (ed25519.PrivateKey, error) {
52 existing, err := GetOwnerKey(path)
Tim Windelschmidtd5f851b2024-04-23 14:59:37 +020053 switch {
54 case err == nil:
Serge Bazanskicf23ebc2023-03-14 17:02:04 +010055 return existing, nil
Tim Windelschmidt513df182024-04-18 23:44:50 +020056 case errors.Is(err, ErrNoCredentials):
Serge Bazanskicf23ebc2023-03-14 17:02:04 +010057 default:
58 return nil, err
59 }
60
61 _, priv, err := ed25519.GenerateKey(rand.Reader)
62 if err != nil {
63 return nil, fmt.Errorf("when generating key: %w", err)
64 }
65 if err := WriteOwnerKey(path, priv); err != nil {
66 return nil, err
67 }
68 return priv, nil
69}
70
71// WriteOwnerKey saves a given raw ED25519 private key as the owner key at a
72// given metroctl configuration directory path.
73func WriteOwnerKey(path string, priv ed25519.PrivateKey) error {
74 pemPriv := pem.EncodeToMemory(&pem.Block{Type: ownerKeyType, Bytes: priv})
75 if err := os.WriteFile(filepath.Join(path, OwnerKeyFileName), pemPriv, 0600); err != nil {
76 return fmt.Errorf("when saving key: %w", err)
77 }
78 return nil
79}
80
Serge Bazanski7eeef0f2024-02-05 14:40:15 +010081// WriteCACertificate writes the given der-encoded X509 certificate to the given
82// metorctl configuration directory path.
83func WriteCACertificate(path string, der []byte) error {
84 pemCert := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: der})
85 if err := os.WriteFile(filepath.Join(path, CACertificateFileName), pemCert, 0600); err != nil {
86 return fmt.Errorf("when saving CA certificate: %w", err)
87 }
88 return nil
89}
90
Serge Bazanskicf23ebc2023-03-14 17:02:04 +010091// GetOwnerKey loads and returns a raw ED25519 private key from the saved owner
92// key in a given metroctl configuration directory path. If the owner key doesn't
Tim Windelschmidt513df182024-04-18 23:44:50 +020093// exist, ErrNoCredentials will be returned.
Serge Bazanskicf23ebc2023-03-14 17:02:04 +010094func GetOwnerKey(path string) (ed25519.PrivateKey, error) {
95 ownerPrivateKeyPEM, err := os.ReadFile(filepath.Join(path, OwnerKeyFileName))
96 if os.IsNotExist(err) {
Tim Windelschmidt513df182024-04-18 23:44:50 +020097 return nil, ErrNoCredentials
Serge Bazanskicf23ebc2023-03-14 17:02:04 +010098 } else if err != nil {
99 return nil, fmt.Errorf("failed to load owner private key: %w", err)
100 }
101 block, _ := pem.Decode(ownerPrivateKeyPEM)
102 if block == nil {
103 return nil, errors.New("owner-key.pem contains invalid PEM armoring")
104 }
105 if block.Type != ownerKeyType {
106 return nil, fmt.Errorf("owner-key.pem contains a PEM block that's not a %v", ownerKeyType)
107 }
108 if len(block.Bytes) != ed25519.PrivateKeySize {
109 return nil, errors.New("owner-key.pem contains a non-Ed25519 key")
110 }
111 return block.Bytes, nil
112}
113
114// WriteOwnerCertificate saves a given DER-encoded X509 certificate as the owner
115// key for a given metroctl configuration directory path.
116func WriteOwnerCertificate(path string, cert []byte) error {
117 ownerCertPEM := pem.Block{
118 Type: "CERTIFICATE",
119 Bytes: cert,
120 }
121 if err := os.WriteFile(filepath.Join(path, OwnerCertificateFileName), pem.EncodeToMemory(&ownerCertPEM), 0644); err != nil {
122 return err
123 }
124 return nil
125}
126
127// GetOwnerCredentials loads and returns a raw ED25519 private key alongside a
128// DER-encoded X509 certificate from the saved owner key and certificate in a
129// given metroctl configuration directory path. If either the key or certificate
Tim Windelschmidt513df182024-04-18 23:44:50 +0200130// doesn't exist, ErrNoCredentials will be returned.
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100131func GetOwnerCredentials(path string) (cert *x509.Certificate, key ed25519.PrivateKey, err error) {
132 key, err = GetOwnerKey(path)
133 if err != nil {
134 return nil, nil, err
135 }
136
137 ownerCertPEM, err := os.ReadFile(filepath.Join(path, OwnerCertificateFileName))
138 if os.IsNotExist(err) {
Tim Windelschmidt513df182024-04-18 23:44:50 +0200139 return nil, nil, ErrNoCredentials
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100140 } else if err != nil {
141 return nil, nil, fmt.Errorf("failed to load owner certificate: %w", err)
142 }
143 block, _ := pem.Decode(ownerCertPEM)
144 if block == nil {
145 return nil, nil, errors.New("owner.pem contains invalid PEM armoring")
146 }
147 if block.Type != "CERTIFICATE" {
148 return nil, nil, fmt.Errorf("owner.pem contains a PEM block that's not a CERTIFICATE")
149 }
150 cert, err = x509.ParseCertificate(block.Bytes)
151 if err != nil {
152 return nil, nil, fmt.Errorf("owner.pem contains an invalid X.509 certificate: %w", err)
153 }
154 return
155}
156
Serge Bazanski7eeef0f2024-02-05 14:40:15 +0100157// GetOwnerTLSCredentials returns a client TLS Certificate for authenticating to
158// the metropolis cluster, based on metroctl configuration at a given path.
159func GetOwnerTLSCredentials(path string) (*tls.Certificate, error) {
160 ocert, opkey, err := GetOwnerCredentials(path)
161 if err != nil {
162 return nil, err
163 }
164 return &tls.Certificate{
165 Certificate: [][]byte{ocert.Raw},
166 PrivateKey: opkey,
167 }, nil
168}
169
170// GetClusterCA returns the saved cluster CA certificate at the given metoctl
171// configuration path. This does not perform TOFU if the certificate is not
172// present.
173func GetClusterCA(path string) (cert *x509.Certificate, err error) {
174 caCertPEM, err := os.ReadFile(filepath.Join(path, CACertificateFileName))
175 if os.IsNotExist(err) {
Tim Windelschmidt513df182024-04-18 23:44:50 +0200176 return nil, ErrNoCACertificate
Serge Bazanski7eeef0f2024-02-05 14:40:15 +0100177 } else if err != nil {
178 return nil, fmt.Errorf("failed to load CA certificate: %w", err)
179 }
180 block, _ := pem.Decode(caCertPEM)
181 if block == nil {
182 return nil, errors.New("ca.pem contains invalid PEM armoring")
183 }
184 if block.Type != "CERTIFICATE" {
185 return nil, fmt.Errorf("ca.pem contains a PEM block that's not a CERTIFICATE")
186 }
187 cert, err = x509.ParseCertificate(block.Bytes)
188 if err != nil {
189 return nil, fmt.Errorf("ca.pem contains an invalid X.509 certificate: %w", err)
190 }
191 return
192}
193
Serge Bazanski1f8cad72023-03-20 16:58:10 +0100194// InstallKubeletConfig modifies the default kubelet kubeconfig of the host
195// system to be able to connect via a metroctl (and an associated ConnectOptions)
196// to a Kubernetes apiserver at IP address/hostname 'server'.
197//
198// The kubelet's kubeconfig changes will be limited to contexts/configs/... named
199// configName. The configName context will be made the default context only if
200// there is no other default context in the current subconfig.
201//
202// Kubeconfigs can only take a single Kubernetes server address, so this function
203// similarly only allows you to specify only a single server address.
Serge Bazanski568c38c2024-02-05 14:40:39 +0100204func InstallKubeletConfig(ctx context.Context, metroctlPath string, opts *ConnectOptions, configName, server string) error {
205 po := clientcmd.NewDefaultPathOptions()
206 config, err := po.GetStartingConfig()
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100207 if err != nil {
208 return fmt.Errorf("getting initial config failed: %w", err)
209 }
210
Serge Bazanski1f8cad72023-03-20 16:58:10 +0100211 args := []string{
212 "k8scredplugin",
213 }
214 args = append(args, opts.ToFlags()...)
215
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100216 config.AuthInfos[configName] = &clientapi.AuthInfo{
217 Exec: &clientapi.ExecConfig{
218 APIVersion: clientauthentication.SchemeGroupVersion.String(),
219 Command: metroctlPath,
Serge Bazanski1f8cad72023-03-20 16:58:10 +0100220 Args: args,
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100221 InstallHint: `Authenticating to Metropolis clusters requires metroctl to be present.
222Running metroctl takeownership creates this entry and either points to metroctl as a command in
223PATH if metroctl is in PATH at that time or to the absolute path to metroctl at that time.
224If you moved metroctl afterwards or want to switch to PATH resolution, edit $HOME/.kube/config and
225change users.metropolis.exec.command to the required path (or just metroctl if using PATH resolution).`,
226 InteractiveMode: clientapi.NeverExecInteractiveMode,
227 },
228 }
229
Serge Bazanski1f8cad72023-03-20 16:58:10 +0100230 var u url.URL
231 u.Scheme = "https"
232 u.Host = net.JoinHostPort(server, node.KubernetesAPIWrappedPort.PortString())
Serge Bazanski7eeef0f2024-02-05 14:40:15 +0100233
Serge Bazanski568c38c2024-02-05 14:40:39 +0100234 // HACK: the Metropolis node certificates only contain the node ID as a SAN. This
235 // means that we can't use some 'global' identifier as the TLSServerName below
236 // that would be the same across all cluster nodes. Unfortunately the Kubeconfig
237 // system only allows for specifying a concrete name, not a regexp or some more
238 // complex validation mechanism for certs.
239 //
240 // The correct fix for this is to issue a new set of certs for the nodes to use,
241 // but that would require implementing a migration mechanism which we don't want
242 // to do as that entire system is getting replaced with SPIFFE based certificates
243 // very soon.
244 //
245 // To get around this, we thus pin the TLSServerName. This works because current
246 // production deployments only use a single node as the Kubernetes endpoint. To
247 // actually get the cert we connect here to the given server and retrieve its
248 // node ID.
249 //
250 // TODO(lorenz): replace as part of SPIFFE authn work
251
252 ca, err := GetClusterCAWithTOFU(ctx, opts)
253 if err != nil {
254 return fmt.Errorf("failed to retrieve CA certificate: %w", err)
255 }
256
257 pinnedNameC := make(chan string, 1)
258 connLower, err := opts.Dial("tcp", u.Host)
259 if err != nil {
260 return fmt.Errorf("failed to dial to retrieve server cert: %w", err)
261 }
262 conn := tls.Client(connLower, &tls.Config{
263 InsecureSkipVerify: true,
264 VerifyPeerCertificate: func(rawCerts [][]byte, verifiedChains [][]*x509.Certificate) error {
265 if ncerts := len(rawCerts); ncerts != 1 {
266 return fmt.Errorf("expected 1 server cert, got %d", ncerts)
267 }
268 cert, err := x509.ParseCertificate(rawCerts[0])
269 if err != nil {
270 return fmt.Errorf("parsing server certificate failed: %w", err)
271 }
272 if err := cert.CheckSignatureFrom(ca); err != nil {
273 return fmt.Errorf("server certificate verification failed: %w", err)
274 }
275 if nnames := len(cert.DNSNames); nnames != 1 {
276 return fmt.Errorf("expected 1 DNS SAN, got %q", cert.DNSNames)
277 }
278 pinnedNameC <- cert.DNSNames[0]
279 return nil
280 },
281 })
282 if err := conn.Handshake(); err != nil {
283 return fmt.Errorf("failed to connect to retrieve server cert: %w", err)
284 }
285 var pinnedName string
286 select {
287 case pinnedName = <-pinnedNameC:
288 case <-ctx.Done():
289 return ctx.Err()
290 }
291
292 log.Printf("Pinning Kubernetes server certificate to %q", pinnedName)
293
294 // Actually configure Kubernetes now.
295
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100296 config.Clusters[configName] = &clientapi.Cluster{
Serge Bazanski568c38c2024-02-05 14:40:39 +0100297 CertificateAuthorityData: pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: ca.Raw}),
298 TLSServerName: pinnedName,
299 Server: u.String(),
300 ProxyURL: opts.ProxyURL(),
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100301 }
302
303 config.Contexts[configName] = &clientapi.Context{
304 AuthInfo: configName,
305 Cluster: configName,
306 Namespace: "default",
307 }
308
309 // Only set us as the current context if no other exists. Changing that
310 // unprompted would be kind of rude.
311 if config.CurrentContext == "" {
312 config.CurrentContext = configName
313 }
314
Serge Bazanski568c38c2024-02-05 14:40:39 +0100315 if err := clientcmd.ModifyConfig(po, *config, true); err != nil {
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100316 return fmt.Errorf("modifying config failed: %w", err)
317 }
318 return nil
319}
Serge Bazanski1f8cad72023-03-20 16:58:10 +0100320
321// ConnectOptions define how to reach a Metropolis cluster from metroctl.
322//
323// This structure can be built directly. All unset fields mean 'default'. It can
324// then be used to generate the equivalent flags to passs to metroctl.
325//
326// Nil pointers to ConnectOptions are equivalent to an empty ConneectOptions when
327// methods on it are called.
328type ConnectOptions struct {
329 // ConfigPath is the path at which the metroctl configuration/credentials live.
330 // If not set, the default will be used.
331 ConfigPath string
332 // ProxyServer is a host:port pair that indicates the metropolis cluster should
333 // be reached via the given SOCKS5 proxy. If not set, the cluster can be reached
334 // directly from the host networking stack.
335 ProxyServer string
336 // Endpoints are the IP addresses/hostnames (without port part) of the Metropolis
337 // instances that metroctl should use to establish connectivity to a cluster.
338 // These instances should have the ControlPlane role set.
339 Endpoints []string
Serge Bazanski925ec3d2024-02-05 14:38:20 +0100340 // ResolverLogger can be set to enable verbose logging of the Metropolis RPC
341 // resolver layer.
342 ResolverLogger ResolverLogger
Serge Bazanski7eeef0f2024-02-05 14:40:15 +0100343 // TOFU overrides the trust-on-first-use behaviour for CA certificates for the
344 // connection. If not set, TerminalTOFU is used which will interactively ask the
345 // user to accept a CA certificate using os.Stdin/Stdout.
346 TOFU CertificateTOFU
Serge Bazanski1f8cad72023-03-20 16:58:10 +0100347}
348
349// ToFlags returns the metroctl flags corresponding to the options described by
350// this ConnectionOptions struct.
351func (c *ConnectOptions) ToFlags() []string {
352 var res []string
353
354 if c == nil {
355 return res
356 }
357
358 if c.ConfigPath != "" {
359 res = append(res, "--config", c.ConfigPath)
360 }
361 if c.ProxyServer != "" {
362 res = append(res, "--proxy", c.ProxyServer)
363 }
364 for _, ep := range c.Endpoints {
365 res = append(res, "--endpoints", ep)
366 }
367
368 return res
369}
370
371// ProxyURL returns a kubeconfig-compatible URL of the proxy server configured by
372// ConnectOptions, or an empty string if not set.
373func (c *ConnectOptions) ProxyURL() string {
374 if c == nil {
375 return ""
376 }
377 if c.ProxyServer == "" {
378 return ""
379 }
380 var u url.URL
381 u.Scheme = "socks5"
382 u.Host = c.ProxyServer
383 return u.String()
384}
Serge Bazanski568c38c2024-02-05 14:40:39 +0100385
386func (c *ConnectOptions) Dial(network, addr string) (net.Conn, error) {
387 if c.ProxyServer != "" {
388 socksDialer, err := proxy.SOCKS5("tcp", c.ProxyServer, nil, proxy.Direct)
389 if err != nil {
390 return nil, fmt.Errorf("failed to build a SOCKS dialer: %w", err)
391 }
392 return socksDialer.Dial(network, addr)
393 } else {
394 return net.Dial(network, addr)
395 }
396}