blob: 639bfc38ac10c3595e44abf505bc13c646655595 [file] [log] [blame]
Serge Bazanskicf23ebc2023-03-14 17:02:04 +01001package core
2
3import (
Serge Bazanski568c38c2024-02-05 14:40:39 +01004 "context"
Serge Bazanskicf23ebc2023-03-14 17:02:04 +01005 "crypto/ed25519"
6 "crypto/rand"
Serge Bazanski7eeef0f2024-02-05 14:40:15 +01007 "crypto/tls"
Serge Bazanskicf23ebc2023-03-14 17:02:04 +01008 "crypto/x509"
9 "encoding/pem"
10 "errors"
11 "fmt"
Serge Bazanski568c38c2024-02-05 14:40:39 +010012 "log"
Serge Bazanski1f8cad72023-03-20 16:58:10 +010013 "net"
14 "net/url"
Serge Bazanskicf23ebc2023-03-14 17:02:04 +010015 "os"
16 "path/filepath"
17
Serge Bazanski568c38c2024-02-05 14:40:39 +010018 "golang.org/x/net/proxy"
Serge Bazanskicf23ebc2023-03-14 17:02:04 +010019 clientauthentication "k8s.io/client-go/pkg/apis/clientauthentication/v1"
20 "k8s.io/client-go/tools/clientcmd"
21 clientapi "k8s.io/client-go/tools/clientcmd/api"
Serge Bazanski1f8cad72023-03-20 16:58:10 +010022
23 "source.monogon.dev/metropolis/node"
Serge Bazanskicf23ebc2023-03-14 17:02:04 +010024)
25
// File names used inside a metroctl configuration directory.
const (
	// OwnerKeyFileName is the name of the file, within a metroctl config
	// directory, that stores the owner key.
	OwnerKeyFileName = "owner-key.pem"
	// OwnerCertificateFileName is the name of the file, within a metroctl
	// config directory, that stores the owner certificate.
	OwnerCertificateFileName = "owner.pem"
	// CACertificateFileName is the name of the file, within a metroctl config
	// directory, that stores the cluster CA certificate.
	CACertificateFileName = "ca.pem"
)

var (
	// NoCredentialsError indicates that the requested datum (eg. owner key or
	// owner certificate) is not present in the requested directory.
	NoCredentialsError = errors.New("owner certificate or key does not exist")

	// NoCACertificateError indicates that no cluster CA certificate is present
	// in the requested directory.
	NoCACertificateError = errors.New("no cluster CA certificate while secure connection was requested")
)

// ownerKeyType is the PEM block type used for a Metropolis initial owner
// private key.
const ownerKeyType = "METROPOLIS INITIAL OWNER PRIVATE KEY"
46
47// GetOrMakeOwnerKey returns the owner key for a given metroctl configuration
48// directory path, generating and saving it first if it doesn't exist.
49func GetOrMakeOwnerKey(path string) (ed25519.PrivateKey, error) {
50 existing, err := GetOwnerKey(path)
51 switch err {
52 case nil:
53 return existing, nil
54 case NoCredentialsError:
55 default:
56 return nil, err
57 }
58
59 _, priv, err := ed25519.GenerateKey(rand.Reader)
60 if err != nil {
61 return nil, fmt.Errorf("when generating key: %w", err)
62 }
63 if err := WriteOwnerKey(path, priv); err != nil {
64 return nil, err
65 }
66 return priv, nil
67}
68
69// WriteOwnerKey saves a given raw ED25519 private key as the owner key at a
70// given metroctl configuration directory path.
71func WriteOwnerKey(path string, priv ed25519.PrivateKey) error {
72 pemPriv := pem.EncodeToMemory(&pem.Block{Type: ownerKeyType, Bytes: priv})
73 if err := os.WriteFile(filepath.Join(path, OwnerKeyFileName), pemPriv, 0600); err != nil {
74 return fmt.Errorf("when saving key: %w", err)
75 }
76 return nil
77}
78
Serge Bazanski7eeef0f2024-02-05 14:40:15 +010079// WriteCACertificate writes the given der-encoded X509 certificate to the given
80// metorctl configuration directory path.
81func WriteCACertificate(path string, der []byte) error {
82 pemCert := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: der})
83 if err := os.WriteFile(filepath.Join(path, CACertificateFileName), pemCert, 0600); err != nil {
84 return fmt.Errorf("when saving CA certificate: %w", err)
85 }
86 return nil
87}
88
Serge Bazanskicf23ebc2023-03-14 17:02:04 +010089// GetOwnerKey loads and returns a raw ED25519 private key from the saved owner
90// key in a given metroctl configuration directory path. If the owner key doesn't
91// exist, NoCredentialsError will be returned.
92func GetOwnerKey(path string) (ed25519.PrivateKey, error) {
93 ownerPrivateKeyPEM, err := os.ReadFile(filepath.Join(path, OwnerKeyFileName))
94 if os.IsNotExist(err) {
95 return nil, NoCredentialsError
96 } else if err != nil {
97 return nil, fmt.Errorf("failed to load owner private key: %w", err)
98 }
99 block, _ := pem.Decode(ownerPrivateKeyPEM)
100 if block == nil {
101 return nil, errors.New("owner-key.pem contains invalid PEM armoring")
102 }
103 if block.Type != ownerKeyType {
104 return nil, fmt.Errorf("owner-key.pem contains a PEM block that's not a %v", ownerKeyType)
105 }
106 if len(block.Bytes) != ed25519.PrivateKeySize {
107 return nil, errors.New("owner-key.pem contains a non-Ed25519 key")
108 }
109 return block.Bytes, nil
110}
111
112// WriteOwnerCertificate saves a given DER-encoded X509 certificate as the owner
113// key for a given metroctl configuration directory path.
114func WriteOwnerCertificate(path string, cert []byte) error {
115 ownerCertPEM := pem.Block{
116 Type: "CERTIFICATE",
117 Bytes: cert,
118 }
119 if err := os.WriteFile(filepath.Join(path, OwnerCertificateFileName), pem.EncodeToMemory(&ownerCertPEM), 0644); err != nil {
120 return err
121 }
122 return nil
123}
124
125// GetOwnerCredentials loads and returns a raw ED25519 private key alongside a
126// DER-encoded X509 certificate from the saved owner key and certificate in a
127// given metroctl configuration directory path. If either the key or certificate
128// doesn't exist, NoCredentialsError will be returned.
129func GetOwnerCredentials(path string) (cert *x509.Certificate, key ed25519.PrivateKey, err error) {
130 key, err = GetOwnerKey(path)
131 if err != nil {
132 return nil, nil, err
133 }
134
135 ownerCertPEM, err := os.ReadFile(filepath.Join(path, OwnerCertificateFileName))
136 if os.IsNotExist(err) {
137 return nil, nil, NoCredentialsError
138 } else if err != nil {
139 return nil, nil, fmt.Errorf("failed to load owner certificate: %w", err)
140 }
141 block, _ := pem.Decode(ownerCertPEM)
142 if block == nil {
143 return nil, nil, errors.New("owner.pem contains invalid PEM armoring")
144 }
145 if block.Type != "CERTIFICATE" {
146 return nil, nil, fmt.Errorf("owner.pem contains a PEM block that's not a CERTIFICATE")
147 }
148 cert, err = x509.ParseCertificate(block.Bytes)
149 if err != nil {
150 return nil, nil, fmt.Errorf("owner.pem contains an invalid X.509 certificate: %w", err)
151 }
152 return
153}
154
Serge Bazanski7eeef0f2024-02-05 14:40:15 +0100155// GetOwnerTLSCredentials returns a client TLS Certificate for authenticating to
156// the metropolis cluster, based on metroctl configuration at a given path.
157func GetOwnerTLSCredentials(path string) (*tls.Certificate, error) {
158 ocert, opkey, err := GetOwnerCredentials(path)
159 if err != nil {
160 return nil, err
161 }
162 return &tls.Certificate{
163 Certificate: [][]byte{ocert.Raw},
164 PrivateKey: opkey,
165 }, nil
166}
167
168// GetClusterCA returns the saved cluster CA certificate at the given metoctl
169// configuration path. This does not perform TOFU if the certificate is not
170// present.
171func GetClusterCA(path string) (cert *x509.Certificate, err error) {
172 caCertPEM, err := os.ReadFile(filepath.Join(path, CACertificateFileName))
173 if os.IsNotExist(err) {
174 return nil, NoCACertificateError
175 } else if err != nil {
176 return nil, fmt.Errorf("failed to load CA certificate: %w", err)
177 }
178 block, _ := pem.Decode(caCertPEM)
179 if block == nil {
180 return nil, errors.New("ca.pem contains invalid PEM armoring")
181 }
182 if block.Type != "CERTIFICATE" {
183 return nil, fmt.Errorf("ca.pem contains a PEM block that's not a CERTIFICATE")
184 }
185 cert, err = x509.ParseCertificate(block.Bytes)
186 if err != nil {
187 return nil, fmt.Errorf("ca.pem contains an invalid X.509 certificate: %w", err)
188 }
189 return
190}
191
Serge Bazanski1f8cad72023-03-20 16:58:10 +0100192// InstallKubeletConfig modifies the default kubelet kubeconfig of the host
193// system to be able to connect via a metroctl (and an associated ConnectOptions)
194// to a Kubernetes apiserver at IP address/hostname 'server'.
195//
196// The kubelet's kubeconfig changes will be limited to contexts/configs/... named
197// configName. The configName context will be made the default context only if
198// there is no other default context in the current subconfig.
199//
200// Kubeconfigs can only take a single Kubernetes server address, so this function
201// similarly only allows you to specify only a single server address.
Serge Bazanski568c38c2024-02-05 14:40:39 +0100202func InstallKubeletConfig(ctx context.Context, metroctlPath string, opts *ConnectOptions, configName, server string) error {
203 po := clientcmd.NewDefaultPathOptions()
204 config, err := po.GetStartingConfig()
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100205 if err != nil {
206 return fmt.Errorf("getting initial config failed: %w", err)
207 }
208
Serge Bazanski1f8cad72023-03-20 16:58:10 +0100209 args := []string{
210 "k8scredplugin",
211 }
212 args = append(args, opts.ToFlags()...)
213
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100214 config.AuthInfos[configName] = &clientapi.AuthInfo{
215 Exec: &clientapi.ExecConfig{
216 APIVersion: clientauthentication.SchemeGroupVersion.String(),
217 Command: metroctlPath,
Serge Bazanski1f8cad72023-03-20 16:58:10 +0100218 Args: args,
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100219 InstallHint: `Authenticating to Metropolis clusters requires metroctl to be present.
220Running metroctl takeownership creates this entry and either points to metroctl as a command in
221PATH if metroctl is in PATH at that time or to the absolute path to metroctl at that time.
222If you moved metroctl afterwards or want to switch to PATH resolution, edit $HOME/.kube/config and
223change users.metropolis.exec.command to the required path (or just metroctl if using PATH resolution).`,
224 InteractiveMode: clientapi.NeverExecInteractiveMode,
225 },
226 }
227
Serge Bazanski1f8cad72023-03-20 16:58:10 +0100228 var u url.URL
229 u.Scheme = "https"
230 u.Host = net.JoinHostPort(server, node.KubernetesAPIWrappedPort.PortString())
Serge Bazanski7eeef0f2024-02-05 14:40:15 +0100231
Serge Bazanski568c38c2024-02-05 14:40:39 +0100232 // HACK: the Metropolis node certificates only contain the node ID as a SAN. This
233 // means that we can't use some 'global' identifier as the TLSServerName below
234 // that would be the same across all cluster nodes. Unfortunately the Kubeconfig
235 // system only allows for specifying a concrete name, not a regexp or some more
236 // complex validation mechanism for certs.
237 //
238 // The correct fix for this is to issue a new set of certs for the nodes to use,
239 // but that would require implementing a migration mechanism which we don't want
240 // to do as that entire system is getting replaced with SPIFFE based certificates
241 // very soon.
242 //
243 // To get around this, we thus pin the TLSServerName. This works because current
244 // production deployments only use a single node as the Kubernetes endpoint. To
245 // actually get the cert we connect here to the given server and retrieve its
246 // node ID.
247 //
248 // TODO(lorenz): replace as part of SPIFFE authn work
249
250 ca, err := GetClusterCAWithTOFU(ctx, opts)
251 if err != nil {
252 return fmt.Errorf("failed to retrieve CA certificate: %w", err)
253 }
254
255 pinnedNameC := make(chan string, 1)
256 connLower, err := opts.Dial("tcp", u.Host)
257 if err != nil {
258 return fmt.Errorf("failed to dial to retrieve server cert: %w", err)
259 }
260 conn := tls.Client(connLower, &tls.Config{
261 InsecureSkipVerify: true,
262 VerifyPeerCertificate: func(rawCerts [][]byte, verifiedChains [][]*x509.Certificate) error {
263 if ncerts := len(rawCerts); ncerts != 1 {
264 return fmt.Errorf("expected 1 server cert, got %d", ncerts)
265 }
266 cert, err := x509.ParseCertificate(rawCerts[0])
267 if err != nil {
268 return fmt.Errorf("parsing server certificate failed: %w", err)
269 }
270 if err := cert.CheckSignatureFrom(ca); err != nil {
271 return fmt.Errorf("server certificate verification failed: %w", err)
272 }
273 if nnames := len(cert.DNSNames); nnames != 1 {
274 return fmt.Errorf("expected 1 DNS SAN, got %q", cert.DNSNames)
275 }
276 pinnedNameC <- cert.DNSNames[0]
277 return nil
278 },
279 })
280 if err := conn.Handshake(); err != nil {
281 return fmt.Errorf("failed to connect to retrieve server cert: %w", err)
282 }
283 var pinnedName string
284 select {
285 case pinnedName = <-pinnedNameC:
286 case <-ctx.Done():
287 return ctx.Err()
288 }
289
290 log.Printf("Pinning Kubernetes server certificate to %q", pinnedName)
291
292 // Actually configure Kubernetes now.
293
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100294 config.Clusters[configName] = &clientapi.Cluster{
Serge Bazanski568c38c2024-02-05 14:40:39 +0100295 CertificateAuthorityData: pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: ca.Raw}),
296 TLSServerName: pinnedName,
297 Server: u.String(),
298 ProxyURL: opts.ProxyURL(),
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100299 }
300
301 config.Contexts[configName] = &clientapi.Context{
302 AuthInfo: configName,
303 Cluster: configName,
304 Namespace: "default",
305 }
306
307 // Only set us as the current context if no other exists. Changing that
308 // unprompted would be kind of rude.
309 if config.CurrentContext == "" {
310 config.CurrentContext = configName
311 }
312
Serge Bazanski568c38c2024-02-05 14:40:39 +0100313 if err := clientcmd.ModifyConfig(po, *config, true); err != nil {
Serge Bazanskicf23ebc2023-03-14 17:02:04 +0100314 return fmt.Errorf("modifying config failed: %w", err)
315 }
316 return nil
317}
Serge Bazanski1f8cad72023-03-20 16:58:10 +0100318
319// ConnectOptions define how to reach a Metropolis cluster from metroctl.
320//
321// This structure can be built directly. All unset fields mean 'default'. It can
322// then be used to generate the equivalent flags to passs to metroctl.
323//
324// Nil pointers to ConnectOptions are equivalent to an empty ConneectOptions when
325// methods on it are called.
326type ConnectOptions struct {
327 // ConfigPath is the path at which the metroctl configuration/credentials live.
328 // If not set, the default will be used.
329 ConfigPath string
330 // ProxyServer is a host:port pair that indicates the metropolis cluster should
331 // be reached via the given SOCKS5 proxy. If not set, the cluster can be reached
332 // directly from the host networking stack.
333 ProxyServer string
334 // Endpoints are the IP addresses/hostnames (without port part) of the Metropolis
335 // instances that metroctl should use to establish connectivity to a cluster.
336 // These instances should have the ControlPlane role set.
337 Endpoints []string
Serge Bazanski925ec3d2024-02-05 14:38:20 +0100338 // ResolverLogger can be set to enable verbose logging of the Metropolis RPC
339 // resolver layer.
340 ResolverLogger ResolverLogger
Serge Bazanski7eeef0f2024-02-05 14:40:15 +0100341 // TOFU overrides the trust-on-first-use behaviour for CA certificates for the
342 // connection. If not set, TerminalTOFU is used which will interactively ask the
343 // user to accept a CA certificate using os.Stdin/Stdout.
344 TOFU CertificateTOFU
Serge Bazanski1f8cad72023-03-20 16:58:10 +0100345}
346
347// ToFlags returns the metroctl flags corresponding to the options described by
348// this ConnectionOptions struct.
349func (c *ConnectOptions) ToFlags() []string {
350 var res []string
351
352 if c == nil {
353 return res
354 }
355
356 if c.ConfigPath != "" {
357 res = append(res, "--config", c.ConfigPath)
358 }
359 if c.ProxyServer != "" {
360 res = append(res, "--proxy", c.ProxyServer)
361 }
362 for _, ep := range c.Endpoints {
363 res = append(res, "--endpoints", ep)
364 }
365
366 return res
367}
368
369// ProxyURL returns a kubeconfig-compatible URL of the proxy server configured by
370// ConnectOptions, or an empty string if not set.
371func (c *ConnectOptions) ProxyURL() string {
372 if c == nil {
373 return ""
374 }
375 if c.ProxyServer == "" {
376 return ""
377 }
378 var u url.URL
379 u.Scheme = "socks5"
380 u.Host = c.ProxyServer
381 return u.String()
382}
Serge Bazanski568c38c2024-02-05 14:40:39 +0100383
384func (c *ConnectOptions) Dial(network, addr string) (net.Conn, error) {
385 if c.ProxyServer != "" {
386 socksDialer, err := proxy.SOCKS5("tcp", c.ProxyServer, nil, proxy.Direct)
387 if err != nil {
388 return nil, fmt.Errorf("failed to build a SOCKS dialer: %w", err)
389 }
390 return socksDialer.Dial(network, addr)
391 } else {
392 return net.Dial(network, addr)
393 }
394}