blob: e989507e182d7aa4c6567206d0133c2df84725ed [file] [log] [blame]
Lorenz Brun6e8f69c2019-11-18 10:44:24 +01001// Copyright 2020 The Monogon Project Authors.
2//
3// SPDX-License-Identifier: Apache-2.0
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
17package kubernetes
18
19import (
Lorenz Brun878f5f92020-05-12 16:15:39 +020020 "context"
Serge Bazanskidbfc6382020-06-19 20:35:43 +020021 "fmt"
Lorenz Brun6e8f69c2019-11-18 10:44:24 +010022 "net"
Lorenz Brunb15abad2020-04-16 11:17:12 +020023 "time"
24
Lorenz Brun878f5f92020-05-12 16:15:39 +020025 "google.golang.org/grpc/codes"
26 "google.golang.org/grpc/status"
Lorenz Brunf042e6f2020-06-24 16:46:09 +020027 "k8s.io/client-go/informers"
Lorenz Brunb15abad2020-04-16 11:17:12 +020028 "k8s.io/client-go/kubernetes"
Lorenz Brunf042e6f2020-06-24 16:46:09 +020029 "k8s.io/client-go/tools/clientcmd"
Lorenz Brun878f5f92020-05-12 16:15:39 +020030
Lorenz Brun1de8b182021-12-21 17:15:18 +010031 "source.monogon.dev/metropolis/node/core/identity"
Serge Bazanski31370b02021-01-07 16:31:14 +010032 "source.monogon.dev/metropolis/node/core/localstorage"
Serge Bazanskid8af5bf2021-03-16 13:38:29 +010033 "source.monogon.dev/metropolis/node/core/network"
Serge Bazanski31370b02021-01-07 16:31:14 +010034 "source.monogon.dev/metropolis/node/core/network/dns"
Lorenz Bruncc078df2021-12-23 11:51:55 +010035 "source.monogon.dev/metropolis/node/kubernetes/authproxy"
Serge Bazanski31370b02021-01-07 16:31:14 +010036 "source.monogon.dev/metropolis/node/kubernetes/clusternet"
37 "source.monogon.dev/metropolis/node/kubernetes/nfproxy"
38 "source.monogon.dev/metropolis/node/kubernetes/pki"
Lorenz Brun4e090352021-03-17 17:44:41 +010039 "source.monogon.dev/metropolis/node/kubernetes/plugins/kvmdevice"
Serge Bazanski31370b02021-01-07 16:31:14 +010040 "source.monogon.dev/metropolis/node/kubernetes/reconciler"
41 "source.monogon.dev/metropolis/pkg/supervisor"
42 apb "source.monogon.dev/metropolis/proto/api"
Lorenz Brun6e8f69c2019-11-18 10:44:24 +010043)
44
45type Config struct {
Serge Bazanskid8af5bf2021-03-16 13:38:29 +010046 ServiceIPRange net.IPNet
47 ClusterNet net.IPNet
Lorenz Brun78cefca2022-06-20 12:59:55 +000048 ClusterDomain string
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020049
Serge Bazanskid8af5bf2021-03-16 13:38:29 +010050 KPKI *pki.PKI
51 Root *localstorage.Root
52 Network *network.Service
Lorenz Brun1de8b182021-12-21 17:15:18 +010053 Node *identity.Node
Lorenz Brun6e8f69c2019-11-18 10:44:24 +010054}
55
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020056type Service struct {
Serge Bazanski967be212020-11-02 11:26:59 +010057 c Config
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020058}
Serge Bazanskidbfc6382020-06-19 20:35:43 +020059
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020060func New(c Config) *Service {
61 s := &Service{
62 c: c,
63 }
64 return s
65}
66
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020067func (s *Service) Run(ctx context.Context) error {
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020068 controllerManagerConfig, err := getPKIControllerManagerConfig(ctx, s.c.KPKI)
69 if err != nil {
70 return fmt.Errorf("could not generate controller manager pki config: %w", err)
71 }
72 controllerManagerConfig.clusterNet = s.c.ClusterNet
73 schedulerConfig, err := getPKISchedulerConfig(ctx, s.c.KPKI)
74 if err != nil {
75 return fmt.Errorf("could not generate scheduler pki config: %w", err)
Lorenz Brun6e8f69c2019-11-18 10:44:24 +010076 }
Serge Bazanskidbfc6382020-06-19 20:35:43 +020077
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020078 masterKubeconfig, err := s.c.KPKI.Kubeconfig(ctx, pki.Master)
79 if err != nil {
80 return fmt.Errorf("could not generate master kubeconfig: %w", err)
81 }
Lorenz Brun6e8f69c2019-11-18 10:44:24 +010082
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020083 rawClientConfig, err := clientcmd.NewClientConfigFromBytes(masterKubeconfig)
84 if err != nil {
85 return fmt.Errorf("could not generate kubernetes client config: %w", err)
86 }
Lorenz Brun6e8f69c2019-11-18 10:44:24 +010087
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020088 clientConfig, err := rawClientConfig.ClientConfig()
89 clientSet, err := kubernetes.NewForConfig(clientConfig)
90 if err != nil {
91 return fmt.Errorf("could not generate kubernetes client: %w", err)
92 }
93
94 informerFactory := informers.NewSharedInformerFactory(clientSet, 5*time.Minute)
95
Serge Bazanskid8af5bf2021-03-16 13:38:29 +010096 // Sub-runnable which starts all parts of Kubernetes that depend on the
97 // machine's external IP address. If it changes, the runnable will exit.
98 // TODO(q3k): test this
Serge Bazanski356cbf32023-03-16 17:52:20 +010099 startKubelet := make(chan struct{})
Serge Bazanskid8af5bf2021-03-16 13:38:29 +0100100 supervisor.Run(ctx, "networked", func(ctx context.Context) error {
101 networkWatch := s.c.Network.Watch()
102 defer networkWatch.Close()
Lorenz Brun339582b2020-07-29 18:13:35 +0200103
Serge Bazanskid8af5bf2021-03-16 13:38:29 +0100104 var status *network.Status
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200105
Serge Bazanskid8af5bf2021-03-16 13:38:29 +0100106 supervisor.Logger(ctx).Info("Waiting for node networking...")
107 for status == nil || status.ExternalAddress == nil {
108 status, err = networkWatch.Get(ctx)
109 if err != nil {
110 return fmt.Errorf("failed to get network status: %w", err)
111 }
112 }
113 address := status.ExternalAddress
114 supervisor.Logger(ctx).Info("Node has active networking, starting apiserver/kubelet")
115
116 apiserver := &apiserverService{
117 KPKI: s.c.KPKI,
118 AdvertiseAddress: address,
119 ServiceIPRange: s.c.ServiceIPRange,
120 EphemeralConsensusDirectory: &s.c.Root.Ephemeral.Consensus,
121 }
122
123 kubelet := kubeletService{
Lorenz Brun1de8b182021-12-21 17:15:18 +0100124 NodeName: s.c.Node.ID(),
Serge Bazanskid8af5bf2021-03-16 13:38:29 +0100125 ClusterDNS: []net.IP{address},
Lorenz Brun78cefca2022-06-20 12:59:55 +0000126 ClusterDomain: s.c.ClusterDomain,
Serge Bazanskid8af5bf2021-03-16 13:38:29 +0100127 KubeletDirectory: &s.c.Root.Data.Kubernetes.Kubelet,
128 EphemeralDirectory: &s.c.Root.Ephemeral,
129 KPKI: s.c.KPKI,
130 }
131
132 err := supervisor.RunGroup(ctx, map[string]supervisor.Runnable{
133 "apiserver": apiserver.Run,
Serge Bazanski356cbf32023-03-16 17:52:20 +0100134 "kubelet": func(ctx context.Context) error {
135 <-startKubelet
136 return kubelet.Run(ctx)
137 },
Serge Bazanskid8af5bf2021-03-16 13:38:29 +0100138 })
139 if err != nil {
140 return fmt.Errorf("when starting apiserver/kubelet: %w", err)
141 }
142
143 supervisor.Signal(ctx, supervisor.SignalHealthy)
144
145 for status.ExternalAddress.Equal(address) {
146 status, err = networkWatch.Get(ctx)
147 if err != nil {
148 return fmt.Errorf("when watching for network changes: %w", err)
149 }
150 }
151 return fmt.Errorf("network configuration changed (%s -> %s)", address.String(), status.ExternalAddress.String())
152 })
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200153
Serge Bazanski356cbf32023-03-16 17:52:20 +0100154 // Before we start anything else, make sure reconciliation passes at least once.
155 // This makes the initial startup of a cluster much cleaner as we don't end up
156 // starting the scheduler/controller-manager/etc just to get them to immediately
157 // fail and back off with 'unauthorized'.
158 startLogging := time.Now().Add(2 * time.Second)
159 supervisor.Logger(ctx).Infof("Performing initial resource reconciliation...")
160 for {
161 err := reconciler.ReconcileAll(ctx, clientSet)
162 if err == nil {
163 supervisor.Logger(ctx).Infof("Initial resource reconciliation succeeded.")
164 close(startKubelet)
165 break
166 }
167 if time.Now().After(startLogging) {
168 supervisor.Logger(ctx).Errorf("Still couldn't do initial reconciliation: %v", err)
169 }
170 time.Sleep(100 * time.Millisecond)
171 }
172
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200173 csiPlugin := csiPluginServer{
174 KubeletDirectory: &s.c.Root.Data.Kubernetes.Kubelet,
175 VolumesDirectory: &s.c.Root.Data.Volumes,
176 }
177
178 csiProvisioner := csiProvisionerServer{
Lorenz Brun1de8b182021-12-21 17:15:18 +0100179 NodeName: s.c.Node.ID(),
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200180 Kubernetes: clientSet,
181 InformerFactory: informerFactory,
182 VolumesDirectory: &s.c.Root.Data.Volumes,
183 }
184
185 clusternet := clusternet.Service{
Lorenz Brun1de8b182021-12-21 17:15:18 +0100186 NodeName: s.c.Node.ID(),
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200187 Kubernetes: clientSet,
188 ClusterNet: s.c.ClusterNet,
189 InformerFactory: informerFactory,
190 DataDirectory: &s.c.Root.Data.Kubernetes.ClusterNetworking,
191 }
192
Lorenz Brunb682ba52020-07-08 14:51:36 +0200193 nfproxy := nfproxy.Service{
194 ClusterCIDR: s.c.ClusterNet,
195 ClientSet: clientSet,
196 }
197
Lorenz Brun4e090352021-03-17 17:44:41 +0100198 kvmDevicePlugin := kvmdevice.Plugin{
199 KubeletDirectory: &s.c.Root.Data.Kubernetes.Kubelet,
200 }
201
Lorenz Bruncc078df2021-12-23 11:51:55 +0100202 authProxy := authproxy.Service{
203 KPKI: s.c.KPKI,
204 Node: s.c.Node,
205 }
206
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200207 for _, sub := range []struct {
208 name string
209 runnable supervisor.Runnable
210 }{
Serge Bazanski967be212020-11-02 11:26:59 +0100211 {"controller-manager", runControllerManager(*controllerManagerConfig)},
212 {"scheduler", runScheduler(*schedulerConfig)},
Serge Bazanski356cbf32023-03-16 17:52:20 +0100213 {"reconciler", reconciler.Maintain(clientSet)},
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200214 {"csi-plugin", csiPlugin.Run},
215 {"csi-provisioner", csiProvisioner.Run},
216 {"clusternet", clusternet.Run},
Lorenz Brunb682ba52020-07-08 14:51:36 +0200217 {"nfproxy", nfproxy.Run},
Lorenz Brun4e090352021-03-17 17:44:41 +0100218 {"kvmdeviceplugin", kvmDevicePlugin.Run},
Lorenz Bruncc078df2021-12-23 11:51:55 +0100219 {"authproxy", authProxy.Run},
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200220 } {
221 err := supervisor.Run(ctx, sub.name, sub.runnable)
222 if err != nil {
223 return fmt.Errorf("could not run sub-service %q: %w", sub.name, err)
224 }
225 }
226
Lorenz Brunfa5c2fc2020-09-28 13:32:12 +0200227 supervisor.Logger(ctx).Info("Registering K8s CoreDNS")
Lorenz Brun78cefca2022-06-20 12:59:55 +0000228 clusterDNSDirective := dns.NewKubernetesDirective(s.c.ClusterDomain, masterKubeconfig)
Serge Bazanskid8af5bf2021-03-16 13:38:29 +0100229 s.c.Network.ConfigureDNS(clusterDNSDirective)
Lorenz Brunfa5c2fc2020-09-28 13:32:12 +0200230
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200231 supervisor.Signal(ctx, supervisor.SignalHealthy)
Lorenz Brunfa5c2fc2020-09-28 13:32:12 +0200232 <-ctx.Done()
Serge Bazanskid8af5bf2021-03-16 13:38:29 +0100233 s.c.Network.ConfigureDNS(dns.CancelDirective(clusterDNSDirective))
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200234 return nil
Lorenz Brun6e8f69c2019-11-18 10:44:24 +0100235}
236
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200237// GetDebugKubeconfig issues a kubeconfig for an arbitrary given identity.
238// Useful for debugging and testing.
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200239func (s *Service) GetDebugKubeconfig(ctx context.Context, request *apb.GetDebugKubeconfigRequest) (*apb.GetDebugKubeconfigResponse, error) {
Serge Bazanski9411f7c2021-03-10 13:12:53 +0100240 client, err := s.c.KPKI.VolatileClient(ctx, request.Id, request.Groups)
241 if err != nil {
242 return nil, status.Errorf(codes.Unavailable, "Failed to get volatile client certificate: %v", err)
243 }
244 kubeconfig, err := pki.Kubeconfig(ctx, s.c.KPKI.KV, client)
Lorenz Brun878f5f92020-05-12 16:15:39 +0200245 if err != nil {
246 return nil, status.Errorf(codes.Unavailable, "Failed to generate kubeconfig: %v", err)
247 }
Serge Bazanski9411f7c2021-03-10 13:12:53 +0100248 return &apb.GetDebugKubeconfigResponse{DebugKubeconfig: string(kubeconfig)}, nil
Lorenz Brun6e8f69c2019-11-18 10:44:24 +0100249}