blob: 701cea609ddd4f43840780918b6cbe3beb4d061d [file] [log] [blame]
Lorenz Brunae0d90d2019-09-05 17:53:56 +02001// Copyright 2020 The Monogon Project Authors.
2//
3// SPDX-License-Identifier: Apache-2.0
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
17package main
18
19import (
Serge Bazanskicdb8c782020-02-17 12:34:02 +010020 "context"
Serge Bazanski57b43752020-07-13 19:17:48 +020021 "crypto/ed25519"
22 "crypto/rand"
23 "crypto/x509"
Lorenz Brundd8c80e2019-10-07 16:19:49 +020024 "fmt"
Serge Bazanski1ebd1e12020-07-13 19:17:16 +020025 "log"
Serge Bazanski57b43752020-07-13 19:17:48 +020026 "math/big"
Serge Bazanski1ebd1e12020-07-13 19:17:16 +020027 "net"
Lorenz Brunae0d90d2019-09-05 17:53:56 +020028 "os"
Lorenz Brunae0d90d2019-09-05 17:53:56 +020029 "os/signal"
Lorenz Brunf95909d2019-09-11 19:48:26 +020030 "runtime/debug"
Lorenz Brunae0d90d2019-09-05 17:53:56 +020031
32 "go.uber.org/zap"
33 "golang.org/x/sys/unix"
Serge Bazanski1ebd1e12020-07-13 19:17:16 +020034 "google.golang.org/grpc"
Serge Bazanski57b43752020-07-13 19:17:48 +020035 "google.golang.org/grpc/codes"
36 "google.golang.org/grpc/status"
Serge Bazanski1ebd1e12020-07-13 19:17:16 +020037
38 "git.monogon.dev/source/nexantic.git/core/internal/cluster"
39 "git.monogon.dev/source/nexantic.git/core/internal/common"
40 "git.monogon.dev/source/nexantic.git/core/internal/common/supervisor"
Serge Bazanski57b43752020-07-13 19:17:48 +020041 "git.monogon.dev/source/nexantic.git/core/internal/consensus/ca"
Serge Bazanski1ebd1e12020-07-13 19:17:16 +020042 "git.monogon.dev/source/nexantic.git/core/internal/containerd"
43 "git.monogon.dev/source/nexantic.git/core/internal/kubernetes"
44 "git.monogon.dev/source/nexantic.git/core/internal/kubernetes/pki"
45 "git.monogon.dev/source/nexantic.git/core/internal/localstorage"
46 "git.monogon.dev/source/nexantic.git/core/internal/localstorage/declarative"
47 "git.monogon.dev/source/nexantic.git/core/internal/network"
48 "git.monogon.dev/source/nexantic.git/core/pkg/tpm"
49 apb "git.monogon.dev/source/nexantic.git/core/proto/api"
Lorenz Brunae0d90d2019-09-05 17:53:56 +020050)
51
Serge Bazanski1ebd1e12020-07-13 19:17:16 +020052var (
53 // kubernetesConfig is the static/global part of the Kubernetes service configuration. In the future, this might
54 // be configurable by loading it from the EnrolmentConfig. Fow now, it's static and same across all clusters.
55 kubernetesConfig = kubernetes.Config{
56 ServiceIPRange: net.IPNet{ // TODO(q3k): Decide if configurable / final value
Lorenz Brunca24cfa2020-08-18 13:49:37 +020057 IP: net.IP{10, 0, 255, 1},
Serge Bazanski1ebd1e12020-07-13 19:17:16 +020058 Mask: net.IPMask{0xff, 0xff, 0xff, 0x00}, // /24, but Go stores as a literal mask
59 },
60 ClusterNet: net.IPNet{
61 IP: net.IP{10, 0, 0, 0},
62 Mask: net.IPMask{0xff, 0xff, 0x00, 0x00}, // /16
63 },
64 }
Leopold Schabela4516f92019-12-04 20:27:05 +000065)
66
Lorenz Brunae0d90d2019-09-05 17:53:56 +020067func main() {
Lorenz Brunf95909d2019-09-11 19:48:26 +020068 defer func() {
69 if r := recover(); r != nil {
70 fmt.Println("Init panicked:", r)
71 debug.PrintStack()
72 }
73 unix.Sync()
Leopold Schabel68c58752019-11-14 21:00:59 +010074 // TODO(lorenz): Switch this to Reboot when init panics are less likely
75 // Best effort, nothing we can do if this fails except printing the error to the console.
76 if err := unix.Reboot(unix.LINUX_REBOOT_CMD_POWER_OFF); err != nil {
77 panic(fmt.Sprintf("failed to halt node: %v\n", err))
78 }
Lorenz Brunf95909d2019-09-11 19:48:26 +020079 }()
Lorenz Brunae0d90d2019-09-05 17:53:56 +020080 logger, err := zap.NewDevelopment()
81 if err != nil {
82 panic(err)
83 }
Serge Bazanski581b0bd2020-03-12 13:36:43 +010084
85 // Remount onto a tmpfs and re-exec if needed. Otherwise, keep running.
86 err = switchRoot(logger)
87 if err != nil {
88 panic(fmt.Errorf("could not remount root: %w", err))
89 }
90
Lorenz Brun878f5f92020-05-12 16:15:39 +020091 // Linux kernel default is 4096 which is far too low. Raise it to 1M which is what gVisor suggests.
92 if err := unix.Setrlimit(unix.RLIMIT_NOFILE, &unix.Rlimit{Cur: 1048576, Max: 1048576}); err != nil {
93 logger.Panic("Failed to raise rlimits", zap.Error(err))
94 }
95
Lorenz Brunae0d90d2019-09-05 17:53:56 +020096 logger.Info("Starting Smalltown Init")
97
Lorenz Brunae0d90d2019-09-05 17:53:56 +020098 signalChannel := make(chan os.Signal, 2)
99 signal.Notify(signalChannel)
100
Lorenz Brunae0d90d2019-09-05 17:53:56 +0200101 if err := tpm.Initialize(logger.With(zap.String("component", "tpm"))); err != nil {
102 logger.Panic("Failed to initialize TPM 2.0", zap.Error(err))
103 }
104
Serge Bazanskib1b742f2020-03-24 13:58:19 +0100105 networkSvc := network.New(network.Config{})
Leopold Schabel68c58752019-11-14 21:00:59 +0100106
Lorenz Brun70f65b22020-07-08 17:02:47 +0200107 // This function initializes a headless Delve if this is a debug build or does nothing if it's not
108 initializeDebugger(networkSvc)
109
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200110 // Prepare local storage.
111 root := &localstorage.Root{}
112 if err := declarative.PlaceFS(root, "/"); err != nil {
113 panic(fmt.Errorf("when placing root FS: %w", err))
114 }
115
116 // trapdoor is a channel used to signal to the init service that a very low-level, unrecoverable failure
117 // occured. This causes a GURU MEDITATION ERROR visible to the end user.
118 trapdoor := make(chan struct{})
119
120 // Make context for supervisor. We cancel it when we reach the trapdoor.
121 ctxS, ctxC := context.WithCancel(context.Background())
122
123 // Start root initialization code as a supervisor one-shot runnable. This means waiting for the network, starting
124 // the cluster manager, and then starting all services related to the node's roles.
125 // TODO(q3k): move this to a separate 'init' service.
126 supervisor.New(ctxS, logger, func(ctx context.Context) error {
127 logger := supervisor.Logger(ctx)
128
129 // Start storage and network - we need this to get anything else done.
130 if err := root.Start(ctx); err != nil {
131 return fmt.Errorf("cannot start root FS: %w", err)
132 }
Serge Bazanskib1b742f2020-03-24 13:58:19 +0100133 if err := supervisor.Run(ctx, "network", networkSvc.Run); err != nil {
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200134 return fmt.Errorf("when starting network: %w", err)
Serge Bazanskib1b742f2020-03-24 13:58:19 +0100135 }
Lorenz Brunf95909d2019-09-11 19:48:26 +0200136
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200137 // Wait for IP address from network.
138 ip, err := networkSvc.GetIP(ctx, true)
Serge Bazanskib1b742f2020-03-24 13:58:19 +0100139 if err != nil {
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200140 return fmt.Errorf("when waiting for IP address: %w", err)
Serge Bazanskib1b742f2020-03-24 13:58:19 +0100141 }
Lorenz Brunae0d90d2019-09-05 17:53:56 +0200142
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200143 // Start cluster manager. This kicks off cluster membership machinery, which will either start
144 // a new cluster, enroll into one or join one.
145 m := cluster.NewManager(root, networkSvc)
146 if err := supervisor.Run(ctx, "enrolment", m.Run); err != nil {
147 return fmt.Errorf("when starting enrolment: %w", err)
148 }
149
150 // Wait until the cluster manager settles.
151 success := m.WaitFinished()
152 if !success {
153 close(trapdoor)
154 return fmt.Errorf("enrolment failed, aborting")
155 }
156
157 // We are now in a cluster. We can thus access our 'node' object and start all services that
158 // we should be running.
159
160 node := m.Node()
161 if err := node.ConfigureLocalHostname(&root.Etc); err != nil {
162 close(trapdoor)
163 return fmt.Errorf("failed to set local hostname: %w", err)
164 }
165
166 logger.Info("Enrolment success, continuing startup.")
167 logger.Info(fmt.Sprintf("This node (%s) has roles:", node.String()))
168 if cm := node.ConsensusMember(); cm != nil {
169 // There's no need to start anything for when we are a consensus member - the cluster
170 // manager does this for us if necessary (as creating/enrolling/joining a cluster is
171 // pretty tied into cluster lifecycle management).
172 logger.Info(fmt.Sprintf(" - etcd consensus member"))
173 }
174 if kw := node.KubernetesWorker(); kw != nil {
175 logger.Info(fmt.Sprintf(" - kubernetes worker"))
176 }
177
178 // If we're supposed to be a kubernetes worker, start kubernetes services and containerd.
179 // In the future, this might be split further into kubernetes control plane and data plane
180 // roles.
181 var containerdSvc *containerd.Service
182 var kubeSvc *kubernetes.Service
183 if kw := node.KubernetesWorker(); kw != nil {
184 logger.Info("Starting Kubernetes worker services...")
185
186 // Ensure Kubernetes PKI objects exist in etcd.
187 kpkiKV := m.ConsensusKV("cluster", "kpki")
188 kpki := pki.NewKubernetes(logger.Named("kpki"), kpkiKV)
189 if err := kpki.EnsureAll(ctx); err != nil {
190 return fmt.Errorf("failed to ensure kubernetes PKI present: %w", err)
191 }
192
193 containerdSvc = &containerd.Service{
194 EphemeralVolume: &root.Ephemeral.Containerd,
195 }
196 if err := supervisor.Run(ctx, "containerd", containerdSvc.Run); err != nil {
197 return fmt.Errorf("failed to start containerd service: %w", err)
198 }
199
200 kubernetesConfig.KPKI = kpki
201 kubernetesConfig.Root = root
202 kubernetesConfig.AdvertiseAddress = *ip
203 kubeSvc = kubernetes.New(kubernetesConfig)
204 if err := supervisor.Run(ctx, "kubernetes", kubeSvc.Run); err != nil {
205 return fmt.Errorf("failed to start kubernetes service: %w", err)
206 }
207
208 }
209
210 // Start the node debug service.
211 // TODO(q3k): this needs to be done in a smarter way once LogTree lands, and then a few things can be
212 // refactored to start this earlier, or this can be split up into a multiple gRPC service on a single listener.
213 dbg := &debugService{
214 cluster: m,
215 containerd: containerdSvc,
216 kubernetes: kubeSvc,
217 }
218 dbgSrv := grpc.NewServer()
219 apb.RegisterNodeDebugServiceServer(dbgSrv, dbg)
220 dbgLis, err := net.Listen("tcp", fmt.Sprintf(":%d", common.DebugServicePort))
Serge Bazanskib1b742f2020-03-24 13:58:19 +0100221 if err != nil {
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200222 return fmt.Errorf("failed to listen on debug service: %w", err)
223 }
224 if err := supervisor.Run(ctx, "debug", supervisor.GRPCServer(dbgSrv, dbgLis, false)); err != nil {
225 return fmt.Errorf("failed to start debug service: %w", err)
Serge Bazanskib1b742f2020-03-24 13:58:19 +0100226 }
Lorenz Brunae0d90d2019-09-05 17:53:56 +0200227
Serge Bazanskib1b742f2020-03-24 13:58:19 +0100228 supervisor.Signal(ctx, supervisor.SignalHealthy)
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200229 supervisor.Signal(ctx, supervisor.SignalDone)
230 return nil
231 })
Serge Bazanskib1b742f2020-03-24 13:58:19 +0100232
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200233 // We're PID1, so orphaned processes get reparented to us to clean up
234 for {
235 select {
236 case <-trapdoor:
237 // If the trapdoor got closed, we got stuck early enough in the boot process that we can't do anything about
238 // it. Display a generic error message until we handle error conditions better.
239 ctxC()
240 log.Printf(" ########################")
241 log.Printf(" # GURU MEDIATION ERROR #")
242 log.Printf(" ########################")
243 log.Printf("")
244 log.Printf("Smalltown encountered an uncorrectable error and must be restarted.")
245 log.Printf("(Error condition: init trapdoor closed)")
246 log.Printf("")
247 select {}
248
249 case sig := <-signalChannel:
250 switch sig {
251 case unix.SIGCHLD:
252 var status unix.WaitStatus
253 var rusage unix.Rusage
254 for {
255 res, err := unix.Wait4(-1, &status, unix.WNOHANG, &rusage)
256 if err != nil && err != unix.ECHILD {
257 logger.Error("Failed to wait on orphaned child", zap.Error(err))
258 break
Serge Bazanskib1b742f2020-03-24 13:58:19 +0100259 }
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200260 if res <= 0 {
261 break
262 }
Lorenz Brunae0d90d2019-09-05 17:53:56 +0200263 }
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200264 case unix.SIGURG:
265 // Go 1.14 introduced asynchronous preemption, which uses SIGURG.
266 // In order not to break backwards compatibility in the unlikely case
267 // of an application actually using SIGURG on its own, they're not filtering them.
268 // (https://github.com/golang/go/issues/37942)
269 logger.Debug("Ignoring SIGURG")
270 // TODO(lorenz): We can probably get more than just SIGCHLD as init, but I can't think
271 // of any others right now, just log them in case we hit any of them.
272 default:
273 logger.Warn("Got unexpected signal", zap.String("signal", sig.String()))
Lorenz Brunae0d90d2019-09-05 17:53:56 +0200274 }
Lorenz Brunae0d90d2019-09-05 17:53:56 +0200275 }
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200276 }
Lorenz Brunae0d90d2019-09-05 17:53:56 +0200277}
Serge Bazanski57b43752020-07-13 19:17:48 +0200278
279// nodeCertificate creates a node key/certificate for a foreign node. This is duplicated code with localstorage's
280// PKIDirectory EnsureSelfSigned, but is temporary (and specific to 'golden tickets').
281func (s *debugService) nodeCertificate() (cert, key []byte, err error) {
282 pubKey, privKey, err := ed25519.GenerateKey(rand.Reader)
283 if err != nil {
284 err = fmt.Errorf("failed to generate key: %w", err)
285 return
286 }
287
288 key, err = x509.MarshalPKCS8PrivateKey(privKey)
289 if err != nil {
290 err = fmt.Errorf("failed to marshal key: %w", err)
291 return
292 }
293
294 serialNumberLimit := new(big.Int).Lsh(big.NewInt(1), 127)
295 serialNumber, err := rand.Int(rand.Reader, serialNumberLimit)
296 if err != nil {
297 err = fmt.Errorf("failed to generate serial number: %w", err)
298 return
299 }
300
301 template := localstorage.CertificateForNode(pubKey)
302 template.SerialNumber = serialNumber
303
304 cert, err = x509.CreateCertificate(rand.Reader, &template, &template, pubKey, privKey)
305 if err != nil {
306 err = fmt.Errorf("could not sign certificate: %w", err)
307 return
308 }
309 return
310}
311
312func (s *debugService) GetGoldenTicket(ctx context.Context, req *apb.GetGoldenTicketRequest) (*apb.GetGoldenTicketResponse, error) {
313 ip := net.ParseIP(req.ExternalIp)
314 if ip == nil {
315 return nil, status.Errorf(codes.InvalidArgument, "could not parse IP %q", req.ExternalIp)
316 }
317 this := s.cluster.Node()
318
319 certRaw, key, err := s.nodeCertificate()
320 if err != nil {
321 return nil, status.Errorf(codes.Unavailable, "failed to generate node certificate: %v", err)
322 }
323 cert, err := x509.ParseCertificate(certRaw)
324 if err != nil {
325 panic(err)
326 }
327 kv := s.cluster.ConsensusKVRoot()
328 ca, err := ca.Load(ctx, kv)
329 if err != nil {
330 return nil, status.Errorf(codes.Unavailable, "could not load CA: %v", err)
331 }
332 etcdCert, etcdKey, err := ca.Issue(ctx, kv, cert.Subject.CommonName, ip)
333 if err != nil {
334 return nil, status.Errorf(codes.Unavailable, "could not generate etcd peer certificate: %v", err)
335 }
336 etcdCRL, err := ca.GetCurrentCRL(ctx, kv)
337 if err != nil {
338 return nil, status.Errorf(codes.Unavailable, "could not get etcd CRL: %v", err)
339 }
340
341 // Add new etcd member to etcd cluster.
342 etcd := s.cluster.ConsensusCluster()
343 etcdAddr := fmt.Sprintf("https://%s:%d", ip.String(), common.ConsensusPort)
344 _, err = etcd.MemberAddAsLearner(ctx, []string{etcdAddr})
345 if err != nil {
346 return nil, status.Errorf(codes.Unavailable, "could not add as new etcd consensus member: %v", err)
347 }
348
349 return &apb.GetGoldenTicketResponse{
350 Ticket: &apb.GoldenTicket{
351 EtcdCaCert: ca.CACertRaw,
352 EtcdClientCert: etcdCert,
353 EtcdClientKey: etcdKey,
354 EtcdCrl: etcdCRL,
355 Peers: []*apb.GoldenTicket_EtcdPeer{
356 {Name: this.ID(), Address: this.Address().String()},
357 },
358 This: &apb.GoldenTicket_EtcdPeer{Name: cert.Subject.CommonName, Address: ip.String()},
359
360 NodeId: cert.Subject.CommonName,
361 NodeCert: certRaw,
362 NodeKey: key,
363 },
364 }, nil
365}