blob: 433f4356462d4210995479ee8a80acb79385e3b3 [file] [log] [blame]
// Copyright 2020 The Monogon Project Authors.
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
16
17package main
18
19import (
Serge Bazanskicdb8c782020-02-17 12:34:02 +010020 "context"
Serge Bazanski57b43752020-07-13 19:17:48 +020021 "crypto/ed25519"
22 "crypto/rand"
23 "crypto/x509"
Lorenz Brundd8c80e2019-10-07 16:19:49 +020024 "fmt"
Serge Bazanski1ebd1e12020-07-13 19:17:16 +020025 "log"
Serge Bazanski57b43752020-07-13 19:17:48 +020026 "math/big"
Serge Bazanski1ebd1e12020-07-13 19:17:16 +020027 "net"
Lorenz Brunae0d90d2019-09-05 17:53:56 +020028 "os"
Lorenz Brunae0d90d2019-09-05 17:53:56 +020029 "os/signal"
Lorenz Brunf95909d2019-09-11 19:48:26 +020030 "runtime/debug"
Lorenz Brunae0d90d2019-09-05 17:53:56 +020031
Lorenz Brunfa5c2fc2020-09-28 13:32:12 +020032 "git.monogon.dev/source/nexantic.git/core/internal/network/dns"
33
Lorenz Brunae0d90d2019-09-05 17:53:56 +020034 "go.uber.org/zap"
35 "golang.org/x/sys/unix"
Serge Bazanski1ebd1e12020-07-13 19:17:16 +020036 "google.golang.org/grpc"
Serge Bazanski57b43752020-07-13 19:17:48 +020037 "google.golang.org/grpc/codes"
38 "google.golang.org/grpc/status"
Serge Bazanski1ebd1e12020-07-13 19:17:16 +020039
40 "git.monogon.dev/source/nexantic.git/core/internal/cluster"
41 "git.monogon.dev/source/nexantic.git/core/internal/common"
42 "git.monogon.dev/source/nexantic.git/core/internal/common/supervisor"
Serge Bazanski57b43752020-07-13 19:17:48 +020043 "git.monogon.dev/source/nexantic.git/core/internal/consensus/ca"
Serge Bazanski1ebd1e12020-07-13 19:17:16 +020044 "git.monogon.dev/source/nexantic.git/core/internal/containerd"
45 "git.monogon.dev/source/nexantic.git/core/internal/kubernetes"
46 "git.monogon.dev/source/nexantic.git/core/internal/kubernetes/pki"
47 "git.monogon.dev/source/nexantic.git/core/internal/localstorage"
48 "git.monogon.dev/source/nexantic.git/core/internal/localstorage/declarative"
49 "git.monogon.dev/source/nexantic.git/core/internal/network"
50 "git.monogon.dev/source/nexantic.git/core/pkg/tpm"
51 apb "git.monogon.dev/source/nexantic.git/core/proto/api"
Lorenz Brunae0d90d2019-09-05 17:53:56 +020052)
53
Serge Bazanski1ebd1e12020-07-13 19:17:16 +020054var (
55 // kubernetesConfig is the static/global part of the Kubernetes service configuration. In the future, this might
56 // be configurable by loading it from the EnrolmentConfig. Fow now, it's static and same across all clusters.
57 kubernetesConfig = kubernetes.Config{
58 ServiceIPRange: net.IPNet{ // TODO(q3k): Decide if configurable / final value
Lorenz Brunca24cfa2020-08-18 13:49:37 +020059 IP: net.IP{10, 0, 255, 1},
Serge Bazanski1ebd1e12020-07-13 19:17:16 +020060 Mask: net.IPMask{0xff, 0xff, 0xff, 0x00}, // /24, but Go stores as a literal mask
61 },
62 ClusterNet: net.IPNet{
63 IP: net.IP{10, 0, 0, 0},
64 Mask: net.IPMask{0xff, 0xff, 0x00, 0x00}, // /16
65 },
66 }
Leopold Schabela4516f92019-12-04 20:27:05 +000067)
68
Lorenz Brunae0d90d2019-09-05 17:53:56 +020069func main() {
Lorenz Brunf95909d2019-09-11 19:48:26 +020070 defer func() {
71 if r := recover(); r != nil {
72 fmt.Println("Init panicked:", r)
73 debug.PrintStack()
74 }
75 unix.Sync()
Leopold Schabel68c58752019-11-14 21:00:59 +010076 // TODO(lorenz): Switch this to Reboot when init panics are less likely
77 // Best effort, nothing we can do if this fails except printing the error to the console.
78 if err := unix.Reboot(unix.LINUX_REBOOT_CMD_POWER_OFF); err != nil {
79 panic(fmt.Sprintf("failed to halt node: %v\n", err))
80 }
Lorenz Brunf95909d2019-09-11 19:48:26 +020081 }()
Lorenz Brunae0d90d2019-09-05 17:53:56 +020082 logger, err := zap.NewDevelopment()
83 if err != nil {
84 panic(err)
85 }
Serge Bazanski581b0bd2020-03-12 13:36:43 +010086
87 // Remount onto a tmpfs and re-exec if needed. Otherwise, keep running.
88 err = switchRoot(logger)
89 if err != nil {
90 panic(fmt.Errorf("could not remount root: %w", err))
91 }
92
Lorenz Brun878f5f92020-05-12 16:15:39 +020093 // Linux kernel default is 4096 which is far too low. Raise it to 1M which is what gVisor suggests.
94 if err := unix.Setrlimit(unix.RLIMIT_NOFILE, &unix.Rlimit{Cur: 1048576, Max: 1048576}); err != nil {
95 logger.Panic("Failed to raise rlimits", zap.Error(err))
96 }
97
Lorenz Brunae0d90d2019-09-05 17:53:56 +020098 logger.Info("Starting Smalltown Init")
99
Lorenz Brunae0d90d2019-09-05 17:53:56 +0200100 signalChannel := make(chan os.Signal, 2)
101 signal.Notify(signalChannel)
102
Lorenz Brunae0d90d2019-09-05 17:53:56 +0200103 if err := tpm.Initialize(logger.With(zap.String("component", "tpm"))); err != nil {
104 logger.Panic("Failed to initialize TPM 2.0", zap.Error(err))
105 }
106
Lorenz Brunfa5c2fc2020-09-28 13:32:12 +0200107 corednsRegistrationChan := make(chan *dns.ExtraDirective)
108
109 networkSvc := network.New(network.Config{CorednsRegistrationChan: corednsRegistrationChan})
Leopold Schabel68c58752019-11-14 21:00:59 +0100110
Lorenz Brun70f65b22020-07-08 17:02:47 +0200111 // This function initializes a headless Delve if this is a debug build or does nothing if it's not
112 initializeDebugger(networkSvc)
113
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200114 // Prepare local storage.
115 root := &localstorage.Root{}
116 if err := declarative.PlaceFS(root, "/"); err != nil {
117 panic(fmt.Errorf("when placing root FS: %w", err))
118 }
119
120 // trapdoor is a channel used to signal to the init service that a very low-level, unrecoverable failure
121 // occured. This causes a GURU MEDITATION ERROR visible to the end user.
122 trapdoor := make(chan struct{})
123
124 // Make context for supervisor. We cancel it when we reach the trapdoor.
125 ctxS, ctxC := context.WithCancel(context.Background())
126
127 // Start root initialization code as a supervisor one-shot runnable. This means waiting for the network, starting
128 // the cluster manager, and then starting all services related to the node's roles.
129 // TODO(q3k): move this to a separate 'init' service.
130 supervisor.New(ctxS, logger, func(ctx context.Context) error {
131 logger := supervisor.Logger(ctx)
132
133 // Start storage and network - we need this to get anything else done.
134 if err := root.Start(ctx); err != nil {
135 return fmt.Errorf("cannot start root FS: %w", err)
136 }
Serge Bazanskib1b742f2020-03-24 13:58:19 +0100137 if err := supervisor.Run(ctx, "network", networkSvc.Run); err != nil {
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200138 return fmt.Errorf("when starting network: %w", err)
Serge Bazanskib1b742f2020-03-24 13:58:19 +0100139 }
Lorenz Brunf95909d2019-09-11 19:48:26 +0200140
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200141 // Wait for IP address from network.
142 ip, err := networkSvc.GetIP(ctx, true)
Serge Bazanskib1b742f2020-03-24 13:58:19 +0100143 if err != nil {
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200144 return fmt.Errorf("when waiting for IP address: %w", err)
Serge Bazanskib1b742f2020-03-24 13:58:19 +0100145 }
Lorenz Brunae0d90d2019-09-05 17:53:56 +0200146
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200147 // Start cluster manager. This kicks off cluster membership machinery, which will either start
148 // a new cluster, enroll into one or join one.
149 m := cluster.NewManager(root, networkSvc)
150 if err := supervisor.Run(ctx, "enrolment", m.Run); err != nil {
151 return fmt.Errorf("when starting enrolment: %w", err)
152 }
153
154 // Wait until the cluster manager settles.
155 success := m.WaitFinished()
156 if !success {
157 close(trapdoor)
158 return fmt.Errorf("enrolment failed, aborting")
159 }
160
161 // We are now in a cluster. We can thus access our 'node' object and start all services that
162 // we should be running.
163
164 node := m.Node()
165 if err := node.ConfigureLocalHostname(&root.Etc); err != nil {
166 close(trapdoor)
167 return fmt.Errorf("failed to set local hostname: %w", err)
168 }
169
170 logger.Info("Enrolment success, continuing startup.")
171 logger.Info(fmt.Sprintf("This node (%s) has roles:", node.String()))
172 if cm := node.ConsensusMember(); cm != nil {
173 // There's no need to start anything for when we are a consensus member - the cluster
174 // manager does this for us if necessary (as creating/enrolling/joining a cluster is
175 // pretty tied into cluster lifecycle management).
176 logger.Info(fmt.Sprintf(" - etcd consensus member"))
177 }
178 if kw := node.KubernetesWorker(); kw != nil {
179 logger.Info(fmt.Sprintf(" - kubernetes worker"))
180 }
181
182 // If we're supposed to be a kubernetes worker, start kubernetes services and containerd.
183 // In the future, this might be split further into kubernetes control plane and data plane
184 // roles.
185 var containerdSvc *containerd.Service
186 var kubeSvc *kubernetes.Service
187 if kw := node.KubernetesWorker(); kw != nil {
188 logger.Info("Starting Kubernetes worker services...")
189
190 // Ensure Kubernetes PKI objects exist in etcd.
191 kpkiKV := m.ConsensusKV("cluster", "kpki")
192 kpki := pki.NewKubernetes(logger.Named("kpki"), kpkiKV)
193 if err := kpki.EnsureAll(ctx); err != nil {
194 return fmt.Errorf("failed to ensure kubernetes PKI present: %w", err)
195 }
196
197 containerdSvc = &containerd.Service{
198 EphemeralVolume: &root.Ephemeral.Containerd,
199 }
200 if err := supervisor.Run(ctx, "containerd", containerdSvc.Run); err != nil {
201 return fmt.Errorf("failed to start containerd service: %w", err)
202 }
203
204 kubernetesConfig.KPKI = kpki
205 kubernetesConfig.Root = root
206 kubernetesConfig.AdvertiseAddress = *ip
Lorenz Brunfa5c2fc2020-09-28 13:32:12 +0200207 kubernetesConfig.CorednsRegistrationChan = corednsRegistrationChan
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200208 kubeSvc = kubernetes.New(kubernetesConfig)
209 if err := supervisor.Run(ctx, "kubernetes", kubeSvc.Run); err != nil {
210 return fmt.Errorf("failed to start kubernetes service: %w", err)
211 }
212
213 }
214
215 // Start the node debug service.
216 // TODO(q3k): this needs to be done in a smarter way once LogTree lands, and then a few things can be
217 // refactored to start this earlier, or this can be split up into a multiple gRPC service on a single listener.
218 dbg := &debugService{
219 cluster: m,
220 containerd: containerdSvc,
221 kubernetes: kubeSvc,
222 }
223 dbgSrv := grpc.NewServer()
224 apb.RegisterNodeDebugServiceServer(dbgSrv, dbg)
225 dbgLis, err := net.Listen("tcp", fmt.Sprintf(":%d", common.DebugServicePort))
Serge Bazanskib1b742f2020-03-24 13:58:19 +0100226 if err != nil {
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200227 return fmt.Errorf("failed to listen on debug service: %w", err)
228 }
229 if err := supervisor.Run(ctx, "debug", supervisor.GRPCServer(dbgSrv, dbgLis, false)); err != nil {
230 return fmt.Errorf("failed to start debug service: %w", err)
Serge Bazanskib1b742f2020-03-24 13:58:19 +0100231 }
Lorenz Brunae0d90d2019-09-05 17:53:56 +0200232
Serge Bazanskib1b742f2020-03-24 13:58:19 +0100233 supervisor.Signal(ctx, supervisor.SignalHealthy)
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200234 supervisor.Signal(ctx, supervisor.SignalDone)
235 return nil
236 })
Serge Bazanskib1b742f2020-03-24 13:58:19 +0100237
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200238 // We're PID1, so orphaned processes get reparented to us to clean up
239 for {
240 select {
241 case <-trapdoor:
242 // If the trapdoor got closed, we got stuck early enough in the boot process that we can't do anything about
243 // it. Display a generic error message until we handle error conditions better.
244 ctxC()
245 log.Printf(" ########################")
246 log.Printf(" # GURU MEDIATION ERROR #")
247 log.Printf(" ########################")
248 log.Printf("")
249 log.Printf("Smalltown encountered an uncorrectable error and must be restarted.")
250 log.Printf("(Error condition: init trapdoor closed)")
251 log.Printf("")
252 select {}
253
254 case sig := <-signalChannel:
255 switch sig {
256 case unix.SIGCHLD:
257 var status unix.WaitStatus
258 var rusage unix.Rusage
259 for {
260 res, err := unix.Wait4(-1, &status, unix.WNOHANG, &rusage)
261 if err != nil && err != unix.ECHILD {
262 logger.Error("Failed to wait on orphaned child", zap.Error(err))
263 break
Serge Bazanskib1b742f2020-03-24 13:58:19 +0100264 }
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200265 if res <= 0 {
266 break
267 }
Lorenz Brunae0d90d2019-09-05 17:53:56 +0200268 }
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200269 case unix.SIGURG:
270 // Go 1.14 introduced asynchronous preemption, which uses SIGURG.
271 // In order not to break backwards compatibility in the unlikely case
272 // of an application actually using SIGURG on its own, they're not filtering them.
273 // (https://github.com/golang/go/issues/37942)
274 logger.Debug("Ignoring SIGURG")
275 // TODO(lorenz): We can probably get more than just SIGCHLD as init, but I can't think
276 // of any others right now, just log them in case we hit any of them.
277 default:
278 logger.Warn("Got unexpected signal", zap.String("signal", sig.String()))
Lorenz Brunae0d90d2019-09-05 17:53:56 +0200279 }
Lorenz Brunae0d90d2019-09-05 17:53:56 +0200280 }
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200281 }
Lorenz Brunae0d90d2019-09-05 17:53:56 +0200282}
Serge Bazanski57b43752020-07-13 19:17:48 +0200283
284// nodeCertificate creates a node key/certificate for a foreign node. This is duplicated code with localstorage's
285// PKIDirectory EnsureSelfSigned, but is temporary (and specific to 'golden tickets').
286func (s *debugService) nodeCertificate() (cert, key []byte, err error) {
287 pubKey, privKey, err := ed25519.GenerateKey(rand.Reader)
288 if err != nil {
289 err = fmt.Errorf("failed to generate key: %w", err)
290 return
291 }
292
293 key, err = x509.MarshalPKCS8PrivateKey(privKey)
294 if err != nil {
295 err = fmt.Errorf("failed to marshal key: %w", err)
296 return
297 }
298
299 serialNumberLimit := new(big.Int).Lsh(big.NewInt(1), 127)
300 serialNumber, err := rand.Int(rand.Reader, serialNumberLimit)
301 if err != nil {
302 err = fmt.Errorf("failed to generate serial number: %w", err)
303 return
304 }
305
306 template := localstorage.CertificateForNode(pubKey)
307 template.SerialNumber = serialNumber
308
309 cert, err = x509.CreateCertificate(rand.Reader, &template, &template, pubKey, privKey)
310 if err != nil {
311 err = fmt.Errorf("could not sign certificate: %w", err)
312 return
313 }
314 return
315}
316
317func (s *debugService) GetGoldenTicket(ctx context.Context, req *apb.GetGoldenTicketRequest) (*apb.GetGoldenTicketResponse, error) {
318 ip := net.ParseIP(req.ExternalIp)
319 if ip == nil {
320 return nil, status.Errorf(codes.InvalidArgument, "could not parse IP %q", req.ExternalIp)
321 }
322 this := s.cluster.Node()
323
324 certRaw, key, err := s.nodeCertificate()
325 if err != nil {
326 return nil, status.Errorf(codes.Unavailable, "failed to generate node certificate: %v", err)
327 }
328 cert, err := x509.ParseCertificate(certRaw)
329 if err != nil {
330 panic(err)
331 }
332 kv := s.cluster.ConsensusKVRoot()
333 ca, err := ca.Load(ctx, kv)
334 if err != nil {
335 return nil, status.Errorf(codes.Unavailable, "could not load CA: %v", err)
336 }
337 etcdCert, etcdKey, err := ca.Issue(ctx, kv, cert.Subject.CommonName, ip)
338 if err != nil {
339 return nil, status.Errorf(codes.Unavailable, "could not generate etcd peer certificate: %v", err)
340 }
341 etcdCRL, err := ca.GetCurrentCRL(ctx, kv)
342 if err != nil {
343 return nil, status.Errorf(codes.Unavailable, "could not get etcd CRL: %v", err)
344 }
345
346 // Add new etcd member to etcd cluster.
347 etcd := s.cluster.ConsensusCluster()
348 etcdAddr := fmt.Sprintf("https://%s:%d", ip.String(), common.ConsensusPort)
349 _, err = etcd.MemberAddAsLearner(ctx, []string{etcdAddr})
350 if err != nil {
351 return nil, status.Errorf(codes.Unavailable, "could not add as new etcd consensus member: %v", err)
352 }
353
354 return &apb.GetGoldenTicketResponse{
355 Ticket: &apb.GoldenTicket{
356 EtcdCaCert: ca.CACertRaw,
357 EtcdClientCert: etcdCert,
358 EtcdClientKey: etcdKey,
359 EtcdCrl: etcdCRL,
360 Peers: []*apb.GoldenTicket_EtcdPeer{
361 {Name: this.ID(), Address: this.Address().String()},
362 },
363 This: &apb.GoldenTicket_EtcdPeer{Name: cert.Subject.CommonName, Address: ip.String()},
364
365 NodeId: cert.Subject.CommonName,
366 NodeCert: certRaw,
367 NodeKey: key,
368 },
369 }, nil
370}