// Copyright 2020 The Monogon Project Authors.
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
	"context"
	"crypto/ed25519"
	"crypto/rand"
	"crypto/x509"
	"fmt"
	"log"
	"math/big"
	"net"
	"os"
	"os/signal"
	"runtime/debug"

	"golang.org/x/sys/unix"
	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"

	"git.monogon.dev/source/nexantic.git/core/internal/cluster"
	"git.monogon.dev/source/nexantic.git/core/internal/common"
	"git.monogon.dev/source/nexantic.git/core/internal/common/supervisor"
	"git.monogon.dev/source/nexantic.git/core/internal/consensus/ca"
	"git.monogon.dev/source/nexantic.git/core/internal/containerd"
	"git.monogon.dev/source/nexantic.git/core/internal/kubernetes"
	"git.monogon.dev/source/nexantic.git/core/internal/kubernetes/pki"
	"git.monogon.dev/source/nexantic.git/core/internal/localstorage"
	"git.monogon.dev/source/nexantic.git/core/internal/localstorage/declarative"
	"git.monogon.dev/source/nexantic.git/core/internal/network"
	"git.monogon.dev/source/nexantic.git/core/internal/network/dns"
	"git.monogon.dev/source/nexantic.git/core/pkg/logtree"
	"git.monogon.dev/source/nexantic.git/core/pkg/tpm"
	apb "git.monogon.dev/source/nexantic.git/core/proto/api"
)

var (
	// kubernetesConfig is the static/global part of the Kubernetes service configuration. In the future, this might
	// be configurable by loading it from the EnrolmentConfig. For now, it's static and the same across all clusters.
	kubernetesConfig = kubernetes.Config{
		ServiceIPRange: net.IPNet{ // TODO(q3k): Decide if configurable / final value
			IP:   net.IP{10, 0, 255, 1},
			Mask: net.IPMask{0xff, 0xff, 0xff, 0x00}, // /24, but Go stores it as a literal mask
		},
		ClusterNet: net.IPNet{
			IP:   net.IP{10, 0, 0, 0},
			Mask: net.IPMask{0xff, 0xff, 0x00, 0x00}, // /16
		},
	}
)

func main() {
	defer func() {
		if r := recover(); r != nil {
			fmt.Println("Init panicked:", r)
			debug.PrintStack()
		}
		unix.Sync()
		// TODO(lorenz): Switch this to Reboot when init panics are less likely.
		// Best effort, nothing we can do if this fails except printing the error to the console.
		if err := unix.Reboot(unix.LINUX_REBOOT_CMD_POWER_OFF); err != nil {
			panic(fmt.Sprintf("failed to halt node: %v\n", err))
		}
	}()

	// Set up logger for Smalltown. Currently logs everything to stderr.
	lt := logtree.New()
	reader, err := lt.Read("", logtree.WithChildren(), logtree.WithStream())
	if err != nil {
		panic(fmt.Errorf("could not set up root log reader: %v", err))
	}
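	// Stream all log entries (leveled and raw) from the root of the log tree to stderr as they arrive.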
	go func() {
		for {
			p := <-reader.Stream
			if p.Leveled != nil {
				// Use glog-like layout, but with the supervisor DN instead of the filename.
				timestamp := p.Leveled.Timestamp()
				_, month, day := timestamp.Date()
				hour, minute, second := timestamp.Clock()
				nsec := timestamp.Nanosecond() / 1000
				fmt.Fprintf(os.Stderr, "%s%02d%02d %02d:%02d:%02d.%06d %s] %s\n", p.Leveled.Severity(), month, day, hour, minute, second, nsec, p.DN, p.Leveled.Message())
			}
			if p.Raw != nil {
				fmt.Fprintf(os.Stderr, "%-32s R %s\n", p.DN, p.Raw)
			}
		}
	}()

	// Initial logger. Used until we get to a supervisor.
	logger := lt.MustLeveledFor("init")

	// Remount onto a tmpfs and re-exec if needed. Otherwise, keep running.
	err = switchRoot(logger)
	if err != nil {
		panic(fmt.Errorf("could not remount root: %w", err))
	}

	// The Linux kernel default is 4096, which is far too low. Raise it to 1M, which is what gVisor suggests.
	if err := unix.Setrlimit(unix.RLIMIT_NOFILE, &unix.Rlimit{Cur: 1048576, Max: 1048576}); err != nil {
		logger.Fatalf("Failed to raise rlimits: %v", err)
	}

	logger.Info("Starting Smalltown Init")

	signalChannel := make(chan os.Signal, 2)
	signal.Notify(signalChannel)

	if err := tpm.Initialize(logger); err != nil {
		logger.Fatalf("Failed to initialize TPM 2.0: %v", err)
	}

	corednsRegistrationChan := make(chan *dns.ExtraDirective)

	networkSvc := network.New(network.Config{CorednsRegistrationChan: corednsRegistrationChan})

	// Initialize a headless Delve debugger if this is a debug build; otherwise this is a no-op.
	initializeDebugger(networkSvc)

	// Prepare local storage.
	root := &localstorage.Root{}
	if err := declarative.PlaceFS(root, "/"); err != nil {
		panic(fmt.Errorf("when placing root FS: %w", err))
	}

	// trapdoor is a channel used to signal to the init service that a very low-level, unrecoverable failure
	// occurred. This causes a GURU MEDITATION ERROR visible to the end user.
	trapdoor := make(chan struct{})

	// Make a context for the supervisor. We cancel it when we reach the trapdoor.
	ctxS, ctxC := context.WithCancel(context.Background())

	// Start the root initialization code as a supervisor one-shot runnable. This means waiting for the network,
	// starting the cluster manager, and then starting all services related to the node's roles.
	// TODO(q3k): move this to a separate 'init' service.
	supervisor.New(ctxS, func(ctx context.Context) error {
		logger := supervisor.Logger(ctx)

		// Start storage and network - we need these to get anything else done.
		if err := root.Start(ctx); err != nil {
			return fmt.Errorf("cannot start root FS: %w", err)
		}
		if err := supervisor.Run(ctx, "network", networkSvc.Run); err != nil {
			return fmt.Errorf("when starting network: %w", err)
		}

		// Wait for an IP address from the network.
		ip, err := networkSvc.GetIP(ctx, true)
		if err != nil {
			return fmt.Errorf("when waiting for IP address: %w", err)
		}

		// Start the cluster manager. This kicks off the cluster membership machinery, which will either start
		// a new cluster, enroll into one or join one.
		m := cluster.NewManager(root, networkSvc)
		if err := supervisor.Run(ctx, "enrolment", m.Run); err != nil {
			return fmt.Errorf("when starting enrolment: %w", err)
		}

		// Wait until the cluster manager settles.
		success := m.WaitFinished()
		if !success {
			close(trapdoor)
			return fmt.Errorf("enrolment failed, aborting")
		}

		// We are now in a cluster. We can thus access our 'node' object and start all services that
		// we should be running.

		node := m.Node()
		if err := node.ConfigureLocalHostname(&root.Etc); err != nil {
			close(trapdoor)
			return fmt.Errorf("failed to set local hostname: %w", err)
		}

		logger.Info("Enrolment success, continuing startup.")
		logger.Info(fmt.Sprintf("This node (%s) has roles:", node.String()))
		if cm := node.ConsensusMember(); cm != nil {
			// There's no need to start anything when we are a consensus member - the cluster
			// manager does this for us if necessary (as creating/enrolling into/joining a cluster is
			// pretty tied into cluster lifecycle management).
			logger.Info(" - etcd consensus member")
		}
		if kw := node.KubernetesWorker(); kw != nil {
			logger.Info(" - kubernetes worker")
		}

		// If we're supposed to be a Kubernetes worker, start the Kubernetes services and containerd.
		// In the future, this might be split further into Kubernetes control plane and data plane
		// roles.
		var containerdSvc *containerd.Service
		var kubeSvc *kubernetes.Service
		if kw := node.KubernetesWorker(); kw != nil {
			logger.Info("Starting Kubernetes worker services...")

			// Ensure Kubernetes PKI objects exist in etcd.
			kpkiKV := m.ConsensusKV("cluster", "kpki")
			kpki := pki.NewKubernetes(lt.MustLeveledFor("pki.kubernetes"), kpkiKV)
			if err := kpki.EnsureAll(ctx); err != nil {
				return fmt.Errorf("failed to ensure kubernetes PKI present: %w", err)
			}

			containerdSvc = &containerd.Service{
				EphemeralVolume: &root.Ephemeral.Containerd,
			}
			if err := supervisor.Run(ctx, "containerd", containerdSvc.Run); err != nil {
				return fmt.Errorf("failed to start containerd service: %w", err)
			}

			kubernetesConfig.KPKI = kpki
			kubernetesConfig.Root = root
			kubernetesConfig.AdvertiseAddress = *ip
			kubernetesConfig.CorednsRegistrationChan = corednsRegistrationChan
			kubeSvc = kubernetes.New(kubernetesConfig)
			if err := supervisor.Run(ctx, "kubernetes", kubeSvc.Run); err != nil {
				return fmt.Errorf("failed to start kubernetes service: %w", err)
			}
		}

		// Start the node debug service.
		// TODO(q3k): this needs to be done in a smarter way once LogTree lands, and then a few things can be
		// refactored to start this earlier, or this can be split up into multiple gRPC services on a single listener.
		dbg := &debugService{
			cluster:    m,
			containerd: containerdSvc,
			kubernetes: kubeSvc,
		}
		dbgSrv := grpc.NewServer()
		apb.RegisterNodeDebugServiceServer(dbgSrv, dbg)
		dbgLis, err := net.Listen("tcp", fmt.Sprintf(":%d", common.DebugServicePort))
		if err != nil {
			return fmt.Errorf("failed to listen on debug service: %w", err)
		}
		if err := supervisor.Run(ctx, "debug", supervisor.GRPCServer(dbgSrv, dbgLis, false)); err != nil {
			return fmt.Errorf("failed to start debug service: %w", err)
		}

		supervisor.Signal(ctx, supervisor.SignalHealthy)
		supervisor.Signal(ctx, supervisor.SignalDone)
		return nil
	}, supervisor.WithExistingLogtree(lt))

	// We're PID 1, so orphaned processes get reparented to us and we have to reap them.
	for {
		select {
		case <-trapdoor:
			// If the trapdoor got closed, we got stuck early enough in the boot process that we can't do anything about
			// it. Display a generic error message until we handle error conditions better.
			ctxC()
			log.Printf(" #########################")
			log.Printf(" # GURU MEDITATION ERROR #")
			log.Printf(" #########################")
			log.Printf("")
			log.Printf("Smalltown encountered an uncorrectable error and must be restarted.")
			log.Printf("(Error condition: init trapdoor closed)")
			log.Printf("")
			select {}

		case sig := <-signalChannel:
			switch sig {
			case unix.SIGCHLD:
				var status unix.WaitStatus
				var rusage unix.Rusage
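				// Reap all pending exited children without blocking (WNOHANG) until none remain.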
				for {
					res, err := unix.Wait4(-1, &status, unix.WNOHANG, &rusage)
					if err != nil && err != unix.ECHILD {
						logger.Errorf("Failed to wait on orphaned child: %v", err)
						break
					}
					if res <= 0 {
						break
					}
				}
			case unix.SIGURG:
				// Go 1.14 introduced asynchronous preemption, which uses SIGURG.
				// In order not to break backwards compatibility in the unlikely case of an
				// application actually using SIGURG on its own, the runtime does not filter it.
				// (https://github.com/golang/go/issues/37942)
				logger.V(5).Info("Ignoring SIGURG")
			// TODO(lorenz): We can probably get more than just SIGCHLD as init, but I can't think
			// of any others right now, so just log them in case we hit any of them.
			default:
				logger.Warningf("Got unexpected signal %s", sig.String())
			}
		}
	}
}

// nodeCertificate creates a node key/certificate for a foreign node. This duplicates code from localstorage's
// PKIDirectory EnsureSelfSigned, but is temporary (and specific to 'golden tickets').
func (s *debugService) nodeCertificate() (cert, key []byte, err error) {
	pubKey, privKey, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		err = fmt.Errorf("failed to generate key: %w", err)
		return
	}

	key, err = x509.MarshalPKCS8PrivateKey(privKey)
	if err != nil {
		err = fmt.Errorf("failed to marshal key: %w", err)
		return
	}

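	// Generate a random certificate serial number in [0, 2^127).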
	serialNumberLimit := new(big.Int).Lsh(big.NewInt(1), 127)
	serialNumber, err := rand.Int(rand.Reader, serialNumberLimit)
	if err != nil {
		err = fmt.Errorf("failed to generate serial number: %w", err)
		return
	}

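	// Build the node certificate template and self-sign it with the node's own Ed25519 key.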
	template := localstorage.CertificateForNode(pubKey)
	template.SerialNumber = serialNumber

	cert, err = x509.CreateCertificate(rand.Reader, &template, &template, pubKey, privKey)
	if err != nil {
		err = fmt.Errorf("could not sign certificate: %w", err)
		return
	}
	return
}

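// GetGoldenTicket issues a 'golden ticket' that lets a new node join the cluster: it generates a node
// key/certificate, issues an etcd peer certificate and CRL from the cluster CA, registers the new node as an
// etcd learner, and returns all of this material to the caller.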
func (s *debugService) GetGoldenTicket(ctx context.Context, req *apb.GetGoldenTicketRequest) (*apb.GetGoldenTicketResponse, error) {
	ip := net.ParseIP(req.ExternalIp)
	if ip == nil {
		return nil, status.Errorf(codes.InvalidArgument, "could not parse IP %q", req.ExternalIp)
	}
	this := s.cluster.Node()

	certRaw, key, err := s.nodeCertificate()
	if err != nil {
		return nil, status.Errorf(codes.Unavailable, "failed to generate node certificate: %v", err)
	}
	cert, err := x509.ParseCertificate(certRaw)
	if err != nil {
		panic(err)
	}
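
	// Issue an etcd peer certificate for the new node and fetch the current CRL from the cluster CA stored in etcd.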
	kv := s.cluster.ConsensusKVRoot()
	ca, err := ca.Load(ctx, kv)
	if err != nil {
		return nil, status.Errorf(codes.Unavailable, "could not load CA: %v", err)
	}
	etcdCert, etcdKey, err := ca.Issue(ctx, kv, cert.Subject.CommonName, ip)
	if err != nil {
		return nil, status.Errorf(codes.Unavailable, "could not generate etcd peer certificate: %v", err)
	}
	etcdCRL, err := ca.GetCurrentCRL(ctx, kv)
	if err != nil {
		return nil, status.Errorf(codes.Unavailable, "could not get etcd CRL: %v", err)
	}

	// Add the new node to the etcd cluster as a learner member.
	etcd := s.cluster.ConsensusCluster()
	etcdAddr := fmt.Sprintf("https://%s:%d", ip.String(), common.ConsensusPort)
	_, err = etcd.MemberAddAsLearner(ctx, []string{etcdAddr})
	if err != nil {
		return nil, status.Errorf(codes.Unavailable, "could not add as new etcd consensus member: %v", err)
	}

	return &apb.GetGoldenTicketResponse{
		Ticket: &apb.GoldenTicket{
			EtcdCaCert:     ca.CACertRaw,
			EtcdClientCert: etcdCert,
			EtcdClientKey:  etcdKey,
			EtcdCrl:        etcdCRL,
			Peers: []*apb.GoldenTicket_EtcdPeer{
				{Name: this.ID(), Address: this.Address().String()},
			},
			This: &apb.GoldenTicket_EtcdPeer{Name: cert.Subject.CommonName, Address: ip.String()},

			NodeId:   cert.Subject.CommonName,
			NodeCert: certRaw,
			NodeKey:  key,
		},
	}, nil
}