core/internal/cluster: implement multi-node clusters with 'golden ticket'.
As we have fully ripped out all traces of the node management service or
integrity checks, we implement a stopgap system that allows us to
continue developing multi-node clusters. This mechanism is enrolment
using 'golden tickets', which are protobuf messages that can be
generated via the debug service on an existing cluster, and set on a new
node's EnrolmentConfig to enrol that node into the cluster.
As this is a stopgap measure (waiting for better cluster lifecycle
design), this is somewhat poorly implemented, with known issues:
- odd enrolment flow that creates all certificates off-node and results
in some code duplication in the cluster manager and node debug
service
- (more) assumptions that every node is both a kubernetes and etcd
member.
- absolutely no protection against consensus loss due to even quorum
membership, repeated issuance of certificates
- dependence on knowing the IP address of the new node ahead of time,
which is not something that our test harness supports well (or that
we want to rely on at all)
Test Plan: part of existing multi-node tests
X-Origin-Diff: phab/D591
GitOrigin-RevId: 8f099e6ef37f8d47fb2272a3a14b25ed480e377a
diff --git a/core/cmd/dbg/main.go b/core/cmd/dbg/main.go
index e0bde55..f2d8fc0 100644
--- a/core/cmd/dbg/main.go
+++ b/core/cmd/dbg/main.go
@@ -58,6 +58,7 @@
fmt.Fprintf(os.Stderr, "Example:\n %s %s --tail 5 kube.apiserver\n", os.Args[0], os.Args[1])
}
+ goldenticketCmd := flag.NewFlagSet("goldenticket", flag.ExitOnError)
conditionCmd := flag.NewFlagSet("condition", flag.ExitOnError)
conditionCmd.Usage = func() {
fmt.Fprintf(os.Stderr, "Usage: %s %s [options] component_path\n", os.Args[0], os.Args[1])
@@ -78,6 +79,15 @@
fmt.Println(line)
}
return
+ case "goldenticket":
+ goldenticketCmd.Parse(os.Args[2:])
+ ip := goldenticketCmd.Arg(0)
+ res, err := debugClient.GetGoldenTicket(ctx, &apb.GetGoldenTicketRequest{ExternalIp: ip})
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "Failed to get golden ticket: %v\n", err)
+ os.Exit(1)
+ }
+ fmt.Println(res.Ticket)
case "kubectl":
// Always get a kubeconfig with cluster-admin (group system:masters), kubectl itself can impersonate
kubeconfigFile, err := ioutil.TempFile("", "dbg_kubeconfig")
diff --git a/core/cmd/init/BUILD.bazel b/core/cmd/init/BUILD.bazel
index 2f8103f..34b666f 100644
--- a/core/cmd/init/BUILD.bazel
+++ b/core/cmd/init/BUILD.bazel
@@ -17,6 +17,7 @@
"//core/internal/cluster:go_default_library",
"//core/internal/common:go_default_library",
"//core/internal/common/supervisor:go_default_library",
+ "//core/internal/consensus/ca:go_default_library",
"//core/internal/containerd:go_default_library",
"//core/internal/kubernetes:go_default_library",
"//core/internal/kubernetes/pki:go_default_library",
diff --git a/core/cmd/init/main.go b/core/cmd/init/main.go
index 0dc7d5e..0c19ef9 100644
--- a/core/cmd/init/main.go
+++ b/core/cmd/init/main.go
@@ -18,8 +18,12 @@
import (
"context"
+ "crypto/ed25519"
+ "crypto/rand"
+ "crypto/x509"
"fmt"
"log"
+ "math/big"
"net"
"os"
"os/signal"
@@ -28,10 +32,13 @@
"go.uber.org/zap"
"golang.org/x/sys/unix"
"google.golang.org/grpc"
+ "google.golang.org/grpc/codes"
+ "google.golang.org/grpc/status"
"git.monogon.dev/source/nexantic.git/core/internal/cluster"
"git.monogon.dev/source/nexantic.git/core/internal/common"
"git.monogon.dev/source/nexantic.git/core/internal/common/supervisor"
+ "git.monogon.dev/source/nexantic.git/core/internal/consensus/ca"
"git.monogon.dev/source/nexantic.git/core/internal/containerd"
"git.monogon.dev/source/nexantic.git/core/internal/kubernetes"
"git.monogon.dev/source/nexantic.git/core/internal/kubernetes/pki"
@@ -268,3 +275,91 @@
}
}
}
+
+// nodeCertificate generates a fresh ed25519 keypair and a self-signed certificate for a foreign node, returning the DER
+// certificate and the PKCS#8 private key. It duplicates localstorage's PKIDirectory EnsureSelfSigned, but is temporary (and specific to 'golden tickets').
+func (s *debugService) nodeCertificate() (cert, key []byte, err error) {
+ pubKey, privKey, err := ed25519.GenerateKey(rand.Reader)
+ if err != nil {
+ err = fmt.Errorf("failed to generate key: %w", err)
+ return
+ }
+
+ key, err = x509.MarshalPKCS8PrivateKey(privKey) // private key is handed back PKCS#8-encoded
+ if err != nil {
+ err = fmt.Errorf("failed to marshal key: %w", err)
+ return
+ }
+
+ serialNumberLimit := new(big.Int).Lsh(big.NewInt(1), 127) // 2^127: exclusive upper bound for the serial number
+ serialNumber, err := rand.Int(rand.Reader, serialNumberLimit)
+ if err != nil {
+ err = fmt.Errorf("failed to generate serial number: %w", err) // key is already set here; callers must ignore it when err != nil
+ return
+ }
+
+ template := localstorage.CertificateForNode(pubKey)
+ template.SerialNumber = serialNumber
+
+ cert, err = x509.CreateCertificate(rand.Reader, &template, &template, pubKey, privKey) // template doubles as parent: self-signed
+ if err != nil {
+ err = fmt.Errorf("could not sign certificate: %w", err)
+ return
+ }
+ return
+}
+// GetGoldenTicket builds a golden ticket for a new node expected at req.ExternalIp: it generates a node certificate/key, issues an etcd peer certificate from the cluster CA, adds the address as an etcd learner member, and returns the bundle.
+func (s *debugService) GetGoldenTicket(ctx context.Context, req *apb.GetGoldenTicketRequest) (*apb.GetGoldenTicketResponse, error) {
+ ip := net.ParseIP(req.ExternalIp)
+ if ip == nil {
+ return nil, status.Errorf(codes.InvalidArgument, "could not parse IP %q", req.ExternalIp)
+ }
+ this := s.cluster.Node() // NOTE(review): Manager.Node is documented as returning nil when not Running; this is dereferenced below without a check
+
+ certRaw, key, err := s.nodeCertificate()
+ if err != nil {
+ return nil, status.Errorf(codes.Unavailable, "failed to generate node certificate: %v", err)
+ }
+ cert, err := x509.ParseCertificate(certRaw) // parsed back only to read the subject CommonName used as node/etcd member name
+ if err != nil {
+ panic(err) // certRaw was produced by nodeCertificate just above, so a parse failure is treated as a programming error
+ }
+ kv := s.cluster.ConsensusKVRoot()
+ ca, err := ca.Load(ctx, kv) // NOTE: the local variable shadows the imported ca package for the rest of this function
+ if err != nil {
+ return nil, status.Errorf(codes.Unavailable, "could not load CA: %v", err)
+ }
+ etcdCert, etcdKey, err := ca.Issue(ctx, kv, cert.Subject.CommonName, ip) // etcd peer certificate for the new node's name and IP
+ if err != nil {
+ return nil, status.Errorf(codes.Unavailable, "could not generate etcd peer certificate: %v", err)
+ }
+ etcdCRL, err := ca.GetCurrentCRL(ctx, kv)
+ if err != nil {
+ return nil, status.Errorf(codes.Unavailable, "could not get etcd CRL: %v", err)
+ }
+
+ // Add new etcd member to etcd cluster. This happens at ticket generation time, before the new node exists.
+ etcd := s.cluster.ConsensusCluster()
+ etcdAddr := fmt.Sprintf("https://%s:%d", ip.String(), common.ConsensusPort)
+ _, err = etcd.MemberAddAsLearner(ctx, []string{etcdAddr}) // NOTE(review): every call issues a new certificate and adds a member; nothing here guards against repeats
+ if err != nil {
+ return nil, status.Errorf(codes.Unavailable, "could not add as new etcd consensus member: %v", err)
+ }
+
+ return &apb.GetGoldenTicketResponse{
+ Ticket: &apb.GoldenTicket{
+ EtcdCaCert: ca.CACertRaw,
+ EtcdClientCert: etcdCert,
+ EtcdClientKey: etcdKey,
+ EtcdCrl: etcdCRL,
+ Peers: []*apb.GoldenTicket_EtcdPeer{ // only the node serving this RPC is advertised as an existing peer
+ {Name: this.ID(), Address: this.Address().String()},
+ },
+ This: &apb.GoldenTicket_EtcdPeer{Name: cert.Subject.CommonName, Address: ip.String()},
+
+ NodeId: cert.Subject.CommonName,
+ NodeCert: certRaw,
+ NodeKey: key,
+ },
+ }, nil
+}
diff --git a/core/cmd/launch-multi2/BUILD.bazel b/core/cmd/launch-multi2/BUILD.bazel
index 867838a..87f4c88 100644
--- a/core/cmd/launch-multi2/BUILD.bazel
+++ b/core/cmd/launch-multi2/BUILD.bazel
@@ -8,6 +8,7 @@
deps = [
"//core/internal/common:go_default_library",
"//core/internal/launch:go_default_library",
+ "//core/proto/api:go_default_library",
"@com_github_grpc_ecosystem_go_grpc_middleware//retry:go_default_library",
"@org_golang_google_grpc//:go_default_library",
],
diff --git a/core/cmd/launch-multi2/main.go b/core/cmd/launch-multi2/main.go
index 2a38cef..763395d 100644
--- a/core/cmd/launch-multi2/main.go
+++ b/core/cmd/launch-multi2/main.go
@@ -29,6 +29,7 @@
"git.monogon.dev/source/nexantic.git/core/internal/common"
"git.monogon.dev/source/nexantic.git/core/internal/launch"
+ apb "git.monogon.dev/source/nexantic.git/core/proto/api"
)
func main() {
@@ -66,15 +67,28 @@
opts := []grpcretry.CallOption{
grpcretry.WithBackoff(grpcretry.BackoffExponential(100 * time.Millisecond)),
}
- conn, err := nanoswitchPortMap.DialGRPC(common.ExternalServicePort, grpc.WithInsecure(),
+ conn, err := nanoswitchPortMap.DialGRPC(common.DebugServicePort, grpc.WithInsecure(),
grpc.WithUnaryInterceptor(grpcretry.UnaryClientInterceptor(opts...)))
if err != nil {
panic(err)
}
defer conn.Close()
- // TODO(D591): this gets implemented there.
- _ = vm1
- panic("unimplemented")
+ debug := apb.NewNodeDebugServiceClient(conn)
+ res, err := debug.GetGoldenTicket(ctx, &apb.GetGoldenTicketRequest{
+ // HACK: this is assigned by DHCP, and we assume that everything goes well.
+ ExternalIp: "10.1.0.3",
+ }, grpcretry.WithMax(10))
+ if err != nil {
+ log.Fatalf("Failed to get golden ticket: %v", err)
+ }
+
+ ec := &apb.EnrolmentConfig{
+ GoldenTicket: res.Ticket,
+ }
+
+ if err := launch.Launch(ctx, launch.Options{ConnectToSocket: vm1, EnrolmentConfig: ec, SerialPort: os.Stdout}); err != nil {
+ log.Fatalf("Failed to launch vm1: %v", err)
+ }
}()
if err := launch.RunMicroVM(ctx, &launch.MicroVMOptions{
SerialPort: os.Stdout,
diff --git a/core/internal/cluster/BUILD.bazel b/core/internal/cluster/BUILD.bazel
index 99a6eac..3efdab0 100644
--- a/core/internal/cluster/BUILD.bazel
+++ b/core/internal/cluster/BUILD.bazel
@@ -9,10 +9,13 @@
importpath = "git.monogon.dev/source/nexantic.git/core/internal/cluster",
visibility = ["//core:__subpackages__"],
deps = [
+ "//core/internal/common:go_default_library",
"//core/internal/common/supervisor:go_default_library",
"//core/internal/consensus:go_default_library",
"//core/internal/localstorage:go_default_library",
+ "//core/internal/localstorage/declarative:go_default_library",
"//core/internal/network:go_default_library",
+ "//core/proto/api:go_default_library",
"//core/proto/internal:go_default_library",
"@com_github_cenkalti_backoff_v4//:go_default_library",
"@com_github_golang_protobuf//proto:go_default_library",
diff --git a/core/internal/cluster/manager.go b/core/internal/cluster/manager.go
index 7e5af9f..dfdab36 100644
--- a/core/internal/cluster/manager.go
+++ b/core/internal/cluster/manager.go
@@ -18,19 +18,27 @@
import (
"context"
+ "crypto/x509"
+ "encoding/pem"
"fmt"
"io/ioutil"
"os"
+ "strings"
"sync"
"time"
+ apb "git.monogon.dev/source/nexantic.git/core/proto/api"
+
"github.com/cenkalti/backoff/v4"
+ "github.com/golang/protobuf/proto"
"go.etcd.io/etcd/clientv3"
"go.uber.org/zap"
+ "git.monogon.dev/source/nexantic.git/core/internal/common"
"git.monogon.dev/source/nexantic.git/core/internal/common/supervisor"
"git.monogon.dev/source/nexantic.git/core/internal/consensus"
"git.monogon.dev/source/nexantic.git/core/internal/localstorage"
+ "git.monogon.dev/source/nexantic.git/core/internal/localstorage/declarative"
"git.monogon.dev/source/nexantic.git/core/internal/network"
)
@@ -67,6 +75,9 @@
// reaches a final state (Running or Failed respectively).
stateWaiters []chan bool
+ // goldenTicket is the Golden Ticket present in the enrolment config, if any.
+ goldenTicket *apb.GoldenTicket
+
// consensus is the spawned etcd/consensus service, if the Manager brought up a Node that should run one.
consensus *consensus.Service
}
@@ -89,6 +100,9 @@
// StateCreatingCluster is when the Manager attempts to create a new cluster - this happens when a node is started
// with no EnrolmentConfig.
StateCreatingCluster
+ // StateCharlie is when the Manager uses the Golden Ticket debug/stopgap system to join an already
+ // existing cluster. This mechanism will be removed before the first Smalltown release.
+ StateCharlie
// StateRunning is when the Manager successfully got the node to be part of a cluster. stateRunningNode is valid.
StateRunning
// StateFailed is when the Manager failed to ge the node to be part of a cluster.
@@ -101,6 +115,8 @@
return "New"
case StateCreatingCluster:
return "CreatingCluster"
+ case StateCharlie:
+ return "Charlie"
case StateRunning:
return "Running"
case StateFailed:
@@ -112,8 +128,9 @@
// allowedTransition describes all allowed state transitions (map[From][]To).
var allowedTransitions = map[State][]State{
- StateNew: {StateCreatingCluster},
+ StateNew: {StateCreatingCluster, StateCharlie},
StateCreatingCluster: {StateRunning, StateFailed},
+ StateCharlie: {StateRunning, StateFailed},
}
// allowed returns whether a transition from a state to another state is allowed (ie. is defined in allowedTransitions).
@@ -207,6 +224,8 @@
err = m.stateNew(ctx)
case StateCreatingCluster:
err = m.stateCreatingCluster(ctx)
+ case StateCharlie:
+ err = m.stateCharlie(ctx)
default:
done = true
break
@@ -263,8 +282,21 @@
return nil
}
- // Enrolment file exists, this is not yet implemented (need to enroll into or join existing cluster).
- return fmt.Errorf("unimplemented join/enroll")
+ // Enrolment file exists, parse it.
+
+ enrolmentConfig := apb.EnrolmentConfig{}
+ if err := proto.Unmarshal(configRaw, &enrolmentConfig); err != nil {
+ return fmt.Errorf("could not unmarshal local enrolment file: %w", err)
+ }
+
+ // If no join ticket exists, we can't do anything yet.
+ if enrolmentConfig.GoldenTicket == nil {
+ return fmt.Errorf("joining a cluster without a golden ticket not yet implemented")
+ }
+
+ // Otherwise, we begin enrolling with the Golden Ticket.
+ m.next(ctx, StateCharlie)
+ return nil
}
// stateCreatingCluster is called when the Manager has decided to create a new cluster.
@@ -349,6 +381,122 @@
return nil
}
+// stateCharlie is used to join an existing cluster via the GoldenTicket mechanism. This mechanism is temporarily
+// implemented in Smalltown in order to allow for testing multi-node clusters without a TPM attestation flow implemented.
+// The Golden Ticket contains a pregenerated node certificate, etcd certificate, and other data that any node can
+// use to join the cluster.
+// Since this flow is temporary, it has a slight impedance mismatch with methods exposed by localstorage, node, etc.,
+// and the resulting sequencing is a bit odd:
+// - the {node,etcd} certificates/keys are loaded (this already dictates the new node name, as the node name is based
+// off of the node public key)
+// - local storage is initialized, a local/cluster unlock keypair is generated
+// - etcd keys are manually saved to localstorage (vs. being generated locally by CA)
+// - an etcd/consensus member is started, knowing that the remote member was already generated when the golden ticket
+// was generated (vs. being created now by an RPC call, via a promote-node-to-etcd-member flow)
+// - the node is then promoted to a consensus member and kubernetes worker, its clusterunlock key is set, and then it
+// is saved to etcd.
+// As such, in this flow, we first create an etcd member (on goldenticket generation), and then only create a new Smalltown
+// node (when the goldenticket is used). On success the Manager moves to StateRunning; any failure is returned as an error.
+func (m *Manager) stateCharlie(ctx context.Context) error {
+ t := m.goldenTicket // NOTE(review): no assignment to m.goldenTicket is visible in this change (stateNew only checks the config) — confirm it is set, otherwise t is nil here
+ nodeCert, err := x509.ParseCertificate(t.NodeCert)
+ if err != nil {
+ return fmt.Errorf("parsing node certificate from ticket: %w", err)
+ }
+
+ supervisor.Logger(ctx).Info("Joining cluster: waiting for IP address...")
+ ip, err := m.networkService.GetIP(ctx, true)
+ if err != nil {
+ return fmt.Errorf("when getting IP address: %w", err)
+ }
+ supervisor.Logger(ctx).Info("Joining cluster: got IP address", zap.String("address", ip.String()))
+
+ supervisor.Logger(ctx).Info("Joining cluster: initializing storage...")
+ cuk, err := m.storageRoot.Data.MountNew(&m.storageRoot.ESP.LocalUnlock) // cuk is handed to NewNode below
+ if err != nil {
+ return fmt.Errorf("when making new data partition: %w", err)
+ }
+ supervisor.Logger(ctx).Info("Joining cluster: storage initialized")
+ node := NewNode(cuk, *ip, *nodeCert)
+
+ // Save etcd PKI to disk. Each item from the ticket is wrapped in a PEM block of the given type before writing.
+ for _, f := range []struct {
+ target declarative.FilePlacement
+ data []byte
+ blockType string
+ }{
+ {m.storageRoot.Data.Etcd.PeerPKI.Key, t.EtcdClientKey, "PRIVATE KEY"},
+ {m.storageRoot.Data.Etcd.PeerPKI.Certificate, t.EtcdClientCert, "CERTIFICATE"},
+ {m.storageRoot.Data.Etcd.PeerPKI.CACertificate, t.EtcdCaCert, "CERTIFICATE"},
+ } {
+ if err := f.target.Write(pem.EncodeToMemory(&pem.Block{Type: f.blockType, Bytes: f.data}), 0600); err != nil {
+ return fmt.Errorf("when writing etcd PKI data: %w", err)
+ }
+ }
+ if err := m.storageRoot.Data.Etcd.PeerCRL.Write(t.EtcdCrl, 0600); err != nil { // the CRL is written as received, without PEM wrapping
+ return fmt.Errorf("when writing etcd CRL: %w", err)
+ }
+
+ https := func(p *apb.GoldenTicket_EtcdPeer) string { // formats one "name=https://address:port" initial-cluster entry
+ return fmt.Sprintf("%s=https://%s:%d", p.Name, p.Address, common.ConsensusPort)
+ }
+ var initialCluster []string
+ for _, p := range t.Peers {
+ initialCluster = append(initialCluster, https(p))
+ }
+ initialCluster = append(initialCluster, https(t.This)) // existing peers first, this node's own entry last
+
+ supervisor.Logger(ctx).Info("Joining cluster: starting etcd join...",
+ zap.String("initial_cluster", strings.Join(initialCluster, ",")), zap.String("name", node.ID()))
+ m.consensus = consensus.New(consensus.Config{
+ Data: &m.storageRoot.Data.Etcd,
+ Ephemeral: &m.storageRoot.Ephemeral.Consensus,
+ Name: node.ID(),
+ InitialCluster: strings.Join(initialCluster, ","),
+ ExternalHost: ip.String(),
+ ListenHost: ip.String(),
+ })
+
+ if err := supervisor.Run(ctx, "consensus", m.consensus.Run); err != nil {
+ return fmt.Errorf("when starting consensus: %w", err)
+ }
+
+ // TODO(q3k): make timeout configurable?
+ ctxT, ctxC := context.WithTimeout(ctx, 5*time.Second) // ctxT bounds only the WaitReady call below
+ defer ctxC()
+
+ supervisor.Logger(ctx).Info("Joining cluster: waiting for consensus...")
+ if err := m.consensus.WaitReady(ctxT); err != nil {
+ return fmt.Errorf("consensus service failed to become ready: %w", err)
+ }
+
+ // Configure node to be a consensus member and kubernetes worker. In the future, different nodes will have
+ // different roles, but for now they're all symmetrical.
+ _, consensusName, err := m.consensus.MemberInfo(ctx)
+ if err != nil {
+ return fmt.Errorf("could not get consensus MemberInfo: %w", err)
+ }
+ if err := node.MakeConsensusMember(consensusName); err != nil {
+ return fmt.Errorf("could not make new node into consensus member: %w", err)
+ }
+ if err := node.MakeKubernetesWorker(node.ID()); err != nil {
+ return fmt.Errorf("could not make new node into kubernetes worker: %w", err)
+ }
+
+ // Save node into etcd.
+ supervisor.Logger(ctx).Info("Creating new cluster: storing first node...") // NOTE(review): message looks copy-pasted from the cluster-creation flow; this is the join path
+ if err := node.Store(ctx, m.consensus.KV("cluster", "enrolment")); err != nil {
+ return fmt.Errorf("could not save new node: %w", err)
+ }
+
+ m.stateLock.Lock() // stateRunningNode is written under stateLock
+ m.stateRunningNode = node
+ m.stateLock.Unlock()
+
+ m.next(ctx, StateRunning)
+ return nil
+}
+
// Node returns the Node that the Manager brought into a cluster, or nil if the Manager is not Running.
// This is safe to call from any goroutine.
func (m *Manager) Node() *Node {
diff --git a/core/proto/api/debug.proto b/core/proto/api/debug.proto
index ec96591..74d314a 100644
--- a/core/proto/api/debug.proto
+++ b/core/proto/api/debug.proto
@@ -28,6 +28,9 @@
rpc GetDebugKubeconfig(GetDebugKubeconfigRequest) returns (GetDebugKubeconfigResponse);
// GetComponentLogs dumps various log ringbuffers for binaries that we run.
rpc GetComponentLogs(GetComponentLogsRequest) returns (GetComponentLogsResponse);
+ // GetGoldenTicket requests a 'golden ticket' which can be used to enroll any node into the cluster.
+ // This bypasses integrity checks.
+ rpc GetGoldenTicket(GetGoldenTicketRequest) returns (GetGoldenTicketResponse);
}
@@ -49,3 +52,13 @@
message GetComponentLogsResponse {
repeated string line = 1;
}
+
+message GetGoldenTicketRequest {
+ // IP address at which the new node will run. The debug service parses it with net.ParseIP (InvalidArgument on failure), issues the etcd peer certificate for it, and registers it as the new etcd member's peer address.
+ string external_ip = 1;
+}
+
+message GetGoldenTicketResponse {
+ // Ticket to use in the new node's EnrolmentConfig (its golden_ticket field).
+ GoldenTicket ticket = 1;
+}
diff --git a/core/proto/api/enrolment.proto b/core/proto/api/enrolment.proto
index cf109ad..d4176cc 100644
--- a/core/proto/api/enrolment.proto
+++ b/core/proto/api/enrolment.proto
@@ -18,8 +18,46 @@
package smalltown.core.proto.api;
option go_package = "git.monogon.dev/source/nexantic.git/core/proto/api";
-// The EnrolmentConfig is one of the inputs for the integrity mechanism.
+// EnrolmentConfig is the single boot configuration file contained in the Smalltown ESP. It configures
+// the way the node will start up (what cluster it will join/enroll into/create).
message EnrolmentConfig {
+ // Debug/temporary cluster enrolment method. If set, the node will attempt to enroll into the cluster that this ticket was generated for.
+ // If unset while an enrolment config is present, the cluster manager currently returns a "not yet implemented" error; a new cluster is only created when no enrolment config exists.
+ GoldenTicket golden_ticket = 1;
+
// Filled in by node after it is enrolled
- string node_id = 1;
+ string node_id = 2; // NOTE(review): renumbered from 1, and tag 1 now carries golden_ticket — confirm no previously written configs rely on the old numbering
+}
+
+// GoldenTicket is a ticket that allows any node to enroll into a cluster, bypassing any integrity
+// checks.
+//
+// Currently, enrolling into a cluster does not use a TPM-based workflow, and is instead
+// based on a simplified workflow of joining consensus by being started with a
+// TLS client certificate. This is a short-circuit fix to allow multi-node
+// clusters for testing before we design the final cluster node lifecycle system.
+message GoldenTicket {
+ // Etcd peer CA certificate. The joining node writes it to its etcd peer PKI wrapped in a "CERTIFICATE" PEM block.
+ bytes etcd_ca_cert = 1;
+ // Etcd peer client certificate, issued by the cluster CA when the ticket is generated.
+ bytes etcd_client_cert = 2;
+ // Etcd peer client key. The joining node writes it wrapped in a "PRIVATE KEY" PEM block.
+ bytes etcd_client_key = 3;
+ // Initial etcd peer CRL. The joining node writes it to disk as received.
+ bytes etcd_crl = 4;
+
+ message EtcdPeer {
+ string name = 1; // etcd member name
+ string address = 2; // host part of the peer URL; the joining node builds https://<address>:<consensus port> from it
+ }
+ // All other current etcd peers in the cluster.
+ repeated EtcdPeer peers = 5;
+ // The peer that this node should start running.
+ EtcdPeer this = 6;
+
+ // Node configuration. The cluster manager parses node_cert when joining; node_id and node_key are not read by the visible join code
+ // (in the future, this will be used to run a node management service separate from etcd clustering).
+ string node_id = 7;
+ bytes node_cert = 8;
+ bytes node_key = 9;
}