core/internal/cluster: implement multi-node clusters with 'golden ticket'.
As we have fully ripped out all traces of the node management service or
integrity checks, we implement a stopgap system that allows us to
continue developing multi-node clusters. This mechanism is enrolment
using 'golden tickets', which are protobuf messages that can be
generated via the debug service on an existing cluster, and set on a new
node's EnrolmentConfig to enrol that node into the cluster.
As this is a stopgap measure (waiting for better cluster lifecycle
design), this is somewhat poorly implemented, with known issues:
- odd enrolment flow that creates all certificates off-node and results
in some code duplication in the cluster manager and node debug
service
- (more) assumptions that every node is both a kubernetes and etcd
member.
- absolutely no protection against consensus loss due to even quorum
membership, or against repeated issuance of certificates
- dependence on knowing the IP address of the new node ahead of time,
which is not something that our test harness supports well (or that
we want to rely on at all)
Test Plan: part of existing multi-node tests
X-Origin-Diff: phab/D591
GitOrigin-RevId: 8f099e6ef37f8d47fb2272a3a14b25ed480e377a
diff --git a/core/cmd/launch-multi2/BUILD.bazel b/core/cmd/launch-multi2/BUILD.bazel
index 867838a..87f4c88 100644
--- a/core/cmd/launch-multi2/BUILD.bazel
+++ b/core/cmd/launch-multi2/BUILD.bazel
@@ -8,6 +8,7 @@
deps = [
"//core/internal/common:go_default_library",
"//core/internal/launch:go_default_library",
+ "//core/proto/api:go_default_library",
"@com_github_grpc_ecosystem_go_grpc_middleware//retry:go_default_library",
"@org_golang_google_grpc//:go_default_library",
],
diff --git a/core/cmd/launch-multi2/main.go b/core/cmd/launch-multi2/main.go
index 2a38cef..763395d 100644
--- a/core/cmd/launch-multi2/main.go
+++ b/core/cmd/launch-multi2/main.go
@@ -29,6 +29,7 @@
"git.monogon.dev/source/nexantic.git/core/internal/common"
"git.monogon.dev/source/nexantic.git/core/internal/launch"
+ apb "git.monogon.dev/source/nexantic.git/core/proto/api"
)
func main() {
@@ -66,15 +67,28 @@
opts := []grpcretry.CallOption{
grpcretry.WithBackoff(grpcretry.BackoffExponential(100 * time.Millisecond)),
}
- conn, err := nanoswitchPortMap.DialGRPC(common.ExternalServicePort, grpc.WithInsecure(),
+ conn, err := nanoswitchPortMap.DialGRPC(common.DebugServicePort, grpc.WithInsecure(),
grpc.WithUnaryInterceptor(grpcretry.UnaryClientInterceptor(opts...)))
if err != nil {
panic(err)
}
defer conn.Close()
- // TODO(D591): this gets implemented there.
- _ = vm1
- panic("unimplemented")
+ debug := apb.NewNodeDebugServiceClient(conn)
+ res, err := debug.GetGoldenTicket(ctx, &apb.GetGoldenTicketRequest{
+ // HACK: this is assigned by DHCP, and we assume that everything goes well.
+ ExternalIp: "10.1.0.3",
+ }, grpcretry.WithMax(10))
+ if err != nil {
+ log.Fatalf("Failed to get golden ticket: %v", err)
+ }
+
+ ec := &apb.EnrolmentConfig{
+ GoldenTicket: res.Ticket,
+ }
+
+ if err := launch.Launch(ctx, launch.Options{ConnectToSocket: vm1, EnrolmentConfig: ec, SerialPort: os.Stdout}); err != nil {
+ log.Fatalf("Failed to launch vm1: %v", err)
+ }
}()
if err := launch.RunMicroVM(ctx, &launch.MicroVMOptions{
SerialPort: os.Stdout,