m/node: add Management service, implement GetRegisterTicket RPC

This follows the Cluster Lifecycle design document.

DO NOT MERGE: this needs a stacked CL on top which implements
authentication for the Management service.

Change-Id: I19422a63b9dbf2fc0c7f4cbe204851af35b4dbdf
Reviewed-on: https://review.monogon.dev/c/monogon/+/307
Reviewed-by: Mateusz Zalega <mateusz@monogon.tech>
diff --git a/metropolis/node/core/curator/BUILD.bazel b/metropolis/node/core/curator/BUILD.bazel
index 18bb351..4e98184 100644
--- a/metropolis/node/core/curator/BUILD.bazel
+++ b/metropolis/node/core/curator/BUILD.bazel
@@ -9,6 +9,7 @@
         "impl_leader.go",
         "impl_leader_aaa.go",
         "impl_leader_curator.go",
+        "impl_leader_management.go",
         "listener.go",
         "state_node.go",
         "state_pki.go",
@@ -45,6 +46,7 @@
     name = "go_default_test",
     srcs = [
         "curator_test.go",
+        "impl_leader_test.go",
         "listener_test.go",
     ],
     embed = [":go_default_library"],
@@ -54,6 +56,7 @@
         "//metropolis/node/core/localstorage/declarative:go_default_library",
         "//metropolis/pkg/event/memory:go_default_library",
         "//metropolis/pkg/supervisor:go_default_library",
+        "//metropolis/proto/api:go_default_library",
         "@io_etcd_go_etcd//clientv3:go_default_library",
         "@io_etcd_go_etcd//integration:go_default_library",
         "@org_golang_google_grpc//codes:go_default_library",
diff --git a/metropolis/node/core/curator/impl_follower.go b/metropolis/node/core/curator/impl_follower.go
index c258778..cf01b47 100644
--- a/metropolis/node/core/curator/impl_follower.go
+++ b/metropolis/node/core/curator/impl_follower.go
@@ -1,6 +1,8 @@
 package curator
 
 import (
+	"context"
+
 	"google.golang.org/grpc/codes"
 	"google.golang.org/grpc/status"
 
@@ -18,3 +20,7 @@
 func (f *curatorFollower) Escrow(srv apb.AAA_EscrowServer) error {
 	return status.Error(codes.Unimplemented, "curator follower not implemented")
 }
+
+func (f *curatorFollower) GetRegisterTicket(_ context.Context, _ *apb.GetRegisterTicketRequest) (*apb.GetRegisterTicketResponse, error) {
+	return nil, status.Error(codes.Unimplemented, "curator follower not implemented")
+}
diff --git a/metropolis/node/core/curator/impl_leader.go b/metropolis/node/core/curator/impl_leader.go
index 9c4414a..79e57e2 100644
--- a/metropolis/node/core/curator/impl_leader.go
+++ b/metropolis/node/core/curator/impl_leader.go
@@ -66,11 +66,13 @@
 type curatorLeader struct {
 	leaderCurator
 	leaderAAA
+	leaderManagement
 }
 
 func newCuratorLeader(l leadership) *curatorLeader {
 	return &curatorLeader{
 		leaderCurator{l},
 		leaderAAA{l},
+		leaderManagement{l},
 	}
 }
diff --git a/metropolis/node/core/curator/impl_leader_management.go b/metropolis/node/core/curator/impl_leader_management.go
new file mode 100644
index 0000000..4e519b3
--- /dev/null
+++ b/metropolis/node/core/curator/impl_leader_management.go
@@ -0,0 +1,78 @@
+package curator
+
+import (
+	"context"
+	"crypto/rand"
+
+	"go.etcd.io/etcd/clientv3"
+	"google.golang.org/grpc/codes"
+	"google.golang.org/grpc/status"
+	"google.golang.org/protobuf/proto"
+
+	cpb "source.monogon.dev/metropolis/node/core/curator/proto/private"
+	apb "source.monogon.dev/metropolis/proto/api"
+)
+
+type leaderManagement struct {
+	leadership
+}
+
+const (
+	// registerTicketSize is the size, in bytes, of the RegisterTicket used to
+	// perform early perimeter checks for nodes which wish to register into the
+	// cluster.
+	//
+	// The size was picked to offer resistance against on-line bruteforcing attacks
+	// in even the worst case scenario (no ratelimiting, no monitoring, zero latency
+	// between attacker and cluster). 256 bits of entropy require 3.6e68 requests
+	// per second to bruteforce the ticket within 10 years. The ticket doesn't need
+	// to be manually copied by humans, so the relatively overkill size also doesn't
+	// impact usability.
+	registerTicketSize = 32
+)
+
+const (
+	// registerTicketEtcdPath is the etcd key under which private.RegisterTicket is
+	// stored.
+	registerTicketEtcdPath = "/global/register_ticket"
+)
+
+func (l *leaderManagement) GetRegisterTicket(ctx context.Context, req *apb.GetRegisterTicketRequest) (*apb.GetRegisterTicketResponse, error) {
+	// TODO(q3k): authenticate and authorize
+
+	// Retrieve existing ticket from etcd, if any.
+	res, err := l.txnAsLeader(ctx, clientv3.OpGet(registerTicketEtcdPath))
+	if err != nil {
+		return nil, status.Errorf(codes.Unavailable, "could not retrieve register ticket: %v", err)
+	}
+	kvs := res.Responses[0].GetResponseRange().Kvs
+	if len(kvs) > 0 {
+		// Ticket already generated, return the stored bytes unchanged.
+		return &apb.GetRegisterTicketResponse{
+			Ticket: kvs[0].Value,
+		}, nil
+	}
+
+	// No ticket stored yet, generate registerTicketSize random bytes.
+	ticket := &cpb.RegisterTicket{
+		Opaque: make([]byte, registerTicketSize),
+	}
+	_, err = rand.Read(ticket.Opaque)
+	if err != nil {
+		return nil, status.Errorf(codes.Unavailable, "could not generate new ticket: %v", err)
+	}
+	ticketBytes, err := proto.Marshal(ticket)
+	if err != nil {
+		return nil, status.Errorf(codes.Unavailable, "could not marshal new ticket: %v", err)
+	}
+
+	// Commit the marshaled ticket to etcd under registerTicketEtcdPath.
+	_, err = l.txnAsLeader(ctx, clientv3.OpPut(registerTicketEtcdPath, string(ticketBytes)))
+	if err != nil {
+		return nil, status.Errorf(codes.Unavailable, "could not save new ticket: %v", err)
+	}
+
+	return &apb.GetRegisterTicketResponse{
+		Ticket: ticketBytes,
+	}, nil
+}
diff --git a/metropolis/node/core/curator/impl_leader_test.go b/metropolis/node/core/curator/impl_leader_test.go
new file mode 100644
index 0000000..0145e0c
--- /dev/null
+++ b/metropolis/node/core/curator/impl_leader_test.go
@@ -0,0 +1,82 @@
+package curator
+
+import (
+	"bytes"
+	"context"
+	"testing"
+
+	"go.etcd.io/etcd/integration"
+
+	"source.monogon.dev/metropolis/node/core/consensus/client"
+	apb "source.monogon.dev/metropolis/proto/api"
+)
+
+// fakeLeader creates a curatorLeader without any underlying leader election, in
+// its own etcd namespace.
+//
+// This is used to test functionality of the individual curatorLeader RPC
+// implementations without the overhead of having to wait for a leader election.
+func fakeLeader(t *testing.T) (*curatorLeader, context.CancelFunc) {
+	t.Helper()
+	// Set up context whose cancel function will be returned to the user for
+	// terminating all harnesses started by this function.
+	ctx, ctxC := context.WithCancel(context.Background())
+
+	// Start a single-node etcd cluster.
+	cluster := integration.NewClusterV3(nil, &integration.ClusterConfig{
+		Size: 1,
+	})
+	// Terminate the etcd cluster on context cancel.
+	go func() {
+		<-ctx.Done()
+		cluster.Terminate(nil)
+	}()
+
+	// Create etcd client to test cluster.
+	cl := client.NewLocal(cluster.Client(0))
+
+	// Create a fake lock key/value and retrieve its revision. This replaces the
+	// leader election functionality in the curator to enable faster and more
+	// focused tests.
+	lockKey := "/test-lock"
+	res, err := cl.Put(ctx, lockKey, "fake key")
+	if err != nil {
+		t.Fatalf("setting fake leader key failed: %v", err)
+	}
+	lockRev := res.Header.Revision
+
+	// Return a normal curator leader object that directly implements the tested
+	// RPC methods. This will be exercised by tests.
+	return newCuratorLeader(leadership{
+		lockKey: lockKey,
+		lockRev: lockRev,
+		etcd:    cl,
+	}), ctxC
+}
+
+// TestManagementRegisterTicket exercises the Management.GetRegisterTicket RPC.
+func TestManagementRegisterTicket(t *testing.T) {
+	l, cancel := fakeLeader(t)
+	defer cancel()
+
+	ctx, ctxC := context.WithCancel(context.Background())
+	defer ctxC()
+
+	// Retrieve ticket twice.
+	res1, err := l.GetRegisterTicket(ctx, &apb.GetRegisterTicketRequest{})
+	if err != nil {
+		t.Fatalf("GetRegisterTicket failed: %v", err)
+	}
+	res2, err := l.GetRegisterTicket(ctx, &apb.GetRegisterTicketRequest{})
+	if err != nil {
+		t.Fatalf("GetRegisterTicket failed: %v", err)
+	}
+
+	// Ensure tickets are set and the same.
+	if len(res1.Ticket) == 0 {
+		t.Errorf("Ticket is empty")
+	}
+	if !bytes.Equal(res1.Ticket, res2.Ticket) {
+		t.Errorf("Unexpected ticket change between calls")
+	}
+}
diff --git a/metropolis/node/core/curator/listener.go b/metropolis/node/core/curator/listener.go
index 3b98315..b4f2b4d 100644
--- a/metropolis/node/core/curator/listener.go
+++ b/metropolis/node/core/curator/listener.go
@@ -118,6 +118,7 @@
 type services interface {
 	cpb.CuratorServer
 	apb.AAAServer
+	apb.ManagementServer
 }
 
 // activeTarget is the active implementation used by the listener dispatcher, or
@@ -208,6 +209,7 @@
 
 	cpb.RegisterCuratorServer(srvLocal, l)
 	apb.RegisterAAAServer(srvPublic, l)
+	apb.RegisterManagementServer(srvPublic, l)
 
 	err := supervisor.Run(ctx, "local", func(ctx context.Context) error {
 		lisLocal, err := net.ListenUnix("unix", &net.UnixAddr{Name: l.directory.ClientSocket.FullPath(), Net: "unix"})
@@ -354,3 +356,12 @@
 		})
 	})
 }
+
+func (l *listener) GetRegisterTicket(ctx context.Context, req *apb.GetRegisterTicketRequest) (res *apb.GetRegisterTicketResponse, err error) {
+	err = l.callImpl(ctx, func(ctx context.Context, impl services) error {
+		var err2 error
+		res, err2 = impl.GetRegisterTicket(ctx, req)
+		return err2
+	})
+	return
+}
diff --git a/metropolis/node/core/curator/proto/private/storage.proto b/metropolis/node/core/curator/proto/private/storage.proto
index e41cc79..3208cdb 100644
--- a/metropolis/node/core/curator/proto/private/storage.proto
+++ b/metropolis/node/core/curator/proto/private/storage.proto
@@ -40,3 +40,15 @@
     // ED25519 public key of cluster owner.
     bytes public_key = 1;
 }
+
+// A blob which needs to be provided by nodes registering into the cluster.
+// Presenting this ticket on registration does not automatically grant access
+// to arbitrary node registration. Instead it is used to guard the API surface
+// of the Register RPC from potential denial of service attacks, and can be
+// regenerated at any time in case it leaks.
+//
+// Stored under /global/register_ticket (see curator.registerTicketEtcdPath).
+message RegisterTicket {
+    bytes opaque = 1;
+}
+
diff --git a/metropolis/proto/api/BUILD.bazel b/metropolis/proto/api/BUILD.bazel
index 5004440..61d14fb 100644
--- a/metropolis/proto/api/BUILD.bazel
+++ b/metropolis/proto/api/BUILD.bazel
@@ -8,6 +8,7 @@
         "aaa.proto",
         "configuration.proto",
         "debug.proto",
+        "management.proto",
     ],
     visibility = ["//visibility:public"],
 )
diff --git a/metropolis/proto/api/management.proto b/metropolis/proto/api/management.proto
new file mode 100644
index 0000000..c0b8332
--- /dev/null
+++ b/metropolis/proto/api/management.proto
@@ -0,0 +1,22 @@
+syntax = "proto3";
+package metropolis.proto.api;
+option go_package = "source.monogon.dev/metropolis/proto/api";
+
+// Management service available to Cluster Managers.
+service Management {
+    // GetRegisterTicket retrieves the current RegisterTicket which is required
+    // for new nodes to register into the cluster. Presenting this ticket on
+    // registration does not automatically grant access to arbitrary node
+    // registration. Instead, it is used to guard the API surface of the
+    // Register RPC from potential denial of service attacks, and can be
+    // regenerated at any time in case it leaks.
+    rpc GetRegisterTicket(GetRegisterTicketRequest) returns (GetRegisterTicketResponse);
+}
+
+message GetRegisterTicketRequest {
+}
+
+message GetRegisterTicketResponse {
+    // Opaque bytes that comprise the RegisterTicket.
+    bytes ticket = 1;
+}
\ No newline at end of file