m/node: add Management service, implement GetRegisterTicket RPC
This follows the Cluster Lifecycle design document.
DO NOT MERGE: this needs a stacked CL on top which implements
authentication for the Management service.
Change-Id: I19422a63b9dbf2fc0c7f4cbe204851af35b4dbdf
Reviewed-on: https://review.monogon.dev/c/monogon/+/307
Reviewed-by: Mateusz Zalega <mateusz@monogon.tech>
diff --git a/metropolis/node/core/curator/BUILD.bazel b/metropolis/node/core/curator/BUILD.bazel
index 18bb351..4e98184 100644
--- a/metropolis/node/core/curator/BUILD.bazel
+++ b/metropolis/node/core/curator/BUILD.bazel
@@ -9,6 +9,7 @@
"impl_leader.go",
"impl_leader_aaa.go",
"impl_leader_curator.go",
+ "impl_leader_management.go",
"listener.go",
"state_node.go",
"state_pki.go",
@@ -45,6 +46,7 @@
name = "go_default_test",
srcs = [
"curator_test.go",
+ "impl_leader_test.go",
"listener_test.go",
],
embed = [":go_default_library"],
@@ -54,6 +56,7 @@
"//metropolis/node/core/localstorage/declarative:go_default_library",
"//metropolis/pkg/event/memory:go_default_library",
"//metropolis/pkg/supervisor:go_default_library",
+ "//metropolis/proto/api:go_default_library",
"@io_etcd_go_etcd//clientv3:go_default_library",
"@io_etcd_go_etcd//integration:go_default_library",
"@org_golang_google_grpc//codes:go_default_library",
diff --git a/metropolis/node/core/curator/impl_follower.go b/metropolis/node/core/curator/impl_follower.go
index c258778..cf01b47 100644
--- a/metropolis/node/core/curator/impl_follower.go
+++ b/metropolis/node/core/curator/impl_follower.go
@@ -1,6 +1,8 @@
package curator
import (
+ "context"
+
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
@@ -18,3 +20,7 @@
func (f *curatorFollower) Escrow(srv apb.AAA_EscrowServer) error {
return status.Error(codes.Unimplemented, "curator follower not implemented")
}
+
+// GetRegisterTicket is the follower-side stub of the Management RPC of the
+// same name. It ignores its arguments and always fails with
+// codes.Unimplemented.
+func (f *curatorFollower) GetRegisterTicket(_ context.Context, _ *apb.GetRegisterTicketRequest) (*apb.GetRegisterTicketResponse, error) {
+	err := status.Error(codes.Unimplemented, "curator follower not implemented")
+	return nil, err
+}
diff --git a/metropolis/node/core/curator/impl_leader.go b/metropolis/node/core/curator/impl_leader.go
index 9c4414a..79e57e2 100644
--- a/metropolis/node/core/curator/impl_leader.go
+++ b/metropolis/node/core/curator/impl_leader.go
@@ -66,11 +66,13 @@
type curatorLeader struct {
leaderCurator
leaderAAA
+ leaderManagement
}
func newCuratorLeader(l leadership) *curatorLeader {
return &curatorLeader{
leaderCurator{l},
leaderAAA{l},
+ leaderManagement{l},
}
}
diff --git a/metropolis/node/core/curator/impl_leader_management.go b/metropolis/node/core/curator/impl_leader_management.go
new file mode 100644
index 0000000..4e519b3
--- /dev/null
+++ b/metropolis/node/core/curator/impl_leader_management.go
@@ -0,0 +1,78 @@
+package curator
+
+import (
+ "context"
+ "crypto/rand"
+
+ "go.etcd.io/etcd/clientv3"
+ "google.golang.org/grpc/codes"
+ "google.golang.org/grpc/status"
+ "google.golang.org/protobuf/proto"
+
+ cpb "source.monogon.dev/metropolis/node/core/curator/proto/private"
+ apb "source.monogon.dev/metropolis/proto/api"
+)
+
+type leaderManagement struct {
+ leadership
+}
+
+const (
+	// registerTicketSize is the length, in bytes, of the random payload of the
+	// RegisterTicket that nodes present as an early perimeter check when they
+	// want to register into the cluster.
+	//
+	// 32 bytes (256 bits of entropy) resists on-line bruteforcing even in the
+	// worst case (no ratelimiting, no monitoring, zero latency between attacker
+	// and cluster): guessing the ticket within 10 years would take about 3.6e68
+	// requests per second. Humans never have to copy the ticket by hand, so the
+	// generous size costs nothing in usability.
+	registerTicketSize = 32
+
+	// registerTicketEtcdPath is the etcd key under which the serialized
+	// private.RegisterTicket is stored.
+	registerTicketEtcdPath = "/global/register_ticket"
+)
+
+// GetRegisterTicket implements the Management.GetRegisterTicket RPC on the
+// curator leader. It returns the serialized private.RegisterTicket stored in
+// etcd under registerTicketEtcdPath, generating and persisting a fresh random
+// one if none exists yet.
+//
+// A freshly generated ticket is only written if the key is still absent at
+// commit time. Concurrent first-time callers therefore all return the one
+// ticket that was actually stored, instead of silently overwriting each other
+// and handing out tickets that are no longer valid.
+//
+// Every failure is reported to the caller as codes.Unavailable.
+func (l *leaderManagement) GetRegisterTicket(ctx context.Context, req *apb.GetRegisterTicketRequest) (*apb.GetRegisterTicketResponse, error) {
+	// TODO(q3k): authenticate and authorize
+
+	// Fast path: return the existing ticket, if any.
+	res, err := l.txnAsLeader(ctx, clientv3.OpGet(registerTicketEtcdPath))
+	if err != nil {
+		return nil, status.Errorf(codes.Unavailable, "could not retrieve register ticket: %v", err)
+	}
+	if kvs := res.Responses[0].GetResponseRange().Kvs; len(kvs) > 0 {
+		return &apb.GetRegisterTicketResponse{
+			Ticket: kvs[0].Value,
+		}, nil
+	}
+
+	// No ticket, generate one.
+	ticket := &cpb.RegisterTicket{
+		Opaque: make([]byte, registerTicketSize),
+	}
+	if _, err := rand.Read(ticket.Opaque); err != nil {
+		return nil, status.Errorf(codes.Unavailable, "could not generate new ticket: %v", err)
+	}
+	ticketBytes, err := proto.Marshal(ticket)
+	if err != nil {
+		return nil, status.Errorf(codes.Unavailable, "could not marshal new ticket: %v", err)
+	}
+
+	// Commit the new ticket to etcd, but only if nobody else created one since
+	// the read above (a key that does not exist has CreateRevision 0). If the
+	// key exists by now, fetch the stored ticket in the same transaction.
+	createIfAbsent := clientv3.OpTxn(
+		[]clientv3.Cmp{clientv3.Compare(clientv3.CreateRevision(registerTicketEtcdPath), "=", 0)},
+		[]clientv3.Op{clientv3.OpPut(registerTicketEtcdPath, string(ticketBytes))},
+		[]clientv3.Op{clientv3.OpGet(registerTicketEtcdPath)},
+	)
+	res, err = l.txnAsLeader(ctx, createIfAbsent)
+	if err != nil {
+		return nil, status.Errorf(codes.Unavailable, "could not save new ticket: %v", err)
+	}
+	txn := res.Responses[0].GetResponseTxn()
+	if txn == nil {
+		return nil, status.Error(codes.Unavailable, "could not save new ticket: unexpected etcd response")
+	}
+	if !txn.Succeeded {
+		// Lost the race against a concurrent caller: discard our ticket and
+		// return the one that is actually stored.
+		if len(txn.Responses) == 0 {
+			return nil, status.Error(codes.Unavailable, "could not save new ticket: unexpected etcd response")
+		}
+		kvs := txn.Responses[0].GetResponseRange().GetKvs()
+		if len(kvs) == 0 {
+			return nil, status.Error(codes.Unavailable, "could not save new ticket: concurrently created ticket is missing")
+		}
+		return &apb.GetRegisterTicketResponse{
+			Ticket: kvs[0].Value,
+		}, nil
+	}
+
+	return &apb.GetRegisterTicketResponse{
+		Ticket: ticketBytes,
+	}, nil
+}
diff --git a/metropolis/node/core/curator/impl_leader_test.go b/metropolis/node/core/curator/impl_leader_test.go
new file mode 100644
index 0000000..0145e0c
--- /dev/null
+++ b/metropolis/node/core/curator/impl_leader_test.go
@@ -0,0 +1,82 @@
+package curator
+
+import (
+ "bytes"
+ "context"
+ "testing"
+
+ "go.etcd.io/etcd/integration"
+
+ "source.monogon.dev/metropolis/node/core/consensus/client"
+ apb "source.monogon.dev/metropolis/proto/api"
+)
+
+// fakeLeader creates a curatorLeader without any underlying leader election,
+// backed by its own freshly started single-node etcd test cluster.
+//
+// This is used to test functionality of the individual curatorLeader RPC
+// implementations without the overhead of having to wait for a leader election.
+//
+// The returned cancel function must be called by the test once it is done; it
+// terminates the etcd cluster started here. If setup fails, the test is aborted
+// with t.Fatalf and the cluster is torn down by this function itself.
+func fakeLeader(t *testing.T) (*curatorLeader, context.CancelFunc) {
+	t.Helper()
+	// Set up context whose cancel function will be returned to the user for
+	// terminating all harnesses started by this function.
+	ctx, ctxC := context.WithCancel(context.Background())
+
+	// Start a single-node etcd cluster.
+	cluster := integration.NewClusterV3(nil, &integration.ClusterConfig{
+		Size: 1,
+	})
+	// Terminate the etcd cluster on context cancel.
+	go func() {
+		<-ctx.Done()
+		cluster.Terminate(nil)
+	}()
+
+	// Create etcd client to test cluster.
+	cl := client.NewLocal(cluster.Client(0))
+
+	// Create a fake lock key/value and retrieve its revision. This replaces the
+	// leader election functionality in the curator to enable faster and more
+	// focused tests.
+	lockKey := "/test-lock"
+	res, err := cl.Put(ctx, lockKey, "fake key")
+	if err != nil {
+		// The caller never receives the cancel function on this path, so cancel
+		// here; otherwise the cluster and its terminator goroutine would leak.
+		ctxC()
+		t.Fatalf("setting fake leader key failed: %v", err)
+	}
+	lockRev := res.Header.Revision
+
+	// Return a normal curator leader object that directly implements the tested
+	// RPC methods. This will be exercised by tests.
+	return newCuratorLeader(leadership{
+		lockKey: lockKey,
+		lockRev: lockRev,
+		etcd:    cl,
+	}), ctxC
+}
+
+// TestManagementRegisterTicket exercises the Management.GetRegisterTicket RPC.
+func TestManagementRegisterTicket(t *testing.T) {
+ l, cancel := fakeLeader(t)
+ defer cancel()
+
+ ctx, ctxC := context.WithCancel(context.Background())
+ defer ctxC()
+
+ // Retrieve ticket twice.
+ res1, err := l.GetRegisterTicket(ctx, &apb.GetRegisterTicketRequest{})
+ if err != nil {
+ t.Fatalf("GetRegisterTicket failed: %v", err)
+ }
+ res2, err := l.GetRegisterTicket(ctx, &apb.GetRegisterTicketRequest{})
+ if err != nil {
+ t.Fatalf("GetRegisterTicket failed: %v", err)
+ }
+
+ // Ensure tickets are set and the same.
+ if len(res1.Ticket) == 0 {
+ t.Errorf("Ticket is empty")
+ }
+ if !bytes.Equal(res1.Ticket, res2.Ticket) {
+ t.Errorf("Unexpected ticket change between calls")
+ }
+}
diff --git a/metropolis/node/core/curator/listener.go b/metropolis/node/core/curator/listener.go
index 3b98315..b4f2b4d 100644
--- a/metropolis/node/core/curator/listener.go
+++ b/metropolis/node/core/curator/listener.go
@@ -118,6 +118,7 @@
type services interface {
cpb.CuratorServer
apb.AAAServer
+ apb.ManagementServer
}
// activeTarget is the active implementation used by the listener dispatcher, or
@@ -208,6 +209,7 @@
cpb.RegisterCuratorServer(srvLocal, l)
apb.RegisterAAAServer(srvPublic, l)
+ apb.RegisterManagementServer(srvPublic, l)
err := supervisor.Run(ctx, "local", func(ctx context.Context) error {
lisLocal, err := net.ListenUnix("unix", &net.UnixAddr{Name: l.directory.ClientSocket.FullPath(), Net: "unix"})
@@ -354,3 +356,12 @@
})
})
}
+
+// GetRegisterTicket forwards the Management.GetRegisterTicket RPC to the
+// services implementation that callImpl passes to the closure, and returns
+// that implementation's response together with the error reported by callImpl.
+func (l *listener) GetRegisterTicket(ctx context.Context, req *apb.GetRegisterTicketRequest) (res *apb.GetRegisterTicketResponse, err error) {
+	forward := func(ctx context.Context, impl services) error {
+		var implErr error
+		// Capture the response in the named result of the outer function.
+		res, implErr = impl.GetRegisterTicket(ctx, req)
+		return implErr
+	}
+	err = l.callImpl(ctx, forward)
+	return res, err
+}
diff --git a/metropolis/node/core/curator/proto/private/storage.proto b/metropolis/node/core/curator/proto/private/storage.proto
index e41cc79..3208cdb 100644
--- a/metropolis/node/core/curator/proto/private/storage.proto
+++ b/metropolis/node/core/curator/proto/private/storage.proto
@@ -40,3 +40,15 @@
// ED25519 public key of cluster owner.
bytes public_key = 1;
}
+
+// A blob which needs to be provided by nodes registering into the cluster.
+// Presenting this ticket on registration does not automatically grant access
+// to arbitrary node registration. Instead it is used to guard the API surface
+// of the Register RPC from potential denial of service attacks, and can be
+// regenerated at any time in case it leaks.
+//
+// Stored under /global/register_ticket (see curator.registerTicketEtcdPath).
+message RegisterTicket {
+ // Random bytes making up the ticket. The curator leader fills this with
+ // curator.registerTicketSize (32) bytes from a cryptographic random source
+ // when it generates a new ticket.
+ bytes opaque = 1;
+}
+
diff --git a/metropolis/proto/api/BUILD.bazel b/metropolis/proto/api/BUILD.bazel
index 5004440..61d14fb 100644
--- a/metropolis/proto/api/BUILD.bazel
+++ b/metropolis/proto/api/BUILD.bazel
@@ -8,6 +8,7 @@
"aaa.proto",
"configuration.proto",
"debug.proto",
+ "management.proto",
],
visibility = ["//visibility:public"],
)
diff --git a/metropolis/proto/api/management.proto b/metropolis/proto/api/management.proto
new file mode 100644
index 0000000..c0b8332
--- /dev/null
+++ b/metropolis/proto/api/management.proto
@@ -0,0 +1,22 @@
+syntax = "proto3";
+package metropolis.proto.api;
+option go_package = "source.monogon.dev/metropolis/proto/api";
+
+// Management service available to Cluster Managers.
+//
+// NOTE: the curator leader implementation added alongside this definition does
+// not yet authenticate or authorize callers (see the TODO in
+// leaderManagement.GetRegisterTicket).
+service Management {
+ // GetRegisterTicket retrieves the current RegisterTicket which is required
+ // for new nodes to register into the cluster. Presenting this ticket on
+ // registration does not automatically grant access to arbitrary node
+ // registration. Instead, it is used to guard the API surface of the
+ // Register RPC from potential denial of service attacks, and can be
+ // regenerated at any time in case it leaks.
+ //
+ // If no ticket has been stored yet, the curator leader generates and
+ // persists a new one; subsequent calls return that stored ticket.
+ rpc GetRegisterTicket(GetRegisterTicketRequest) returns (GetRegisterTicketResponse);
+}
+
+// Request message of Management.GetRegisterTicket. It currently carries no
+// fields.
+message GetRegisterTicketRequest {
+}
+
+// Response message of Management.GetRegisterTicket.
+message GetRegisterTicketResponse {
+ // Opaque bytes that comprise the RegisterTicket.
+ bytes ticket = 1;
+}
\ No newline at end of file