m/n/c/curator: implement AAA.Escrow for initial owner pubkey

This finally implements AAA.Escrow in Metropolis.

We're not yet implementing multi-user support, so this currently only
implements retrieving an Owner certificate using the owner public key
specified in NodeParameters.cluster_bootstrap.

Change-Id: I64a7ba025a8069d82b3c804ca3e2a706de2b0fbf
Reviewed-on: https://review.monogon.dev/c/monogon/+/289
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
diff --git a/metropolis/node/core/curator/BUILD.bazel b/metropolis/node/core/curator/BUILD.bazel
index 0374297..18bb351 100644
--- a/metropolis/node/core/curator/BUILD.bazel
+++ b/metropolis/node/core/curator/BUILD.bazel
@@ -7,6 +7,7 @@
         "curator.go",
         "impl_follower.go",
         "impl_leader.go",
+        "impl_leader_aaa.go",
         "impl_leader_curator.go",
         "listener.go",
         "state_node.go",
@@ -26,12 +27,14 @@
         "//metropolis/pkg/event/memory:go_default_library",
         "//metropolis/pkg/pki:go_default_library",
         "//metropolis/pkg/supervisor:go_default_library",
+        "//metropolis/proto/api:go_default_library",
         "//metropolis/proto/common:go_default_library",
         "@io_etcd_go_etcd//clientv3:go_default_library",
         "@io_etcd_go_etcd//clientv3/concurrency:go_default_library",
         "@org_golang_google_grpc//:go_default_library",
         "@org_golang_google_grpc//codes:go_default_library",
         "@org_golang_google_grpc//credentials:go_default_library",
+        "@org_golang_google_grpc//peer:go_default_library",
         "@org_golang_google_grpc//status:go_default_library",
         "@org_golang_google_protobuf//proto:go_default_library",
         "@org_golang_x_sys//unix:go_default_library",
diff --git a/metropolis/node/core/curator/bootstrap.go b/metropolis/node/core/curator/bootstrap.go
index af0a038..c50cbbd 100644
--- a/metropolis/node/core/curator/bootstrap.go
+++ b/metropolis/node/core/curator/bootstrap.go
@@ -51,10 +51,6 @@
 	return
 }
 
-const (
-	initialOwnerEtcdPath = "/global/initial_owner"
-)
-
 // BootstrapFinish saves the given Node and initial cluster owner pubkey into
 // etcd, without regard for any other cluster state and directly using a given
 // etcd client.
diff --git a/metropolis/node/core/curator/impl_follower.go b/metropolis/node/core/curator/impl_follower.go
index 85e6bf5..c258778 100644
--- a/metropolis/node/core/curator/impl_follower.go
+++ b/metropolis/node/core/curator/impl_follower.go
@@ -4,12 +4,17 @@
 	"google.golang.org/grpc/codes"
 	"google.golang.org/grpc/status"
 
-	apb "source.monogon.dev/metropolis/node/core/curator/proto/api"
+	cpb "source.monogon.dev/metropolis/node/core/curator/proto/api"
+	apb "source.monogon.dev/metropolis/proto/api"
 )
 
 type curatorFollower struct {
 }
 
-func (f *curatorFollower) Watch(req *apb.WatchRequest, srv apb.Curator_WatchServer) error {
+func (f *curatorFollower) Watch(req *cpb.WatchRequest, srv cpb.Curator_WatchServer) error {
+	return status.Error(codes.Unimplemented, "curator follower not implemented")
+}
+
+func (f *curatorFollower) Escrow(srv apb.AAA_EscrowServer) error {
 	return status.Error(codes.Unimplemented, "curator follower not implemented")
 }
diff --git a/metropolis/node/core/curator/impl_leader.go b/metropolis/node/core/curator/impl_leader.go
index f42da61..9c4414a 100644
--- a/metropolis/node/core/curator/impl_leader.go
+++ b/metropolis/node/core/curator/impl_leader.go
@@ -65,10 +65,12 @@
 // which has access to the leadership structure.
 type curatorLeader struct {
 	leaderCurator
+	leaderAAA
 }
 
 func newCuratorLeader(l leadership) *curatorLeader {
 	return &curatorLeader{
 		leaderCurator{l},
+		leaderAAA{l},
 	}
 }
diff --git a/metropolis/node/core/curator/impl_leader_aaa.go b/metropolis/node/core/curator/impl_leader_aaa.go
new file mode 100644
index 0000000..2f8124a
--- /dev/null
+++ b/metropolis/node/core/curator/impl_leader_aaa.go
@@ -0,0 +1,180 @@
+package curator
+
+import (
+	"context"
+	"crypto/ed25519"
+	"crypto/subtle"
+	"errors"
+
+	"google.golang.org/grpc/codes"
+	"google.golang.org/grpc/credentials"
+	"google.golang.org/grpc/peer"
+	"google.golang.org/grpc/status"
+	"google.golang.org/protobuf/proto"
+
+	ppb "source.monogon.dev/metropolis/node/core/curator/proto/private"
+	"source.monogon.dev/metropolis/pkg/pki"
+	apb "source.monogon.dev/metropolis/proto/api"
+)
+
+const (
+	// initialOwnerEtcdPath is the etcd key storing private.InitialOwner.
+	initialOwnerEtcdPath = "/global/initial_owner"
+)
+
+type leaderAAA struct {
+	leadership
+}
+
+// pubkeyFromGRPC returns the ed25519 public key presented by the client in any
+// client certificate for a gRPC call. If no certificate is presented, nil is
+// returned. If the connection is insecure or the client presented some invalid
+// certificate configuration, a gRPC status is returned that can be directly
+// passed to the client. Otherwise, the public key is returned.
+//
+// SECURITY: the public key is not verified to be authorized to perform any
+// action, just to be a valid ed25519 key.
+func pubkeyFromGRPC(ctx context.Context) (ed25519.PublicKey, error) {
+	p, ok := peer.FromContext(ctx)
+	if !ok {
+		return nil, status.Error(codes.Unavailable, "could not retrieve peer info")
+	}
+	tlsInfo, ok := p.AuthInfo.(credentials.TLSInfo)
+	if !ok {
+		return nil, status.Error(codes.Unauthenticated, "connection not secure")
+	}
+	count := len(tlsInfo.State.PeerCertificates)
+	if count == 0 {
+		return nil, nil
+	}
+	if count > 1 {
+		return nil, status.Errorf(codes.Unauthenticated, "exactly one client certificate must be sent (got %d)", count)
+	}
+	pk, ok := tlsInfo.State.PeerCertificates[0].PublicKey.(ed25519.PublicKey)
+	if !ok {
+		return nil, status.Errorf(codes.Unauthenticated, "client certificate must be for ed25519 key")
+	}
+	return pk, nil
+}
+
+// getOwnerPubkey returns the public key of the configured owner of the cluster.
+//
+// MVP: this should be turned into a proper user/entity system.
+func (a *leaderAAA) getOwnerPubkey(ctx context.Context) (ed25519.PublicKey, error) {
+	res, err := a.etcd.Get(ctx, initialOwnerEtcdPath)
+	if err != nil {
+		if !errors.Is(err, ctx.Err()) {
+			// TODO(q3k): log
+			return nil, status.Error(codes.Unavailable, "could not retrieve initial owner status in etcd")
+		}
+		return nil, err
+	}
+	if len(res.Kvs) != 1 {
+		return nil, status.Error(codes.FailedPrecondition, "no initial owner set for cluster")
+	}
+	var iom ppb.InitialOwner
+	if err := proto.Unmarshal(res.Kvs[0].Value, &iom); err != nil {
+		return nil, status.Error(codes.FailedPrecondition, "initial owner data could not be unmarshaled")
+	}
+
+	if len(iom.PublicKey) != ed25519.PublicKeySize {
+		return nil, status.Error(codes.FailedPrecondition, "initial owner publickey has invalid length")
+	}
+	return iom.PublicKey, nil
+}
+
+// Escrow implements the AAA Escrow gRPC method, but currently only for the
+// initial cluster owner exchange workflow. That is, the client presents a
+// self-signed certificate for the InitialClusterOwner public key defined in
+// the cluster bootstrap configuration, and receives a certificate which can
+// be used to perform further management actions.
+func (a *leaderAAA) Escrow(srv apb.AAA_EscrowServer) error {
+	ctx := srv.Context()
+
+	// Receive Parameters from client. This tells us what identity the client wants
+	// from us.
+	msg, err := srv.Recv()
+	if err != nil {
+		return err
+	}
+	if msg.Parameters == nil {
+		return status.Errorf(codes.InvalidArgument, "client parameters must be set")
+	}
+
+	// MVP: only support authenticating as 'owner' identity.
+	if msg.Parameters.RequestedIdentityName != "owner" {
+		return status.Errorf(codes.Unimplemented, "only owner escrow is currently implemented")
+	}
+
+	if len(msg.Parameters.PublicKey) != ed25519.PublicKeySize {
+		return status.Errorf(codes.InvalidArgument, "client parameters public_key must be set and valid")
+	}
+
+	// The owner is authenticated by the InitialOwnerKey set during cluster
+	// bootstrap, whose ownership is proven to the cluster by presenting a
+	// self-signed certificate emitted for that key.
+	//
+	// TODO(q3k) The AAA proto doesn't really have a proof kind for this, so for
+	// now we go with REFRESH_CERTIFICATE. We should make the AAA proto explicitly
+	// handle this as a special KIND.
+	pk, err := pubkeyFromGRPC(ctx)
+	if err != nil {
+		// If an error occurred, it's either because the connection is not secured by
+		// TLS, or an invalid certificate was presented (ie. more than one cert, or a
+		// non-ed25519 cert). Fail as per AAA proto.
+		return err
+	}
+	if pk == nil {
+		// No cert was presented, respond with REFRESH_CERTIFICATE request.
+		err := srv.Send(&apb.EscrowFromServer{
+			Needed: []*apb.EscrowFromServer_ProofRequest{
+				{
+					Kind: apb.EscrowFromServer_ProofRequest_KIND_REFRESH_CERTIFICATE,
+				},
+			},
+		})
+		if err != nil {
+			return err
+		}
+		return status.Error(codes.Unauthenticated, "cannot proceed without refresh certificate proof at transport layer")
+	}
+
+	// MVP: only support parameters public_key == TLS public key.
+	if subtle.ConstantTimeCompare(pk, msg.Parameters.PublicKey) != 1 {
+		return status.Errorf(codes.Unimplemented, "client parameters public_key different from transport public key unimplemented")
+	}
+
+	// Check client public key is the same as the cluster owner pubkey.
+	opk, err := a.getOwnerPubkey(ctx)
+	if err != nil {
+		return err
+	}
+	if subtle.ConstantTimeCompare(pk, opk) != 1 {
+		return status.Errorf(codes.PermissionDenied, "public key not authorized to escrow owner credentials")
+	}
+
+	// Everything okay, send response with certificate.
+	//
+	// MVP: The emitted certificate is valid forever.
+	oc := pki.Certificate{
+		Namespace: &pkiNamespace,
+		Issuer:    pkiCA,
+		Template:  pki.Client("owner", nil),
+		Name:      "owner",
+		Mode:      pki.CertificateExternal,
+		PublicKey: pk,
+	}
+	ocBytes, err := oc.Ensure(ctx, a.etcd)
+	if err != nil {
+		return status.Errorf(codes.Unavailable, "ensuring new certificate failed: %v", err)
+	}
+
+	return srv.Send(&apb.EscrowFromServer{
+		Fulfilled: []*apb.EscrowFromServer_ProofRequest{
+			{
+				Kind: apb.EscrowFromServer_ProofRequest_KIND_REFRESH_CERTIFICATE,
+			},
+		},
+		EmittedCertificate: ocBytes,
+	})
+}
diff --git a/metropolis/node/core/curator/listener.go b/metropolis/node/core/curator/listener.go
index b578290..e92ca0d 100644
--- a/metropolis/node/core/curator/listener.go
+++ b/metropolis/node/core/curator/listener.go
@@ -17,6 +17,7 @@
 	"source.monogon.dev/metropolis/node/core/localstorage"
 	"source.monogon.dev/metropolis/pkg/combinectx"
 	"source.monogon.dev/metropolis/pkg/supervisor"
+	apb "source.monogon.dev/metropolis/proto/api"
 )
 
 // listener is the curator runnable responsible for listening for gRPC
@@ -116,6 +117,7 @@
 // must implement.
 type services interface {
 	cpb.CuratorServer
+	apb.AAAServer
 }
 
 // activeTarget is the active implementation used by the listener dispatcher, or
@@ -215,7 +217,7 @@
 	srvPublic := grpc.NewServer(grpc.Creds(l.publicCreds))
 
 	cpb.RegisterCuratorServer(srvLocal, l)
-	// TODO(q3k): register servers on srvPublic.
+	apb.RegisterAAAServer(srvPublic, l)
 
 	if err := supervisor.Run(ctx, "local", supervisor.GRPCServer(srvLocal, lisLocal, true)); err != nil {
 		return fmt.Errorf("while starting local gRPC listener: %w", err)
@@ -311,3 +313,33 @@
 	}
 	return l.callImpl(srv.Context(), proxy)
 }
+
+type aaaEscrowServer struct {
+	grpc.ServerStream
+	ctx context.Context
+}
+
+func (m *aaaEscrowServer) Context() context.Context {
+	return m.ctx
+}
+
+func (m *aaaEscrowServer) Send(r *apb.EscrowFromServer) error {
+	return m.ServerStream.SendMsg(r)
+}
+
+func (m *aaaEscrowServer) Recv() (*apb.EscrowFromClient, error) {
+	var res apb.EscrowFromClient
+	if err := m.ServerStream.RecvMsg(&res); err != nil {
+		return nil, err
+	}
+	return &res, nil
+}
+
+func (l *listener) Escrow(srv apb.AAA_EscrowServer) error {
+	return l.callImpl(srv.Context(), func(ctx context.Context, impl services) error {
+		return impl.Escrow(&aaaEscrowServer{
+			ServerStream: srv,
+			ctx:          ctx,
+		})
+	})
+}
diff --git a/metropolis/node/core/curator/proto/private/storage.proto b/metropolis/node/core/curator/proto/private/storage.proto
index 6f42d3c..e41cc79 100644
--- a/metropolis/node/core/curator/proto/private/storage.proto
+++ b/metropolis/node/core/curator/proto/private/storage.proto
@@ -29,10 +29,14 @@
 // Information about the cluster owner, currently the only Metropolis management
 // entity, named 'owner' in public APIs.
 //
-// In the future, once we have implemented a manager/user entity system, this
-// will be replaced by a proper per-user entry.
+// This is populated from NodeParameters.cluster_bootstrap.owner_public_key on
+// cluster bootstrap.
 //
-// Stored under /global/initial_owner.
+// MVP: In the future, once we have implemented a manager/user entity system,
+// this will be replaced by a proper per-user entry.
+//
+// Stored under /global/initial_owner (see curator.initialOwnerEtcdPath).
 message InitialOwner {
+    // ED25519 public key of cluster owner.
     bytes public_key = 1;
 }