Add nanoswitch and cluster testing

Adds nanoswitch and the `switched-multi2` launch target to launch two Smalltown instances on a switched
network and enroll them into a single cluster. Nanoswitch contains a Linux bridge and a minimal DHCP server
and connects to the two Smalltown instances over virtual Ethernet cables. Also moves the DHCP client out into
its own package since nanoswitch needs it.

Test Plan:
Manually tested using `bazel run //:launch -- switched-multi2` and observing that the second VM
(whose serial port is mapped to stdout) prints that it is enrolled. Also validated by `bazel run //core/cmd/dbg -- kubectl get node -o wide` returning two ready nodes.

X-Origin-Diff: phab/D572
GitOrigin-RevId: 9f6e2b3d8268749dd81588205646ae3976ad14b3
diff --git a/core/internal/api/BUILD.bazel b/core/internal/api/BUILD.bazel
index e862340..2f25fe6 100644
--- a/core/internal/api/BUILD.bazel
+++ b/core/internal/api/BUILD.bazel
@@ -18,6 +18,7 @@
         "//core/internal/consensus:go_default_library",
         "//core/pkg/tpm:go_default_library",
         "@com_github_gogo_protobuf//proto:go_default_library",
+        "@com_github_grpc_ecosystem_go_grpc_middleware//retry:go_default_library",
         "@io_etcd_go_etcd//clientv3:go_default_library",
         "@org_golang_google_grpc//:go_default_library",
         "@org_golang_google_grpc//codes:go_default_library",
diff --git a/core/internal/api/nodemanagement.go b/core/internal/api/nodemanagement.go
index 0a3614e..4bc4659 100644
--- a/core/internal/api/nodemanagement.go
+++ b/core/internal/api/nodemanagement.go
@@ -23,19 +23,26 @@
 	"crypto/rand"
 	"crypto/sha256"
 	"crypto/subtle"
+	"crypto/tls"
 	"crypto/x509"
 	"encoding/hex"
 	"errors"
 	"fmt"
 	"io"
+	"net"
+	"time"
 
 	"github.com/gogo/protobuf/proto"
+	grpcretry "github.com/grpc-ecosystem/go-grpc-middleware/retry"
 	"go.etcd.io/etcd/clientv3"
 	"go.uber.org/zap"
+	"google.golang.org/grpc"
 	"google.golang.org/grpc/codes"
+	"google.golang.org/grpc/credentials"
 	"google.golang.org/grpc/status"
 
 	"git.monogon.dev/source/nexantic.git/core/generated/api"
+	"git.monogon.dev/source/nexantic.git/core/internal/common"
 	"git.monogon.dev/source/nexantic.git/core/pkg/tpm"
 )
 
@@ -53,7 +60,7 @@
 		return "", errors.New("invalid node identity certificate")
 	}
 
-	return "smalltown-" + hex.EncodeToString([]byte(pubKey[:16])), nil
+	return common.NameFromIDKey(pubKey), nil
 }
 
 func (s *Server) registerNewNode(node *api.Node) error {
@@ -178,6 +185,42 @@
 	}})
 }
 
+func (s *Server) dialNode(ctx context.Context, node *api.Node) (api.NodeServiceClient, error) {
+	masterID, err := s.loadMasterCert()
+	if err != nil {
+		return nil, err
+	}
+
+	secureTransport := &tls.Config{
+		Certificates:       []tls.Certificate{*masterID},
+		InsecureSkipVerify: true,
+		// Critical function, please review any changes with care
+		// TODO(lorenz): Actively check that this actually provides the security guarantees that we need
+		VerifyPeerCertificate: func(rawCerts [][]byte, verifiedChains [][]*x509.Certificate) error {
+			for _, cert := range rawCerts {
+				// X.509 certificates in DER can be compared like this since DER has a unique representation
+				// for each certificate.
+				if bytes.Equal(cert, node.IdCert) {
+					return nil
+				}
+			}
+			return errors.New("failed to find authorized Node certificate")
+		},
+		MinVersion: tls.VersionTLS13,
+	}
+	addr := net.IP(node.Address)
+	opts := []grpcretry.CallOption{
+		grpcretry.WithBackoff(grpcretry.BackoffExponential(100 * time.Millisecond)),
+	}
+	clientCreds := grpc.WithTransportCredentials(credentials.NewTLS(secureTransport))
+	clientConn, err := grpc.DialContext(ctx, fmt.Sprintf("%v:%v", addr, common.NodeServicePort), clientCreds,
+		grpc.WithUnaryInterceptor(grpcretry.UnaryClientInterceptor(opts...)))
+	if err != nil {
+		return nil, fmt.Errorf("failed to dial node service: %w", err)
+	}
+	return api.NewNodeServiceClient(clientConn), nil
+}
+
 func (s *Server) NewTPM2NodeRegister(registerServer api.NodeManagementService_NewTPM2NodeRegisterServer) error {
 	registerReqContainer, err := registerServer.Recv()
 	if err != nil {
@@ -258,8 +301,17 @@
 	}
 
 	// TODO: Plug in policy engine here
+	idCert, err := x509.ParseCertificate(newNodeInfo.IdCert)
+	if err != nil {
+		return err
+	}
+	nodeIdPubKey, ok := idCert.PublicKey.(ed25519.PublicKey)
+	if !ok || len(nodeIdPubKey) != ed25519.PublicKeySize {
+		return status.Error(codes.InvalidArgument, "Invalid ID certificate public key")
+	}
 
 	node := api.Node{
+		Name:    common.NameFromIDKey(nodeIdPubKey),
 		Address: newNodeInfo.Ip,
 		Integrity: &api.Node_Tpm2{Tpm2: &api.NodeTPM2{
 			AkPub:    registerReq.AkPublic,
@@ -268,7 +320,7 @@
 		}},
 		GlobalUnlockKey: newNodeInfo.GlobalUnlockKey,
 		IdCert:          newNodeInfo.IdCert,
-		State:           api.Node_UNININITALIZED,
+		State:           api.Node_MASTER,
 	}
 
 	if err := s.registerNewNode(&node); err != nil {
@@ -276,5 +328,27 @@
 		return status.Error(codes.Internal, "failed to register node")
 	}
 
+	go func() {
+		ctx := context.Background()
+		nodeClient, err := s.dialNode(ctx, &node)
+		if err != nil {
+			s.Logger.Warn("Failed to join newly enrolled node", zap.Error(err))
+			return
+		}
+		newCerts, initialCluster, err := s.consensusService.ProvisionMember(node.Name, node.Address)
+		if err != nil {
+			s.Logger.Warn("Failed to join newly enrolled node", zap.Error(err))
+			return
+		}
+		_, err = nodeClient.JoinCluster(ctx, &api.JoinClusterRequest{
+			InitialCluster: initialCluster,
+			Certs:          newCerts,
+		}, grpcretry.WithMax(10))
+		if err != nil {
+			s.Logger.Warn("Failed to join newly enrolled node", zap.Error(err))
+			return
+		}
+	}()
+
 	return nil
 }