Add nanoswitch and cluster testing
Adds nanoswitch and the `switched-multi2` launch target to launch two Smalltown instances on a switched
network and enroll them into a single cluster. Nanoswitch contains a Linux bridge and a minimal DHCP server
and connects to the two Smalltown instances over virtual Ethernet cables. Also moves the DHCP client out into
its own package, since nanoswitch needs it.
Test Plan:
Manually tested by running `bazel run //:launch -- switched-multi2` and observing that the second VM
(whose serial port is mapped to stdout) prints that it is enrolled. Also validated that `bazel run //core/cmd/dbg -- kubectl get node -o wide` returns two ready nodes.
X-Origin-Diff: phab/D572
GitOrigin-RevId: 9f6e2b3d8268749dd81588205646ae3976ad14b3
diff --git a/core/internal/api/BUILD.bazel b/core/internal/api/BUILD.bazel
index e862340..2f25fe6 100644
--- a/core/internal/api/BUILD.bazel
+++ b/core/internal/api/BUILD.bazel
@@ -18,6 +18,7 @@
"//core/internal/consensus:go_default_library",
"//core/pkg/tpm:go_default_library",
"@com_github_gogo_protobuf//proto:go_default_library",
+ "@com_github_grpc_ecosystem_go_grpc_middleware//retry:go_default_library",
"@io_etcd_go_etcd//clientv3:go_default_library",
"@org_golang_google_grpc//:go_default_library",
"@org_golang_google_grpc//codes:go_default_library",
diff --git a/core/internal/api/nodemanagement.go b/core/internal/api/nodemanagement.go
index 0a3614e..4bc4659 100644
--- a/core/internal/api/nodemanagement.go
+++ b/core/internal/api/nodemanagement.go
@@ -23,19 +23,26 @@
"crypto/rand"
"crypto/sha256"
"crypto/subtle"
+ "crypto/tls"
"crypto/x509"
"encoding/hex"
"errors"
"fmt"
"io"
+ "net"
+ "time"
"github.com/gogo/protobuf/proto"
+ grpcretry "github.com/grpc-ecosystem/go-grpc-middleware/retry"
"go.etcd.io/etcd/clientv3"
"go.uber.org/zap"
+ "google.golang.org/grpc"
"google.golang.org/grpc/codes"
+ "google.golang.org/grpc/credentials"
"google.golang.org/grpc/status"
"git.monogon.dev/source/nexantic.git/core/generated/api"
+ "git.monogon.dev/source/nexantic.git/core/internal/common"
"git.monogon.dev/source/nexantic.git/core/pkg/tpm"
)
@@ -53,7 +60,7 @@
return "", errors.New("invalid node identity certificate")
}
- return "smalltown-" + hex.EncodeToString([]byte(pubKey[:16])), nil
+ return common.NameFromIDKey(pubKey), nil
}
func (s *Server) registerNewNode(node *api.Node) error {
@@ -178,6 +185,42 @@
}})
}
+func (s *Server) dialNode(ctx context.Context, node *api.Node) (api.NodeServiceClient, error) {
+ masterID, err := s.loadMasterCert()
+ if err != nil {
+ return nil, err
+ }
+
+ secureTransport := &tls.Config{
+ Certificates: []tls.Certificate{*masterID},
+ InsecureSkipVerify: true,
+ // Critical function, please review any changes with care
+ // TODO(lorenz): Actively check that this actually provides the security guarantees that we need
+ VerifyPeerCertificate: func(rawCerts [][]byte, verifiedChains [][]*x509.Certificate) error {
+ for _, cert := range rawCerts {
+ // X.509 certificates in DER can be compared like this since DER has a unique representation
+ // for each certificate.
+ if bytes.Equal(cert, node.IdCert) {
+ return nil
+ }
+ }
+ return errors.New("failed to find authorized Node certificate")
+ },
+ MinVersion: tls.VersionTLS13,
+ }
+ addr := net.IP(node.Address)
+ opts := []grpcretry.CallOption{
+ grpcretry.WithBackoff(grpcretry.BackoffExponential(100 * time.Millisecond)),
+ }
+ clientCreds := grpc.WithTransportCredentials(credentials.NewTLS(secureTransport))
+ clientConn, err := grpc.DialContext(ctx, fmt.Sprintf("%v:%v", addr, common.NodeServicePort), clientCreds,
+ grpc.WithUnaryInterceptor(grpcretry.UnaryClientInterceptor(opts...)))
+ if err != nil {
+ return nil, fmt.Errorf("failed to dial node service: %w", err)
+ }
+ return api.NewNodeServiceClient(clientConn), nil
+}
+
func (s *Server) NewTPM2NodeRegister(registerServer api.NodeManagementService_NewTPM2NodeRegisterServer) error {
registerReqContainer, err := registerServer.Recv()
if err != nil {
@@ -258,8 +301,17 @@
}
// TODO: Plug in policy engine here
+ idCert, err := x509.ParseCertificate(newNodeInfo.IdCert)
+ if err != nil {
+ return err
+ }
+ nodeIdPubKey, ok := idCert.PublicKey.(ed25519.PublicKey)
+ if !ok || len(nodeIdPubKey) != ed25519.PublicKeySize {
+ return status.Error(codes.InvalidArgument, "Invalid ID certificate public key")
+ }
node := api.Node{
+ Name: common.NameFromIDKey(nodeIdPubKey),
Address: newNodeInfo.Ip,
Integrity: &api.Node_Tpm2{Tpm2: &api.NodeTPM2{
AkPub: registerReq.AkPublic,
@@ -268,7 +320,7 @@
}},
GlobalUnlockKey: newNodeInfo.GlobalUnlockKey,
IdCert: newNodeInfo.IdCert,
- State: api.Node_UNININITALIZED,
+ State: api.Node_MASTER,
}
if err := s.registerNewNode(&node); err != nil {
@@ -276,5 +328,27 @@
return status.Error(codes.Internal, "failed to register node")
}
+ go func() {
+ ctx := context.Background()
+ nodeClient, err := s.dialNode(ctx, &node)
+ if err != nil {
+ s.Logger.Warn("Failed to join newly enrolled node", zap.Error(err))
+ return
+ }
+ newCerts, initialCluster, err := s.consensusService.ProvisionMember(node.Name, node.Address)
+ if err != nil {
+ s.Logger.Warn("Failed to join newly enrolled node", zap.Error(err))
+ return
+ }
+ _, err = nodeClient.JoinCluster(ctx, &api.JoinClusterRequest{
+ InitialCluster: initialCluster,
+ Certs: newCerts,
+ }, grpcretry.WithMax(10))
+ if err != nil {
+ s.Logger.Warn("Failed to join newly enrolled node", zap.Error(err))
+ return
+ }
+ }()
+
return nil
}