blob: 4bc4659fce841b3c9982716df03fbfce70950972 [file] [log] [blame]
Lorenz Brunaa6b7342019-12-12 02:55:02 +01001// Copyright 2020 The Monogon Project Authors.
2//
3// SPDX-License-Identifier: Apache-2.0
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
17package api
18
19import (
20 "bytes"
21 "context"
22 "crypto/ed25519"
23 "crypto/rand"
24 "crypto/sha256"
25 "crypto/subtle"
Lorenz Brun52f7f292020-06-24 16:42:02 +020026 "crypto/tls"
Lorenz Brunaa6b7342019-12-12 02:55:02 +010027 "crypto/x509"
Lorenz Brun878f5f92020-05-12 16:15:39 +020028 "encoding/hex"
Lorenz Brunaa6b7342019-12-12 02:55:02 +010029 "errors"
30 "fmt"
31 "io"
Lorenz Brun52f7f292020-06-24 16:42:02 +020032 "net"
33 "time"
Lorenz Brunaa6b7342019-12-12 02:55:02 +010034
Lorenz Brunaa6b7342019-12-12 02:55:02 +010035 "github.com/gogo/protobuf/proto"
Lorenz Brun52f7f292020-06-24 16:42:02 +020036 grpcretry "github.com/grpc-ecosystem/go-grpc-middleware/retry"
Lorenz Brunaa6b7342019-12-12 02:55:02 +010037 "go.etcd.io/etcd/clientv3"
38 "go.uber.org/zap"
Lorenz Brun52f7f292020-06-24 16:42:02 +020039 "google.golang.org/grpc"
Lorenz Brunaa6b7342019-12-12 02:55:02 +010040 "google.golang.org/grpc/codes"
Lorenz Brun52f7f292020-06-24 16:42:02 +020041 "google.golang.org/grpc/credentials"
Lorenz Brunaa6b7342019-12-12 02:55:02 +010042 "google.golang.org/grpc/status"
Hendrik Hofstadt8efe51e2020-02-28 12:53:41 +010043
44 "git.monogon.dev/source/nexantic.git/core/generated/api"
Lorenz Brun52f7f292020-06-24 16:42:02 +020045 "git.monogon.dev/source/nexantic.git/core/internal/common"
Hendrik Hofstadt8efe51e2020-02-28 12:53:41 +010046 "git.monogon.dev/source/nexantic.git/core/pkg/tpm"
Lorenz Brunaa6b7342019-12-12 02:55:02 +010047)
48
// Key prefixes used for entries in the consensus store.
const (
	// nodesPrefix is prepended to a node ID to form the key under which the
	// serialized api.Node of that node is stored.
	nodesPrefix = "nodes/"
	// enrolmentsPrefix is the key prefix for enrolment entries.
	enrolmentsPrefix = "enrolments/"
)
51
52func nodeId(idCert []byte) (string, error) {
53 // Currently we only identify nodes by ID key
54 cert, err := x509.ParseCertificate(idCert)
55 if err != nil {
56 return "", err
57 }
58 pubKey, ok := cert.PublicKey.(ed25519.PublicKey)
59 if !ok {
60 return "", errors.New("invalid node identity certificate")
61 }
62
Lorenz Brun52f7f292020-06-24 16:42:02 +020063 return common.NameFromIDKey(pubKey), nil
Lorenz Brunaa6b7342019-12-12 02:55:02 +010064}
65
66func (s *Server) registerNewNode(node *api.Node) error {
67 nodeRaw, err := proto.Marshal(node)
68 if err != nil {
69 return err
70 }
71
72 nodeID, err := nodeId(node.IdCert)
73 if err != nil {
74 return err
75 }
76
77 key := nodesPrefix + nodeID
78
79 // Overwriting nodes is a BadIdea(TM), so make this a Compare-and-Swap
80 res, err := s.getStore().Txn(context.Background()).If(
81 clientv3.Compare(clientv3.CreateRevision(key), "=", 0),
82 ).Then(
83 clientv3.OpPut(key, string(nodeRaw)),
84 ).Commit()
85 if err != nil {
86 return fmt.Errorf("failed to store new node: %w", err)
87 }
88 if !res.Succeeded {
89 s.Logger.Warn("double-registration of node attempted", zap.String("node", nodeID))
90 }
91 return nil
92}
93
94func (s *Server) TPM2BootstrapNode(newNodeInfo *api.NewNodeInfo) (*api.Node, error) {
95 akPublic, err := tpm.GetAKPublic()
96 if err != nil {
97 return nil, err
98 }
99 ekPubkey, ekCert, err := tpm.GetEKPublic()
100 if err != nil {
101 return nil, err
102 }
103 return &api.Node{
104 Address: newNodeInfo.Ip,
105 Integrity: &api.Node_Tpm2{Tpm2: &api.NodeTPM2{
106 AkPub: akPublic,
107 EkCert: ekCert,
108 EkPubkey: ekPubkey,
109 }},
110 GlobalUnlockKey: newNodeInfo.GlobalUnlockKey,
111 IdCert: newNodeInfo.IdCert,
112 State: api.Node_MASTER,
113 }, nil
114}
115
116func (s *Server) TPM2Unlock(unlockServer api.NodeManagementService_TPM2UnlockServer) error {
117 nonce := make([]byte, 32)
118 if _, err := io.ReadFull(rand.Reader, nonce); err != nil {
Leopold Schabel8fba0f82020-01-22 18:46:25 +0100119 return status.Error(codes.Unavailable, "failed to get randomness")
Lorenz Brunaa6b7342019-12-12 02:55:02 +0100120 }
121 if err := unlockServer.Send(&api.TPM2UnlockFlowResponse{
122 Stage: &api.TPM2UnlockFlowResponse_UnlockInit{
123 UnlockInit: &api.TPM2UnlockInit{
124 Nonce: nonce,
125 },
126 },
127 }); err != nil {
128 return err
129 }
130 unlockReqContainer, err := unlockServer.Recv()
131 if err != nil {
132 return err
133 }
134 unlockReqVariant, ok := unlockReqContainer.Stage.(*api.TPM2UnlockFlowRequeset_UnlockRequest)
135 if !ok {
136 return status.Errorf(codes.InvalidArgument, "protocol violation")
137 }
138 unlockRequest := unlockReqVariant.UnlockRequest
139
140 store := s.getStore()
141 // This is safe, etcd does not do relative paths
142 path := nodesPrefix + unlockRequest.NodeId
143 nodeRes, err := store.Get(unlockServer.Context(), path)
144 if err != nil {
145 return status.Error(codes.Unavailable, "consensus request failed")
146 }
147 if nodeRes.Count == 0 {
148 return status.Error(codes.NotFound, "this node does not exist")
149 } else if nodeRes.Count > 1 {
150 panic("invariant violation: more than one node with the same id")
151 }
152 nodeRaw := nodeRes.Kvs[0].Value
153 var node api.Node
154 if err := proto.Unmarshal(nodeRaw, &node); err != nil {
155 s.Logger.Error("Failed to decode node", zap.Error(err))
156 return status.Error(codes.Internal, "invalid node")
157 }
158
159 nodeTPM2, ok := node.Integrity.(*api.Node_Tpm2)
160 if !ok {
161 return status.Error(codes.InvalidArgument, "node not integrity-protected with TPM2")
162 }
163
164 validQuote, err := tpm.VerifyAttestPlatform(nonce, nodeTPM2.Tpm2.AkPub, unlockRequest.Quote, unlockRequest.QuoteSignature)
165 if err != nil {
166 return status.Error(codes.PermissionDenied, "invalid quote")
167 }
168
169 pcrHash := sha256.New()
170 for _, pcr := range unlockRequest.Pcrs {
171 pcrHash.Write(pcr)
172 }
173 expectedPCRHash := pcrHash.Sum(nil)
174
175 if !bytes.Equal(validQuote.AttestedQuoteInfo.PCRDigest, expectedPCRHash) {
176 return status.Error(codes.InvalidArgument, "the quote's PCR hash does not match the supplied PCRs")
177 }
178
179 // TODO: Plug in policy engine to decide if the unlock should actually happen
180
181 return unlockServer.Send(&api.TPM2UnlockFlowResponse{Stage: &api.TPM2UnlockFlowResponse_UnlockResponse{
182 UnlockResponse: &api.TPM2UnlockResponse{
183 GlobalUnlockKey: node.GlobalUnlockKey,
184 },
185 }})
186}
187
Lorenz Brun52f7f292020-06-24 16:42:02 +0200188func (s *Server) dialNode(ctx context.Context, node *api.Node) (api.NodeServiceClient, error) {
189 masterID, err := s.loadMasterCert()
190 if err != nil {
191 return nil, err
192 }
193
194 secureTransport := &tls.Config{
195 Certificates: []tls.Certificate{*masterID},
196 InsecureSkipVerify: true,
197 // Critical function, please review any changes with care
198 // TODO(lorenz): Actively check that this actually provides the security guarantees that we need
199 VerifyPeerCertificate: func(rawCerts [][]byte, verifiedChains [][]*x509.Certificate) error {
200 for _, cert := range rawCerts {
201 // X.509 certificates in DER can be compared like this since DER has a unique representation
202 // for each certificate.
203 if bytes.Equal(cert, node.IdCert) {
204 return nil
205 }
206 }
207 return errors.New("failed to find authorized Node certificate")
208 },
209 MinVersion: tls.VersionTLS13,
210 }
211 addr := net.IP(node.Address)
212 opts := []grpcretry.CallOption{
213 grpcretry.WithBackoff(grpcretry.BackoffExponential(100 * time.Millisecond)),
214 }
215 clientCreds := grpc.WithTransportCredentials(credentials.NewTLS(secureTransport))
216 clientConn, err := grpc.DialContext(ctx, fmt.Sprintf("%v:%v", addr, common.NodeServicePort), clientCreds,
217 grpc.WithUnaryInterceptor(grpcretry.UnaryClientInterceptor(opts...)))
218 if err != nil {
219 return nil, fmt.Errorf("failed to dial node service: %w", err)
220 }
221 return api.NewNodeServiceClient(clientConn), nil
222}
223
Lorenz Brunaa6b7342019-12-12 02:55:02 +0100224func (s *Server) NewTPM2NodeRegister(registerServer api.NodeManagementService_NewTPM2NodeRegisterServer) error {
225 registerReqContainer, err := registerServer.Recv()
226 if err != nil {
227 return err
228 }
229 registerReqVariant, ok := registerReqContainer.Stage.(*api.TPM2FlowRequest_Register)
230 if !ok {
231 return status.Error(codes.InvalidArgument, "protocol violation")
232 }
233 registerReq := registerReqVariant.Register
234
235 challengeNonce := make([]byte, 32)
236 if _, err := io.ReadFull(rand.Reader, challengeNonce); err != nil {
Leopold Schabel8fba0f82020-01-22 18:46:25 +0100237 return status.Error(codes.Unavailable, "failed to get randomness")
Lorenz Brunaa6b7342019-12-12 02:55:02 +0100238 }
239 challenge, challengeBlob, err := tpm.MakeAKChallenge(registerReq.EkPubkey, registerReq.AkPublic, challengeNonce)
240 if err != nil {
241 return status.Errorf(codes.InvalidArgument, "failed to challenge AK: %v", err)
242 }
243 nonce := make([]byte, 32)
244 if _, err := io.ReadFull(rand.Reader, nonce); err != nil {
Leopold Schabel8fba0f82020-01-22 18:46:25 +0100245 return status.Error(codes.Unavailable, "failed to get randomness")
Lorenz Brunaa6b7342019-12-12 02:55:02 +0100246 }
247 if err := registerServer.Send(&api.TPM2FlowResponse{Stage: &api.TPM2FlowResponse_AttestRequest{AttestRequest: &api.TPM2AttestRequest{
248 AkChallenge: challenge,
249 AkChallengeSecret: challengeBlob,
250 QuoteNonce: nonce,
251 }}}); err != nil {
252 return err
253 }
254 attestationResContainer, err := registerServer.Recv()
255 if err != nil {
256 return err
257 }
258 attestResVariant, ok := attestationResContainer.Stage.(*api.TPM2FlowRequest_AttestResponse)
259 if !ok {
260 return status.Error(codes.InvalidArgument, "protocol violation")
261 }
262 attestRes := attestResVariant.AttestResponse
263
264 if subtle.ConstantTimeCompare(attestRes.AkChallengeSolution, challengeNonce) != 1 {
265 return status.Error(codes.InvalidArgument, "invalid challenge response")
266 }
267
268 validQuote, err := tpm.VerifyAttestPlatform(nonce, registerReq.AkPublic, attestRes.Quote, attestRes.QuoteSignature)
269 if err != nil {
270 return status.Error(codes.PermissionDenied, "invalid quote")
271 }
272
273 pcrHash := sha256.New()
274 for _, pcr := range attestRes.Pcrs {
275 pcrHash.Write(pcr)
276 }
277 expectedPCRHash := pcrHash.Sum(nil)
278
279 if !bytes.Equal(validQuote.AttestedQuoteInfo.PCRDigest, expectedPCRHash) {
280 return status.Error(codes.InvalidArgument, "the quote's PCR hash does not match the supplied PCRs")
281 }
282
283 newNodeInfoContainer, err := registerServer.Recv()
284 newNodeInfoVariant, ok := newNodeInfoContainer.Stage.(*api.TPM2FlowRequest_NewNodeInfo)
285 newNodeInfo := newNodeInfoVariant.NewNodeInfo
286
287 store := s.getStore()
Lorenz Brun878f5f92020-05-12 16:15:39 +0200288 res, err := store.Get(registerServer.Context(), "enrolments/"+hex.EncodeToString(newNodeInfo.EnrolmentConfig.EnrolmentSecret))
Lorenz Brunaa6b7342019-12-12 02:55:02 +0100289 if err != nil {
290 return status.Error(codes.Unavailable, "Consensus unavailable")
291 }
292 if res.Count == 0 {
293 return status.Error(codes.PermissionDenied, "Invalid enrolment secret")
294 } else if res.Count > 1 {
295 panic("more than one value for the same key, bailing")
296 }
297 rawVal := res.Kvs[0].Value
298 var config api.EnrolmentConfig
299 if err := proto.Unmarshal(rawVal, &config); err != nil {
300 return err
301 }
302
303 // TODO: Plug in policy engine here
Lorenz Brun52f7f292020-06-24 16:42:02 +0200304 idCert, err := x509.ParseCertificate(newNodeInfo.IdCert)
305 if err != nil {
306 return err
307 }
308 nodeIdPubKey, ok := idCert.PublicKey.(ed25519.PublicKey)
309 if !ok || len(nodeIdPubKey) != ed25519.PublicKeySize {
310 return status.Error(codes.InvalidArgument, "Invalid ID certificate public key")
311 }
Lorenz Brunaa6b7342019-12-12 02:55:02 +0100312
313 node := api.Node{
Lorenz Brun52f7f292020-06-24 16:42:02 +0200314 Name: common.NameFromIDKey(nodeIdPubKey),
Lorenz Brunaa6b7342019-12-12 02:55:02 +0100315 Address: newNodeInfo.Ip,
316 Integrity: &api.Node_Tpm2{Tpm2: &api.NodeTPM2{
317 AkPub: registerReq.AkPublic,
318 EkCert: registerReq.EkCert,
319 EkPubkey: registerReq.EkPubkey,
320 }},
321 GlobalUnlockKey: newNodeInfo.GlobalUnlockKey,
322 IdCert: newNodeInfo.IdCert,
Lorenz Brun52f7f292020-06-24 16:42:02 +0200323 State: api.Node_MASTER,
Lorenz Brunaa6b7342019-12-12 02:55:02 +0100324 }
325
326 if err := s.registerNewNode(&node); err != nil {
327 s.Logger.Error("failed to register a node", zap.Error(err))
328 return status.Error(codes.Internal, "failed to register node")
329 }
330
Lorenz Brun52f7f292020-06-24 16:42:02 +0200331 go func() {
332 ctx := context.Background()
333 nodeClient, err := s.dialNode(ctx, &node)
334 if err != nil {
335 s.Logger.Warn("Failed to join newly enrolled node", zap.Error(err))
336 return
337 }
338 newCerts, initialCluster, err := s.consensusService.ProvisionMember(node.Name, node.Address)
339 if err != nil {
340 s.Logger.Warn("Failed to join newly enrolled node", zap.Error(err))
341 return
342 }
343 _, err = nodeClient.JoinCluster(ctx, &api.JoinClusterRequest{
344 InitialCluster: initialCluster,
345 Certs: newCerts,
346 }, grpcretry.WithMax(10))
347 if err != nil {
348 s.Logger.Warn("Failed to join newly enrolled node", zap.Error(err))
349 return
350 }
351 }()
352
Lorenz Brunaa6b7342019-12-12 02:55:02 +0100353 return nil
354}