core/internal/cluster: add new single-node cluster code

This adds a cluster library that consists of:
 - a Node object that can be loaded from and saved into etcd,
   representing a node of the cluster that can have different 'role
   tags' assigned to it
 - a cluster Manager, that is responsible for bringing up the local node
   into a cluster (by creating a new cluster, enrolling into or joining a
   cluster)

This also gets wired into core/cmd/init, and as such completes a chunk
of The Refactor. This code should pass tests.

Test Plan: this should work! should be covered by existing e2e tests.

X-Origin-Diff: phab/D590
GitOrigin-RevId: e88022164e4353249b29fc16849a02805f15dd49
diff --git a/core/internal/kubernetes/pki/certificate.go b/core/internal/kubernetes/pki/certificate.go
index e0dea0d..6bd50f9 100644
--- a/core/internal/kubernetes/pki/certificate.go
+++ b/core/internal/kubernetes/pki/certificate.go
@@ -106,6 +106,12 @@
 
 // ensure returns a DER-encoded x509 certificate and internally encoded bare ed25519 key for a given Certificate,
 // in memory (if volatile), loading it from etcd, or creating and saving it on etcd if needed.
+// This function is safe to call in parallel from multiple etcd clients (including across machines), but it will error
+// in case a concurrent certificate generation happens. These errors are, however, safe to retry - as long as all the
+// certificate creators (i.e., Smalltown nodes) run the same version of this code.
+// TODO(q3k): in the future, this should be handled better - especially as we introduce new certificates, or worse,
+// change the issuance chain. As a stopgap measure, an explicit per-certificate or even global lock can be implemented.
+// And, even before that, we can handle concurrency errors in a smarter way.
 func (c *Certificate) ensure(ctx context.Context, kv clientv3.KV) (cert, key []byte, err error) {
 	if c.name == "" {
 		// Volatile certificate - generate.
@@ -149,14 +155,21 @@
 		return
 	}
 
-	// Save to etcd in transaction. This ensures that no partial writes happen.
-	_, err = kv.Txn(ctx).
+	// Save to etcd in transaction. This ensures that no partial writes happen, and that we haven't been raced to the
+	// save.
+	res, err := kv.Txn(ctx).
+		If(
+			clientv3.Compare(clientv3.CreateRevision(certPath), "=", 0),
+			clientv3.Compare(clientv3.CreateRevision(keyPath), "=", 0),
+		).
 		Then(
 			clientv3.OpPut(certPath, string(cert)),
 			clientv3.OpPut(keyPath, string(key)),
 		).Commit()
 	if err != nil {
 		err = fmt.Errorf("failed to write newly issued certificate: %w", err)
+	} else if !res.Succeeded {
+		err = fmt.Errorf("certificate issuance transaction failed: concurrent write")
 	}
 
 	return