core/internal/cluster: add new single-node cluster code
This adds a cluster library that consists of:
- a Node object that can be loaded from and saved into etcd,
representing a node of the cluster that can have different 'role
tags' assigned to it
- a cluster Manager, that is responsible for bringing up the local node
into a cluster (by creating a new cluster, enrolling into or joining a
cluster)
This also gets wired into core/cmd/init, and as such completes a chunk
of The Refactor. This code should pass tests.
Test Plan: this should work! should be covered by existing e2e tests.
X-Origin-Diff: phab/D590
GitOrigin-RevId: e88022164e4353249b29fc16849a02805f15dd49
diff --git a/core/internal/kubernetes/pki/certificate.go b/core/internal/kubernetes/pki/certificate.go
index e0dea0d..6bd50f9 100644
--- a/core/internal/kubernetes/pki/certificate.go
+++ b/core/internal/kubernetes/pki/certificate.go
@@ -106,6 +106,12 @@
// ensure returns a DER-encoded x509 certificate and internally encoded bare ed25519 key for a given Certificate,
// in memory (if volatile), loading it from etcd, or creating and saving it on etcd if needed.
+// This function is safe to call in parallel from multiple etcd clients (including across machines), but it will error
+// in case a concurrent certificate generation happens. These errors are, however, safe to retry - as long as all the
+// certificate creators (i.e., Smalltown nodes) run the same version of this code.
+// TODO(q3k): in the future, this should be handled better - especially as we introduce new certificates, or worse,
+// change the issuance chain. As a stopgap measure, an explicit per-certificate or even global lock can be implemented.
+// And, even before that, we can handle concurrency errors in a smarter way.
func (c *Certificate) ensure(ctx context.Context, kv clientv3.KV) (cert, key []byte, err error) {
if c.name == "" {
// Volatile certificate - generate.
@@ -149,14 +155,21 @@
return
}
- // Save to etcd in transaction. This ensures that no partial writes happen.
- _, err = kv.Txn(ctx).
+ // Save to etcd in transaction. This ensures that no partial writes happen, and that we haven't been raced to the
+ // save.
+ res, err := kv.Txn(ctx).
+ If(
+ clientv3.Compare(clientv3.CreateRevision(certPath), "=", 0),
+ clientv3.Compare(clientv3.CreateRevision(keyPath), "=", 0),
+ ).
Then(
clientv3.OpPut(certPath, string(cert)),
clientv3.OpPut(keyPath, string(key)),
).Commit()
if err != nil {
err = fmt.Errorf("failed to write newly issued certificate: %w", err)
+ } else if !res.Succeeded {
+ err = fmt.Errorf("certificate issuance transaction failed: concurrent write")
}
return