m/test/launch: allow specifying launch parameters
This adds flags to the launch-cluster command for specifying the size of
the cluster, TPM and storage security configuration, number of CPUs and
RAM size for all nodes, and assigning roles to specific nodes.
As an example, the following command launches a cluster with TPM
disabled, 4 nodes, 2 CPUs and 4 GiB of RAM on each node, and assigns the
Kubernetes Worker role to all except the first node:
bazel run //metropolis:launch-cluster -- -tpm-mode=disabled \
-num-nodes=4 -cpu=2 -ram=4G -kubernetes-worker=1-3
The default storage security policy was changed to insecure, as this
speeds up cluster launch.
The cluster configuration flags are defined in a new separate package to
avoid code duplication.
Fixes: https://github.com/monogon-dev/monogon/issues/315
Change-Id: Icf8b7fcbd6e609f4785b2a60ce5e7be14b641884
Reviewed-on: https://review.monogon.dev/c/monogon/+/3307
Reviewed-by: Serge Bazanski <serge@monogon.tech>
Tested-by: Jenkins CI
diff --git a/metropolis/test/launch/cli/launch-cluster/BUILD.bazel b/metropolis/test/launch/cli/launch-cluster/BUILD.bazel
index a952abd..74067a7 100644
--- a/metropolis/test/launch/cli/launch-cluster/BUILD.bazel
+++ b/metropolis/test/launch/cli/launch-cluster/BUILD.bazel
@@ -7,7 +7,10 @@
importpath = "source.monogon.dev/metropolis/test/launch/cli/launch-cluster",
visibility = ["//visibility:private"],
deps = [
+ "//metropolis/cli/flagdefs",
"//metropolis/cli/metroctl/core",
+ "//metropolis/node",
+ "//metropolis/proto/common",
"//metropolis/test/launch",
],
)
diff --git a/metropolis/test/launch/cli/launch-cluster/main.go b/metropolis/test/launch/cli/launch-cluster/main.go
index 695fcc7..cceebd1 100644
--- a/metropolis/test/launch/cli/launch-cluster/main.go
+++ b/metropolis/test/launch/cli/launch-cluster/main.go
@@ -18,36 +18,139 @@
import (
"context"
+ "errors"
+ "flag"
+ "fmt"
"log"
+ "net"
"os"
"os/exec"
"os/signal"
+ "strconv"
+ "strings"
+ "time"
+ "source.monogon.dev/metropolis/cli/flagdefs"
metroctl "source.monogon.dev/metropolis/cli/metroctl/core"
+ "source.monogon.dev/metropolis/node"
+ cpb "source.monogon.dev/metropolis/proto/common"
mlaunch "source.monogon.dev/metropolis/test/launch"
)
-func main() {
- ctx, _ := signal.NotifyContext(context.Background(), os.Interrupt)
- cl, err := mlaunch.LaunchCluster(ctx, mlaunch.ClusterOptions{
- NumNodes: 3,
- NodeLogsToFiles: true,
+const maxNodes = 256 // exclusive bound: nodeSetFlag rejects node indices >= maxNodes and main rejects -num-nodes >= maxNodes. NOTE(review): main's message says "at most %v nodes", yet num-nodes == maxNodes is rejected - confirm intent.
+
+func nodeSetFlag(p *[]int, name string, usage string) { // nodeSetFlag registers flag name on the default flag set; each occurrence appends the parsed node indices to *p.
+ flag.Func(name, usage, func(val string) error {
+ for _, part := range strings.Split(val, ",") { // the value is a comma-separated list of entries
+ part = strings.TrimSpace(part)
+ if part == "" { // blank entries (e.g. from a trailing comma) are ignored
+ continue
+ }
+ startStr, endStr, ok := strings.Cut(part, "-") // an entry is a single index "N" or an inclusive range "START-END"
+ if !ok {
+ endStr = startStr // single index: handled as the range N-N
+ }
+ start, err := strconv.Atoi(startStr)
+ if err != nil {
+ return err
+ }
+ end, err := strconv.Atoi(endStr)
+ if err != nil {
+ return err
+ }
+ if end >= maxNodes { // only end is bounded here; the end < start check below keeps start within the same bound
+ return fmt.Errorf("node index %v out of range, there can be at most %v nodes", end, maxNodes)
+ }
+ if end < start {
+ return fmt.Errorf("invalid range %q, end is smaller than start", part)
+ }
+ for i := start; i <= end; i++ { // both ends inclusive; indices repeated across entries are appended again, not deduplicated
+ *p = append(*p, i)
+ }
+ }
+ return nil
})
+}
+
+func sizeFlagMiB(p *int, name string, usage string) { // sizeFlagMiB registers flag name on the default flag set; a value "<n>M" or "<n>G" is stored in *p as a MiB count.
+ flag.Func(name, usage, func(val string) error {
+ multiplier := 1 // MiB per unit of the given suffix
+ switch {
+ case strings.HasSuffix(val, "M"): // already MiB, multiplier stays 1
+ case strings.HasSuffix(val, "G"):
+ multiplier = 1024 // GiB to MiB
+ default:
+ return errors.New("must have suffix M for MiB or G for GiB")
+ }
+ intVal, err := strconv.ParseUint(val[:len(val)-1], 10, 31) // unsigned parse rejects negative sizes such as "-4G"; 31 bits so the result always fits in an int
+ if err != nil {
+ return err
+ }
+ *p = multiplier * int(intVal)
+ return nil
+ })
+}
+
+func main() {
+ clusterConfig := cpb.ClusterConfiguration{}
+ opts := mlaunch.ClusterOptions{
+ NodeLogsToFiles: true,
+ InitialClusterConfiguration: &clusterConfig,
+ }
+ var consensusMemberList, kubernetesControllerList, kubernetesWorkerList []int
+
+ flag.IntVar(&opts.NumNodes, "num-nodes", 3, "Number of cluster nodes")
+ flagdefs.TPMModeVar(flag.CommandLine, &clusterConfig.TpmMode, "tpm-mode", cpb.ClusterConfiguration_TPM_MODE_REQUIRED, "TPM mode to set on cluster")
+ flagdefs.StorageSecurityPolicyVar(flag.CommandLine, &clusterConfig.StorageSecurityPolicy, "storage-security", cpb.ClusterConfiguration_STORAGE_SECURITY_POLICY_NEEDS_INSECURE, "Storage security policy to set on cluster")
+ flag.IntVar(&opts.Node.CPUs, "cpu", 1, "Number of virtual CPUs of each node")
+ flag.IntVar(&opts.Node.ThreadsPerCPU, "threads-per-cpu", 1, "Number of threads per CPU")
+ sizeFlagMiB(&opts.Node.MemoryMiB, "ram", "RAM size of each node, with suffix M for MiB or G for GiB")
+ nodeSetFlag(&consensusMemberList, "consensus-member", "List of nodes which get the Consensus Member role. Example: 0,3-5")
+ nodeSetFlag(&kubernetesControllerList, "kubernetes-controller", "List of nodes which get the Kubernetes Controller role. Example: 0,3-5")
+ nodeSetFlag(&kubernetesWorkerList, "kubernetes-worker", "List of nodes which get the Kubernetes Worker role. Example: 0,3-5")
+ flag.Parse()
+
+ if opts.NumNodes >= maxNodes {
+ log.Fatalf("num-nodes (%v) is too large, there can be at most %v nodes", opts.NumNodes, maxNodes)
+ }
+ for _, list := range [][]int{consensusMemberList, kubernetesControllerList, kubernetesWorkerList} {
+ for i := len(list) - 1; i >= 0; i-- {
+ if list[i] >= opts.NumNodes {
+ log.Fatalf("Node index %v out of range, can be at most %v", list[i], opts.NumNodes-1)
+ }
+ }
+ }
+
+ ctx, _ := signal.NotifyContext(context.Background(), os.Interrupt)
+ cl, err := mlaunch.LaunchCluster(ctx, opts)
if err != nil {
log.Fatalf("LaunchCluster: %v", err)
}
+ for _, node := range consensusMemberList {
+ cl.MakeConsensusMember(ctx, cl.NodeIDs[node])
+ }
+ for _, node := range kubernetesControllerList {
+ cl.MakeKubernetesController(ctx, cl.NodeIDs[node])
+ }
+ for _, node := range kubernetesWorkerList {
+ cl.MakeKubernetesWorker(ctx, cl.NodeIDs[node])
+ }
+
wpath, err := cl.MakeMetroctlWrapper()
if err != nil {
log.Fatalf("MakeWrapper: %v", err)
}
- apiservers, err := cl.KubernetesControllerNodeAddresses(ctx)
- if err != nil {
- log.Fatalf("Could not get Kubernetes controller nodes: %v", err)
- }
- if len(apiservers) < 1 {
- log.Fatalf("Cluster has no Kubernetes controller nodes")
+ apiserver := cl.Nodes[cl.NodeIDs[0]].ManagementAddress
+ // Wait for the API server to start listening.
+ for {
+ conn, err := cl.DialNode(ctx, net.JoinHostPort(apiserver, node.KubernetesAPIWrappedPort.PortString()))
+ if err == nil {
+ conn.Close()
+ break
+ }
+ time.Sleep(100 * time.Millisecond)
}
// If the user has metroctl in their path, use the metroctl from path as
@@ -62,7 +165,7 @@
}
configName := "launch-cluster"
- if err := metroctl.InstallKubeletConfig(ctx, metroctlPath, cl.ConnectOptions(), configName, apiservers[0]); err != nil {
+ if err := metroctl.InstallKubeletConfig(ctx, metroctlPath, cl.ConnectOptions(), configName, apiserver); err != nil {
log.Fatalf("InstallKubeletConfig: %v", err)
}