blob: f2f7cc2528b2ebff370804aed6fc769141b4b5d9 [file] [log] [blame]
Serge Bazanski99b02142024-04-17 16:33:28 +02001package ha
2
3import (
4 "context"
5 "fmt"
6 "os"
7 "testing"
8 "time"
9
10 "github.com/bazelbuild/rules_go/go/runfiles"
11
Tim Windelschmidt9f21f532024-05-07 15:14:20 +020012 mlaunch "source.monogon.dev/metropolis/test/launch"
13 "source.monogon.dev/metropolis/test/localregistry"
Serge Bazanski99b02142024-04-17 16:33:28 +020014 "source.monogon.dev/metropolis/test/util"
Tim Windelschmidt9f21f532024-05-07 15:14:20 +020015 "source.monogon.dev/osbase/test/launch"
Serge Bazanski99b02142024-04-17 16:33:28 +020016)
17
// xTestImagesManifestPath is filled in by bazel at linking time with the
// canonical path of the test images manifest. The init function below
// rewrites it in place to the real path using the rules_go runfiles package.
var xTestImagesManifestPath string
24
25func init() {
26 var err error
27 for _, path := range []*string{
28 &xTestImagesManifestPath,
29 } {
30 *path, err = runfiles.Rlocation(*path)
31 if err != nil {
32 panic(err)
33 }
34 }
35}
36
const (
	// globalTestTimeout bounds the context shared by the whole test.
	//
	// Should context cancellation fail to abort the test for some reason,
	// Bazel would eventually time it out after 900s ("large").
	globalTestTimeout = 10 * time.Minute

	// Per-step timeouts for individual end-to-end tests of different sizes.
	smallTestTimeout = time.Minute
	largeTestTimeout = 2 * time.Minute
)
48
49// TestE2ECoreHA exercises the basics of a high-availability control plane by
50// starting up a 3-node cluster, turning all nodes into ConsensusMembers, then
51// performing a rolling restart.
52func TestE2ECoreHA(t *testing.T) {
53 // Set a global timeout to make sure this terminates
54 ctx, cancel := context.WithTimeout(context.Background(), globalTestTimeout)
55 defer cancel()
56
Tim Windelschmidt82e6af72024-07-23 00:05:42 +000057 df, err := os.ReadFile(xTestImagesManifestPath)
Serge Bazanski99b02142024-04-17 16:33:28 +020058 if err != nil {
59 t.Fatalf("Reading registry manifest failed: %v", err)
60 }
61 lr, err := localregistry.FromBazelManifest(df)
62 if err != nil {
63 t.Fatalf("Creating test image registry failed: %v", err)
64 }
65 // Launch cluster.
Tim Windelschmidt9f21f532024-05-07 15:14:20 +020066 clusterOptions := mlaunch.ClusterOptions{
Serge Bazanski99b02142024-04-17 16:33:28 +020067 NumNodes: 3,
68 LocalRegistry: lr,
69 NodeLogsToFiles: true,
70 }
Tim Windelschmidt9f21f532024-05-07 15:14:20 +020071 cluster, err := mlaunch.LaunchCluster(ctx, clusterOptions)
Serge Bazanski99b02142024-04-17 16:33:28 +020072 if err != nil {
73 t.Fatalf("LaunchCluster failed: %v", err)
74 }
75 defer func() {
76 err := cluster.Close()
77 if err != nil {
78 t.Fatalf("cluster Close failed: %v", err)
79 }
80 }()
81
82 launch.Log("E2E: Cluster running, starting tests...")
83
84 util.MustTestEventual(t, "Add ConsensusMember roles", ctx, smallTestTimeout, func(ctx context.Context) error {
85 // Make everything but the first node into ConsensusMember.
86 for i := 1; i < clusterOptions.NumNodes; i++ {
87 err := cluster.MakeConsensusMember(ctx, cluster.NodeIDs[i])
88 if err != nil {
89 return fmt.Errorf("MakeConsensusMember(%d/%s): %w", i, cluster.NodeIDs[i], err)
90 }
91 }
92 return nil
93 })
94 util.TestEventual(t, "Heartbeat test successful", ctx, 20*time.Second, cluster.AllNodesHealthy)
95
96 // Perform rolling restart of all nodes. When a node rejoins it must be able to
97 // contact the cluster, so this also exercises that the cluster is serving even
98 // with the node having rebooted.
99 for i := 0; i < clusterOptions.NumNodes; i++ {
100 util.MustTestEventual(t, fmt.Sprintf("Node %d rejoin successful", i), ctx, 60*time.Second, func(ctx context.Context) error {
101 // Ensure nodes rejoin the cluster after a reboot by reboting the 1st node.
102 if err := cluster.RebootNode(ctx, i); err != nil {
103 return fmt.Errorf("while rebooting a node: %w", err)
104 }
105 return nil
106 })
107 }
Serge Bazanski99b02142024-04-17 16:33:28 +0200108}