blob: 80140a3aa62d99284240ff1b5994b30d788abad9 [file] [log] [blame]
// Copyright 2020 The Monogon Project Authors.
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
16
17package e2e
18
19import (
20 "context"
21 "errors"
22 "fmt"
23 "log"
Leopold Schabele28e6d72020-06-03 11:39:25 +020024 "net"
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020025 "net/http"
26 _ "net/http"
27 _ "net/http/pprof"
Lorenz Brun3ff5af32020-06-24 16:34:11 +020028 "os"
Lorenz Brun5e4fc2d2020-09-22 18:35:15 +020029 "strings"
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020030 "testing"
31 "time"
32
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020033 corev1 "k8s.io/api/core/v1"
Lorenz Brun30167f52021-03-17 17:49:01 +010034 "k8s.io/apimachinery/pkg/api/resource"
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020035 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
36 podv1 "k8s.io/kubernetes/pkg/api/v1/pod"
37
Serge Bazanski31370b02021-01-07 16:31:14 +010038 common "source.monogon.dev/metropolis/node"
Serge Bazanski6dff6d62022-01-28 18:15:14 +010039 "source.monogon.dev/metropolis/node/core/identity"
Serge Bazanski31370b02021-01-07 16:31:14 +010040 apb "source.monogon.dev/metropolis/proto/api"
Serge Bazanski66e58952021-10-05 17:06:56 +020041 "source.monogon.dev/metropolis/test/launch/cluster"
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020042)
43
const (
	// globalTestTimeout is the timeout for the global test context.
	//
	// Bazel would eventually time out the test after 900s ("large") if, for
	// some reason, the context cancellation fails to abort it.
	globalTestTimeout = 600 * time.Second

	// Timeouts for individual end-to-end test steps of different sizes.
	// smallTestTimeout is used for quick steps (e.g. creating a pod),
	// largeTestTimeout for steps that wait on workloads to come up.
	smallTestTimeout = 60 * time.Second
	largeTestTimeout = 120 * time.Second
)
55
Serge Bazanski216fe7b2021-05-21 18:36:16 +020056// TestE2E is the main E2E test entrypoint for single-node freshly-bootstrapped
57// E2E tests. It starts a full Metropolis node in bootstrap mode and then runs
58// tests against it. The actual tests it performs are located in the RunGroup
59// subtest.
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020060func TestE2E(t *testing.T) {
Leopold Schabele28e6d72020-06-03 11:39:25 +020061 // Run pprof server for debugging
Serge Bazanski66e58952021-10-05 17:06:56 +020062 addr, err := net.ResolveTCPAddr("tcp", "localhost:0")
63 if err != nil {
64 panic(err)
65 }
66
67 pprofListen, err := net.ListenTCP("tcp", addr)
68 if err != nil {
69 log.Fatalf("Failed to listen on pprof port: %s", pprofListen.Addr())
70 }
71
72 log.Printf("E2E: pprof server listening on %s", pprofListen.Addr())
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020073 go func() {
Serge Bazanski66e58952021-10-05 17:06:56 +020074 log.Printf("E2E: pprof server returned an error: %v", http.Serve(pprofListen, nil))
75 pprofListen.Close()
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020076 }()
Leopold Schabele28e6d72020-06-03 11:39:25 +020077
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020078 // Set a global timeout to make sure this terminates
Leopold Schabeld603f842020-06-09 17:48:09 +020079 ctx, cancel := context.WithTimeout(context.Background(), globalTestTimeout)
Serge Bazanski1f9a03b2021-08-17 13:40:53 +020080 defer cancel()
Serge Bazanski66e58952021-10-05 17:06:56 +020081
82 // Launch cluster.
Serge Bazanskie78a0892021-10-07 17:03:49 +020083 clusterOptions := cluster.ClusterOptions{
84 NumNodes: 2,
85 }
86 cluster, err := cluster.LaunchCluster(ctx, clusterOptions)
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020087 if err != nil {
Serge Bazanski66e58952021-10-05 17:06:56 +020088 t.Fatalf("LaunchCluster failed: %v", err)
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020089 }
Serge Bazanski66e58952021-10-05 17:06:56 +020090 defer func() {
91 err := cluster.Close()
92 if err != nil {
93 t.Fatalf("cluster Close failed: %v", err)
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020094 }
95 }()
Serge Bazanski1f9a03b2021-08-17 13:40:53 +020096
Serge Bazanski66e58952021-10-05 17:06:56 +020097 log.Printf("E2E: Cluster running, starting tests...")
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020098
Serge Bazanski216fe7b2021-05-21 18:36:16 +020099 // This exists to keep the parent around while all the children race.
100 // It currently tests both a set of OS-level conditions and Kubernetes
101 // Deployments and StatefulSets
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200102 t.Run("RunGroup", func(t *testing.T) {
Serge Bazanski6dff6d62022-01-28 18:15:14 +0100103 t.Run("Cluster", func(t *testing.T) {
Serge Bazanski66e58952021-10-05 17:06:56 +0200104 testEventual(t, "Retrieving cluster directory sucessful", ctx, 60*time.Second, func(ctx context.Context) error {
105 res, err := cluster.Management.GetClusterInfo(ctx, &apb.GetClusterInfoRequest{})
Serge Bazanskibf68fa92021-10-05 17:53:58 +0200106 if err != nil {
107 return fmt.Errorf("GetClusterInfo: %w", err)
108 }
109
Serge Bazanskie78a0892021-10-07 17:03:49 +0200110 // Ensure that the expected node count is present.
Serge Bazanskibf68fa92021-10-05 17:53:58 +0200111 nodes := res.ClusterDirectory.Nodes
Serge Bazanskie78a0892021-10-07 17:03:49 +0200112 if want, got := clusterOptions.NumNodes, len(nodes); want != got {
Serge Bazanskibf68fa92021-10-05 17:53:58 +0200113 return fmt.Errorf("wanted %d nodes in cluster directory, got %d", want, got)
114 }
Serge Bazanski6dff6d62022-01-28 18:15:14 +0100115
116 // Ensure the nodes have the expected addresses.
117 addresses := make(map[string]bool)
118 for _, n := range nodes {
119 if len(n.Addresses) != 1 {
120 return fmt.Errorf("node %s has no addresss", identity.NodeID(n.PublicKey))
121 }
122 address := n.Addresses[0].Host
123 addresses[address] = true
124 }
125
126 for _, address := range []string{"10.1.0.2", "10.1.0.3"} {
127 if !addresses[address] {
128 return fmt.Errorf("address %q not found in directory", address)
129 }
130 }
Serge Bazanski1f9a03b2021-08-17 13:40:53 +0200131 return nil
132 })
133 })
Serge Bazanski6dff6d62022-01-28 18:15:14 +0100134 t.Run("Kubernetes", func(t *testing.T) {
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200135 t.Parallel()
Lorenz Bruncc078df2021-12-23 11:51:55 +0100136 clientSet, err := GetKubeClientSet(cluster, cluster.Ports[common.KubernetesAPIWrappedPort])
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200137 if err != nil {
138 t.Fatal(err)
139 }
Serge Bazanski6dff6d62022-01-28 18:15:14 +0100140 testEventual(t, "Nodes are registered and ready", ctx, largeTestTimeout, func(ctx context.Context) error {
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200141 nodes, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
142 if err != nil {
143 return err
144 }
Serge Bazanski6dff6d62022-01-28 18:15:14 +0100145 if len(nodes.Items) < 2 {
146 return errors.New("nodes not yet registered")
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200147 }
148 node := nodes.Items[0]
149 for _, cond := range node.Status.Conditions {
150 if cond.Type != corev1.NodeReady {
151 continue
152 }
153 if cond.Status != corev1.ConditionTrue {
154 return fmt.Errorf("node not ready: %v", cond.Message)
155 }
156 }
157 return nil
158 })
Leopold Schabeld603f842020-06-09 17:48:09 +0200159 testEventual(t, "Simple deployment", ctx, largeTestTimeout, func(ctx context.Context) error {
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200160 _, err := clientSet.AppsV1().Deployments("default").Create(ctx, makeTestDeploymentSpec("test-deploy-1"), metav1.CreateOptions{})
161 return err
162 })
Leopold Schabeld603f842020-06-09 17:48:09 +0200163 testEventual(t, "Simple deployment is running", ctx, largeTestTimeout, func(ctx context.Context) error {
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200164 res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-deploy-1"})
165 if err != nil {
166 return err
167 }
168 if len(res.Items) == 0 {
169 return errors.New("pod didn't get created")
170 }
171 pod := res.Items[0]
172 if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
173 return nil
174 }
175 events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
176 if err != nil || len(events.Items) == 0 {
177 return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
178 } else {
179 return fmt.Errorf("pod is not ready: %v", events.Items[0].Message)
180 }
181 })
Lorenz Brun5e4fc2d2020-09-22 18:35:15 +0200182 testEventual(t, "Simple deployment with runc", ctx, largeTestTimeout, func(ctx context.Context) error {
183 deployment := makeTestDeploymentSpec("test-deploy-2")
184 var runcStr = "runc"
185 deployment.Spec.Template.Spec.RuntimeClassName = &runcStr
186 _, err := clientSet.AppsV1().Deployments("default").Create(ctx, deployment, metav1.CreateOptions{})
187 return err
188 })
189 testEventual(t, "Simple deployment is running on runc", ctx, largeTestTimeout, func(ctx context.Context) error {
190 res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-deploy-2"})
191 if err != nil {
192 return err
193 }
194 if len(res.Items) == 0 {
195 return errors.New("pod didn't get created")
196 }
197 pod := res.Items[0]
198 if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
199 return nil
200 }
201 events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
202 if err != nil || len(events.Items) == 0 {
203 return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
204 } else {
205 var errorMsg strings.Builder
206 for _, msg := range events.Items {
207 errorMsg.WriteString(" | ")
208 errorMsg.WriteString(msg.Message)
209 }
210 return fmt.Errorf("pod is not ready: %v", errorMsg.String())
211 }
212 })
Leopold Schabeld603f842020-06-09 17:48:09 +0200213 testEventual(t, "Simple StatefulSet with PVC", ctx, largeTestTimeout, func(ctx context.Context) error {
Lorenz Brun37050122021-03-30 14:00:27 +0200214 _, err := clientSet.AppsV1().StatefulSets("default").Create(ctx, makeTestStatefulSet("test-statefulset-1", corev1.PersistentVolumeFilesystem), metav1.CreateOptions{})
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200215 return err
216 })
Leopold Schabeld603f842020-06-09 17:48:09 +0200217 testEventual(t, "Simple StatefulSet with PVC is running", ctx, largeTestTimeout, func(ctx context.Context) error {
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200218 res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-statefulset-1"})
219 if err != nil {
220 return err
221 }
222 if len(res.Items) == 0 {
223 return errors.New("pod didn't get created")
224 }
225 pod := res.Items[0]
226 if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
227 return nil
228 }
229 events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
230 if err != nil || len(events.Items) == 0 {
231 return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
232 } else {
233 return fmt.Errorf("pod is not ready: %v", events.Items[0].Message)
234 }
235 })
Lorenz Brun37050122021-03-30 14:00:27 +0200236 testEventual(t, "Simple StatefulSet with Block PVC", ctx, largeTestTimeout, func(ctx context.Context) error {
237 _, err := clientSet.AppsV1().StatefulSets("default").Create(ctx, makeTestStatefulSet("test-statefulset-2", corev1.PersistentVolumeBlock), metav1.CreateOptions{})
238 return err
239 })
240 testEventual(t, "Simple StatefulSet with Block PVC is running", ctx, largeTestTimeout, func(ctx context.Context) error {
241 res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-statefulset-2"})
242 if err != nil {
243 return err
244 }
245 if len(res.Items) == 0 {
246 return errors.New("pod didn't get created")
247 }
248 pod := res.Items[0]
249 if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
250 return nil
251 }
252 events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
253 if err != nil || len(events.Items) == 0 {
254 return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
255 } else {
256 return fmt.Errorf("pod is not ready: %v", events.Items[0].Message)
257 }
258 })
Lorenz Brun8b0431a2020-07-13 16:56:36 +0200259 testEventual(t, "Pod with preseeded image", ctx, smallTestTimeout, func(ctx context.Context) error {
260 _, err := clientSet.CoreV1().Pods("default").Create(ctx, &corev1.Pod{
261 ObjectMeta: metav1.ObjectMeta{
262 Name: "preseed-test-1",
263 },
264 Spec: corev1.PodSpec{
265 Containers: []corev1.Container{{
266 Name: "preseed-test-1",
267 ImagePullPolicy: corev1.PullNever,
Serge Bazanski77cb6c52020-12-19 00:09:22 +0100268 Image: "bazel/metropolis/test/e2e/preseedtest:preseedtest",
Lorenz Brun8b0431a2020-07-13 16:56:36 +0200269 }},
270 RestartPolicy: corev1.RestartPolicyNever,
271 },
272 }, metav1.CreateOptions{})
273 return err
274 })
275 testEventual(t, "Pod with preseeded image is completed", ctx, largeTestTimeout, func(ctx context.Context) error {
276 pod, err := clientSet.CoreV1().Pods("default").Get(ctx, "preseed-test-1", metav1.GetOptions{})
277 if err != nil {
278 return fmt.Errorf("failed to get pod: %w", err)
279 }
280 if pod.Status.Phase == corev1.PodSucceeded {
281 return nil
282 }
283 events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
284 if err != nil || len(events.Items) == 0 {
285 return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
286 } else {
287 return fmt.Errorf("pod is not ready: %v", events.Items[len(events.Items)-1].Message)
288 }
289 })
Lorenz Brun30167f52021-03-17 17:49:01 +0100290 if os.Getenv("HAVE_NESTED_KVM") != "" {
291 testEventual(t, "Pod for KVM/QEMU smoke test", ctx, smallTestTimeout, func(ctx context.Context) error {
292 runcRuntimeClass := "runc"
293 _, err := clientSet.CoreV1().Pods("default").Create(ctx, &corev1.Pod{
294 ObjectMeta: metav1.ObjectMeta{
295 Name: "vm-smoketest",
296 },
297 Spec: corev1.PodSpec{
298 Containers: []corev1.Container{{
299 Name: "vm-smoketest",
300 ImagePullPolicy: corev1.PullNever,
301 Image: "bazel/metropolis/vm/smoketest:smoketest_container",
302 Resources: corev1.ResourceRequirements{
303 Limits: corev1.ResourceList{
304 "devices.monogon.dev/kvm": *resource.NewQuantity(1, ""),
305 },
306 },
307 }},
308 RuntimeClassName: &runcRuntimeClass,
309 RestartPolicy: corev1.RestartPolicyNever,
310 },
311 }, metav1.CreateOptions{})
312 return err
313 })
314 testEventual(t, "KVM/QEMU smoke test completion", ctx, smallTestTimeout, func(ctx context.Context) error {
315 pod, err := clientSet.CoreV1().Pods("default").Get(ctx, "vm-smoketest", metav1.GetOptions{})
316 if err != nil {
317 return fmt.Errorf("failed to get pod: %w", err)
318 }
319 if pod.Status.Phase == corev1.PodSucceeded {
320 return nil
321 }
322 events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
323 if err != nil || len(events.Items) == 0 {
324 return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
325 } else {
326 return fmt.Errorf("pod is not ready: %v", events.Items[len(events.Items)-1].Message)
327 }
328 })
329 }
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200330 })
331 })
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200332}