blob: 0fe13783fe7c1c9eb376ecc15bb7378284b80c4f [file] [log] [blame]
// Copyright 2020 The Monogon Project Authors.
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
16
17package e2e
18
19import (
Serge Bazanski1f9a03b2021-08-17 13:40:53 +020020 "bytes"
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020021 "context"
Serge Bazanski1f9a03b2021-08-17 13:40:53 +020022 "crypto/ed25519"
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020023 "errors"
24 "fmt"
25 "log"
Leopold Schabele28e6d72020-06-03 11:39:25 +020026 "net"
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020027 "net/http"
28 _ "net/http"
29 _ "net/http/pprof"
Lorenz Brun3ff5af32020-06-24 16:34:11 +020030 "os"
Lorenz Brun5e4fc2d2020-09-22 18:35:15 +020031 "strings"
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020032 "testing"
33 "time"
34
35 "google.golang.org/grpc"
36 corev1 "k8s.io/api/core/v1"
Lorenz Brun30167f52021-03-17 17:49:01 +010037 "k8s.io/apimachinery/pkg/api/resource"
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020038 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
39 podv1 "k8s.io/kubernetes/pkg/api/v1/pod"
40
Serge Bazanski31370b02021-01-07 16:31:14 +010041 common "source.monogon.dev/metropolis/node"
42 apb "source.monogon.dev/metropolis/proto/api"
43 "source.monogon.dev/metropolis/test/launch"
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020044)
45
const (
	// globalTestTimeout bounds the context shared by the whole test run.
	//
	// Bazel would eventually time out the test after 900s ("large") if, for
	// some reason, the context cancellation fails to abort it, so this value
	// is kept below that limit.
	globalTestTimeout = 10 * time.Minute

	// Budgets for individual end-to-end checks, by size class.
	smallTestTimeout = time.Minute
	largeTestTimeout = 2 * time.Minute
)
57
Serge Bazanski216fe7b2021-05-21 18:36:16 +020058// TestE2E is the main E2E test entrypoint for single-node freshly-bootstrapped
59// E2E tests. It starts a full Metropolis node in bootstrap mode and then runs
60// tests against it. The actual tests it performs are located in the RunGroup
61// subtest.
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020062func TestE2E(t *testing.T) {
Leopold Schabele28e6d72020-06-03 11:39:25 +020063 // Run pprof server for debugging
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020064 go func() {
Leopold Schabele28e6d72020-06-03 11:39:25 +020065 addr, err := net.ResolveTCPAddr("tcp", "localhost:0")
66 if err != nil {
67 panic(err)
68 }
69
70 l, err := net.ListenTCP("tcp", addr)
71 if err != nil {
72 log.Fatalf("Failed to listen on pprof port: %s", l.Addr())
73 }
74 defer l.Close()
75
76 log.Printf("pprof server listening on %s", l.Addr())
77 log.Printf("pprof server returned an error: %v", http.Serve(l, nil))
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020078 }()
Leopold Schabele28e6d72020-06-03 11:39:25 +020079
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020080 // Set a global timeout to make sure this terminates
Leopold Schabeld603f842020-06-09 17:48:09 +020081 ctx, cancel := context.WithTimeout(context.Background(), globalTestTimeout)
Serge Bazanski1f9a03b2021-08-17 13:40:53 +020082 defer cancel()
Lorenz Bruned0503c2020-07-28 17:21:25 +020083 portMap, err := launch.ConflictFreePortMap(launch.NodePorts)
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020084 if err != nil {
85 t.Fatalf("Failed to acquire ports for e2e test: %v", err)
86 }
Leopold Schabela013ffa2020-06-03 15:09:32 +020087
88 procExit := make(chan struct{})
89
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020090 go func() {
Serge Bazanski0ed2f962021-03-15 16:39:30 +010091 if err := launch.Launch(ctx, launch.Options{
92 Ports: portMap,
93 SerialPort: os.Stdout,
94 NodeParameters: &apb.NodeParameters{
95 Cluster: &apb.NodeParameters_ClusterBootstrap_{
Serge Bazanski1f9a03b2021-08-17 13:40:53 +020096 ClusterBootstrap: launch.InsecureClusterBootstrap,
Serge Bazanski0ed2f962021-03-15 16:39:30 +010097 },
98 },
99 }); err != nil {
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200100 panic(err)
101 }
Leopold Schabela013ffa2020-06-03 15:09:32 +0200102 close(procExit)
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200103 }()
Serge Bazanski1f9a03b2021-08-17 13:40:53 +0200104
105 grpcDebug, err := portMap.DialGRPC(common.DebugServicePort, grpc.WithInsecure())
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200106 if err != nil {
Serge Bazanski1f9a03b2021-08-17 13:40:53 +0200107 log.Printf("Failed to dial debug service (is it running?): %v", err)
108 return
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200109 }
Serge Bazanski1f9a03b2021-08-17 13:40:53 +0200110 debug := apb.NewNodeDebugServiceClient(grpcDebug)
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200111
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200112 // This exists to keep the parent around while all the children race.
113 // It currently tests both a set of OS-level conditions and Kubernetes
114 // Deployments and StatefulSets
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200115 t.Run("RunGroup", func(t *testing.T) {
Serge Bazanski1f9a03b2021-08-17 13:40:53 +0200116 t.Run("Connect to Curator", func(t *testing.T) {
117 testEventual(t, "Retrieving owner credentials succesful", ctx, 60*time.Second, func(ctx context.Context) error {
118 initClient, err := launch.NewInitialClient(&launch.InitialClientOptions{
119 Remote: fmt.Sprintf("localhost:%v", portMap[common.CuratorServicePort]),
120 Private: launch.InsecurePrivateKey,
121 })
122 if err != nil {
123 return fmt.Errorf("NewInitialClient: %w", err)
124 }
125
126 cert, err := initClient.RetrieveOwnerCertificate(ctx)
127 if err != nil {
128 return fmt.Errorf("RetrieveOwnerCertificate: %w", err)
129 }
130
131 if !bytes.Equal(cert.PrivateKey.(ed25519.PrivateKey), launch.InsecurePrivateKey) {
132 t.Fatalf("Received certificate for wrong private key")
133 }
134
135 return nil
136 })
137 })
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200138 t.Run("Get Kubernetes Debug Kubeconfig", func(t *testing.T) {
139 t.Parallel()
Leopold Schabeld603f842020-06-09 17:48:09 +0200140 selfCtx, cancel := context.WithTimeout(ctx, largeTestTimeout)
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200141 defer cancel()
Serge Bazanski1f9a03b2021-08-17 13:40:53 +0200142 clientSet, err := GetKubeClientSet(selfCtx, debug, portMap[common.KubernetesAPIPort])
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200143 if err != nil {
144 t.Fatal(err)
145 }
Leopold Schabeld603f842020-06-09 17:48:09 +0200146 testEventual(t, "Node is registered and ready", ctx, largeTestTimeout, func(ctx context.Context) error {
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200147 nodes, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
148 if err != nil {
149 return err
150 }
151 if len(nodes.Items) < 1 {
152 return errors.New("node not registered")
153 }
154 if len(nodes.Items) > 1 {
155 return errors.New("more than one node registered (but there is only one)")
156 }
157 node := nodes.Items[0]
158 for _, cond := range node.Status.Conditions {
159 if cond.Type != corev1.NodeReady {
160 continue
161 }
162 if cond.Status != corev1.ConditionTrue {
163 return fmt.Errorf("node not ready: %v", cond.Message)
164 }
165 }
166 return nil
167 })
Leopold Schabeld603f842020-06-09 17:48:09 +0200168 testEventual(t, "Simple deployment", ctx, largeTestTimeout, func(ctx context.Context) error {
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200169 _, err := clientSet.AppsV1().Deployments("default").Create(ctx, makeTestDeploymentSpec("test-deploy-1"), metav1.CreateOptions{})
170 return err
171 })
Leopold Schabeld603f842020-06-09 17:48:09 +0200172 testEventual(t, "Simple deployment is running", ctx, largeTestTimeout, func(ctx context.Context) error {
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200173 res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-deploy-1"})
174 if err != nil {
175 return err
176 }
177 if len(res.Items) == 0 {
178 return errors.New("pod didn't get created")
179 }
180 pod := res.Items[0]
181 if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
182 return nil
183 }
184 events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
185 if err != nil || len(events.Items) == 0 {
186 return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
187 } else {
188 return fmt.Errorf("pod is not ready: %v", events.Items[0].Message)
189 }
190 })
Lorenz Brun5e4fc2d2020-09-22 18:35:15 +0200191 testEventual(t, "Simple deployment with runc", ctx, largeTestTimeout, func(ctx context.Context) error {
192 deployment := makeTestDeploymentSpec("test-deploy-2")
193 var runcStr = "runc"
194 deployment.Spec.Template.Spec.RuntimeClassName = &runcStr
195 _, err := clientSet.AppsV1().Deployments("default").Create(ctx, deployment, metav1.CreateOptions{})
196 return err
197 })
198 testEventual(t, "Simple deployment is running on runc", ctx, largeTestTimeout, func(ctx context.Context) error {
199 res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-deploy-2"})
200 if err != nil {
201 return err
202 }
203 if len(res.Items) == 0 {
204 return errors.New("pod didn't get created")
205 }
206 pod := res.Items[0]
207 if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
208 return nil
209 }
210 events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
211 if err != nil || len(events.Items) == 0 {
212 return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
213 } else {
214 var errorMsg strings.Builder
215 for _, msg := range events.Items {
216 errorMsg.WriteString(" | ")
217 errorMsg.WriteString(msg.Message)
218 }
219 return fmt.Errorf("pod is not ready: %v", errorMsg.String())
220 }
221 })
Leopold Schabeld603f842020-06-09 17:48:09 +0200222 testEventual(t, "Simple StatefulSet with PVC", ctx, largeTestTimeout, func(ctx context.Context) error {
Lorenz Brun37050122021-03-30 14:00:27 +0200223 _, err := clientSet.AppsV1().StatefulSets("default").Create(ctx, makeTestStatefulSet("test-statefulset-1", corev1.PersistentVolumeFilesystem), metav1.CreateOptions{})
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200224 return err
225 })
Leopold Schabeld603f842020-06-09 17:48:09 +0200226 testEventual(t, "Simple StatefulSet with PVC is running", ctx, largeTestTimeout, func(ctx context.Context) error {
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200227 res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-statefulset-1"})
228 if err != nil {
229 return err
230 }
231 if len(res.Items) == 0 {
232 return errors.New("pod didn't get created")
233 }
234 pod := res.Items[0]
235 if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
236 return nil
237 }
238 events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
239 if err != nil || len(events.Items) == 0 {
240 return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
241 } else {
242 return fmt.Errorf("pod is not ready: %v", events.Items[0].Message)
243 }
244 })
Lorenz Brun37050122021-03-30 14:00:27 +0200245 testEventual(t, "Simple StatefulSet with Block PVC", ctx, largeTestTimeout, func(ctx context.Context) error {
246 _, err := clientSet.AppsV1().StatefulSets("default").Create(ctx, makeTestStatefulSet("test-statefulset-2", corev1.PersistentVolumeBlock), metav1.CreateOptions{})
247 return err
248 })
249 testEventual(t, "Simple StatefulSet with Block PVC is running", ctx, largeTestTimeout, func(ctx context.Context) error {
250 res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-statefulset-2"})
251 if err != nil {
252 return err
253 }
254 if len(res.Items) == 0 {
255 return errors.New("pod didn't get created")
256 }
257 pod := res.Items[0]
258 if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
259 return nil
260 }
261 events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
262 if err != nil || len(events.Items) == 0 {
263 return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
264 } else {
265 return fmt.Errorf("pod is not ready: %v", events.Items[0].Message)
266 }
267 })
Lorenz Brun8b0431a2020-07-13 16:56:36 +0200268 testEventual(t, "Pod with preseeded image", ctx, smallTestTimeout, func(ctx context.Context) error {
269 _, err := clientSet.CoreV1().Pods("default").Create(ctx, &corev1.Pod{
270 ObjectMeta: metav1.ObjectMeta{
271 Name: "preseed-test-1",
272 },
273 Spec: corev1.PodSpec{
274 Containers: []corev1.Container{{
275 Name: "preseed-test-1",
276 ImagePullPolicy: corev1.PullNever,
Serge Bazanski77cb6c52020-12-19 00:09:22 +0100277 Image: "bazel/metropolis/test/e2e/preseedtest:preseedtest",
Lorenz Brun8b0431a2020-07-13 16:56:36 +0200278 }},
279 RestartPolicy: corev1.RestartPolicyNever,
280 },
281 }, metav1.CreateOptions{})
282 return err
283 })
284 testEventual(t, "Pod with preseeded image is completed", ctx, largeTestTimeout, func(ctx context.Context) error {
285 pod, err := clientSet.CoreV1().Pods("default").Get(ctx, "preseed-test-1", metav1.GetOptions{})
286 if err != nil {
287 return fmt.Errorf("failed to get pod: %w", err)
288 }
289 if pod.Status.Phase == corev1.PodSucceeded {
290 return nil
291 }
292 events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
293 if err != nil || len(events.Items) == 0 {
294 return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
295 } else {
296 return fmt.Errorf("pod is not ready: %v", events.Items[len(events.Items)-1].Message)
297 }
298 })
Lorenz Brun30167f52021-03-17 17:49:01 +0100299 if os.Getenv("HAVE_NESTED_KVM") != "" {
300 testEventual(t, "Pod for KVM/QEMU smoke test", ctx, smallTestTimeout, func(ctx context.Context) error {
301 runcRuntimeClass := "runc"
302 _, err := clientSet.CoreV1().Pods("default").Create(ctx, &corev1.Pod{
303 ObjectMeta: metav1.ObjectMeta{
304 Name: "vm-smoketest",
305 },
306 Spec: corev1.PodSpec{
307 Containers: []corev1.Container{{
308 Name: "vm-smoketest",
309 ImagePullPolicy: corev1.PullNever,
310 Image: "bazel/metropolis/vm/smoketest:smoketest_container",
311 Resources: corev1.ResourceRequirements{
312 Limits: corev1.ResourceList{
313 "devices.monogon.dev/kvm": *resource.NewQuantity(1, ""),
314 },
315 },
316 }},
317 RuntimeClassName: &runcRuntimeClass,
318 RestartPolicy: corev1.RestartPolicyNever,
319 },
320 }, metav1.CreateOptions{})
321 return err
322 })
323 testEventual(t, "KVM/QEMU smoke test completion", ctx, smallTestTimeout, func(ctx context.Context) error {
324 pod, err := clientSet.CoreV1().Pods("default").Get(ctx, "vm-smoketest", metav1.GetOptions{})
325 if err != nil {
326 return fmt.Errorf("failed to get pod: %w", err)
327 }
328 if pod.Status.Phase == corev1.PodSucceeded {
329 return nil
330 }
331 events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
332 if err != nil || len(events.Items) == 0 {
333 return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
334 } else {
335 return fmt.Errorf("pod is not ready: %v", events.Items[len(events.Items)-1].Message)
336 }
337 })
338 }
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200339 })
340 })
Leopold Schabela013ffa2020-06-03 15:09:32 +0200341
342 // Cancel the main context and wait for our subprocesses to exit
343 // to avoid leaking them and blocking the parent.
344 cancel()
345 <-procExit
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200346}