blob: 3e4ffb077a3246e7b487c3504e4ca74d3daaec13 [file] [log] [blame]
// Copyright 2020 The Monogon Project Authors.
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
16
17package e2e
18
19import (
20 "context"
21 "errors"
22 "fmt"
23 "log"
Leopold Schabele28e6d72020-06-03 11:39:25 +020024 "net"
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020025 "net/http"
26 _ "net/http"
27 _ "net/http/pprof"
Lorenz Brun3ff5af32020-06-24 16:34:11 +020028 "os"
Lorenz Brun5e4fc2d2020-09-22 18:35:15 +020029 "strings"
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020030 "testing"
31 "time"
32
33 "google.golang.org/grpc"
34 corev1 "k8s.io/api/core/v1"
35 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
36 podv1 "k8s.io/kubernetes/pkg/api/v1/pod"
37
Serge Bazanski31370b02021-01-07 16:31:14 +010038 common "source.monogon.dev/metropolis/node"
39 apb "source.monogon.dev/metropolis/proto/api"
40 "source.monogon.dev/metropolis/test/launch"
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020041)
42
const (
	// globalTestTimeout bounds the context shared by the whole test run.
	//
	// Should cancelling that context somehow fail to stop the test, Bazel
	// still kills it after 900s (the limit for "large" tests).
	globalTestTimeout = 10 * time.Minute

	// Per-step budgets for individual end-to-end checks, by expected size.
	smallTestTimeout = 1 * time.Minute
	largeTestTimeout = 2 * time.Minute
)
54
Serge Bazanski662b5b32020-12-21 13:49:00 +010055// TestE2E is the main E2E test entrypoint for single-node freshly-bootstrapped E2E tests. It starts a full Metropolis node
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020056// in bootstrap mode and then runs tests against it. The actual tests it performs are located in the RunGroup subtest.
57func TestE2E(t *testing.T) {
Leopold Schabele28e6d72020-06-03 11:39:25 +020058 // Run pprof server for debugging
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020059 go func() {
Leopold Schabele28e6d72020-06-03 11:39:25 +020060 addr, err := net.ResolveTCPAddr("tcp", "localhost:0")
61 if err != nil {
62 panic(err)
63 }
64
65 l, err := net.ListenTCP("tcp", addr)
66 if err != nil {
67 log.Fatalf("Failed to listen on pprof port: %s", l.Addr())
68 }
69 defer l.Close()
70
71 log.Printf("pprof server listening on %s", l.Addr())
72 log.Printf("pprof server returned an error: %v", http.Serve(l, nil))
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020073 }()
Leopold Schabele28e6d72020-06-03 11:39:25 +020074
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020075 // Set a global timeout to make sure this terminates
Leopold Schabeld603f842020-06-09 17:48:09 +020076 ctx, cancel := context.WithTimeout(context.Background(), globalTestTimeout)
Lorenz Bruned0503c2020-07-28 17:21:25 +020077 portMap, err := launch.ConflictFreePortMap(launch.NodePorts)
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020078 if err != nil {
79 t.Fatalf("Failed to acquire ports for e2e test: %v", err)
80 }
Leopold Schabela013ffa2020-06-03 15:09:32 +020081
82 procExit := make(chan struct{})
83
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020084 go func() {
Serge Bazanski0ed2f962021-03-15 16:39:30 +010085 if err := launch.Launch(ctx, launch.Options{
86 Ports: portMap,
87 SerialPort: os.Stdout,
88 NodeParameters: &apb.NodeParameters{
89 Cluster: &apb.NodeParameters_ClusterBootstrap_{
90 ClusterBootstrap: &apb.NodeParameters_ClusterBootstrap{},
91 },
92 },
93 }); err != nil {
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020094 panic(err)
95 }
Leopold Schabela013ffa2020-06-03 15:09:32 +020096 close(procExit)
Lorenz Brunfc5dbc62020-05-28 12:18:07 +020097 }()
98 grpcClient, err := portMap.DialGRPC(common.DebugServicePort, grpc.WithInsecure())
99 if err != nil {
100 fmt.Printf("Failed to dial debug service (is it running): %v\n", err)
101 }
Serge Bazanskiefdb6e92020-07-13 17:19:27 +0200102 debugClient := apb.NewNodeDebugServiceClient(grpcClient)
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200103
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200104 // This exists to keep the parent around while all the children race
105 // It currently tests both a set of OS-level conditions and Kubernetes Deployments and StatefulSets
106 t.Run("RunGroup", func(t *testing.T) {
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200107 t.Run("Get Kubernetes Debug Kubeconfig", func(t *testing.T) {
108 t.Parallel()
Leopold Schabeld603f842020-06-09 17:48:09 +0200109 selfCtx, cancel := context.WithTimeout(ctx, largeTestTimeout)
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200110 defer cancel()
Lorenz Bruned0503c2020-07-28 17:21:25 +0200111 clientSet, err := GetKubeClientSet(selfCtx, debugClient, portMap[common.KubernetesAPIPort])
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200112 if err != nil {
113 t.Fatal(err)
114 }
Leopold Schabeld603f842020-06-09 17:48:09 +0200115 testEventual(t, "Node is registered and ready", ctx, largeTestTimeout, func(ctx context.Context) error {
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200116 nodes, err := clientSet.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
117 if err != nil {
118 return err
119 }
120 if len(nodes.Items) < 1 {
121 return errors.New("node not registered")
122 }
123 if len(nodes.Items) > 1 {
124 return errors.New("more than one node registered (but there is only one)")
125 }
126 node := nodes.Items[0]
127 for _, cond := range node.Status.Conditions {
128 if cond.Type != corev1.NodeReady {
129 continue
130 }
131 if cond.Status != corev1.ConditionTrue {
132 return fmt.Errorf("node not ready: %v", cond.Message)
133 }
134 }
135 return nil
136 })
Leopold Schabeld603f842020-06-09 17:48:09 +0200137 testEventual(t, "Simple deployment", ctx, largeTestTimeout, func(ctx context.Context) error {
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200138 _, err := clientSet.AppsV1().Deployments("default").Create(ctx, makeTestDeploymentSpec("test-deploy-1"), metav1.CreateOptions{})
139 return err
140 })
Leopold Schabeld603f842020-06-09 17:48:09 +0200141 testEventual(t, "Simple deployment is running", ctx, largeTestTimeout, func(ctx context.Context) error {
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200142 res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-deploy-1"})
143 if err != nil {
144 return err
145 }
146 if len(res.Items) == 0 {
147 return errors.New("pod didn't get created")
148 }
149 pod := res.Items[0]
150 if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
151 return nil
152 }
153 events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
154 if err != nil || len(events.Items) == 0 {
155 return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
156 } else {
157 return fmt.Errorf("pod is not ready: %v", events.Items[0].Message)
158 }
159 })
Lorenz Brun5e4fc2d2020-09-22 18:35:15 +0200160 testEventual(t, "Simple deployment with runc", ctx, largeTestTimeout, func(ctx context.Context) error {
161 deployment := makeTestDeploymentSpec("test-deploy-2")
162 var runcStr = "runc"
163 deployment.Spec.Template.Spec.RuntimeClassName = &runcStr
164 _, err := clientSet.AppsV1().Deployments("default").Create(ctx, deployment, metav1.CreateOptions{})
165 return err
166 })
167 testEventual(t, "Simple deployment is running on runc", ctx, largeTestTimeout, func(ctx context.Context) error {
168 res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-deploy-2"})
169 if err != nil {
170 return err
171 }
172 if len(res.Items) == 0 {
173 return errors.New("pod didn't get created")
174 }
175 pod := res.Items[0]
176 if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
177 return nil
178 }
179 events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
180 if err != nil || len(events.Items) == 0 {
181 return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
182 } else {
183 var errorMsg strings.Builder
184 for _, msg := range events.Items {
185 errorMsg.WriteString(" | ")
186 errorMsg.WriteString(msg.Message)
187 }
188 return fmt.Errorf("pod is not ready: %v", errorMsg.String())
189 }
190 })
Leopold Schabeld603f842020-06-09 17:48:09 +0200191 testEventual(t, "Simple StatefulSet with PVC", ctx, largeTestTimeout, func(ctx context.Context) error {
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200192 _, err := clientSet.AppsV1().StatefulSets("default").Create(ctx, makeTestStatefulSet("test-statefulset-1"), metav1.CreateOptions{})
193 return err
194 })
Leopold Schabeld603f842020-06-09 17:48:09 +0200195 testEventual(t, "Simple StatefulSet with PVC is running", ctx, largeTestTimeout, func(ctx context.Context) error {
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200196 res, err := clientSet.CoreV1().Pods("default").List(ctx, metav1.ListOptions{LabelSelector: "name=test-statefulset-1"})
197 if err != nil {
198 return err
199 }
200 if len(res.Items) == 0 {
201 return errors.New("pod didn't get created")
202 }
203 pod := res.Items[0]
204 if podv1.IsPodAvailable(&pod, 1, metav1.NewTime(time.Now())) {
205 return nil
206 }
207 events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
208 if err != nil || len(events.Items) == 0 {
209 return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
210 } else {
211 return fmt.Errorf("pod is not ready: %v", events.Items[0].Message)
212 }
213 })
Lorenz Brun8b0431a2020-07-13 16:56:36 +0200214 testEventual(t, "Pod with preseeded image", ctx, smallTestTimeout, func(ctx context.Context) error {
215 _, err := clientSet.CoreV1().Pods("default").Create(ctx, &corev1.Pod{
216 ObjectMeta: metav1.ObjectMeta{
217 Name: "preseed-test-1",
218 },
219 Spec: corev1.PodSpec{
220 Containers: []corev1.Container{{
221 Name: "preseed-test-1",
222 ImagePullPolicy: corev1.PullNever,
Serge Bazanski77cb6c52020-12-19 00:09:22 +0100223 Image: "bazel/metropolis/test/e2e/preseedtest:preseedtest",
Lorenz Brun8b0431a2020-07-13 16:56:36 +0200224 }},
225 RestartPolicy: corev1.RestartPolicyNever,
226 },
227 }, metav1.CreateOptions{})
228 return err
229 })
230 testEventual(t, "Pod with preseeded image is completed", ctx, largeTestTimeout, func(ctx context.Context) error {
231 pod, err := clientSet.CoreV1().Pods("default").Get(ctx, "preseed-test-1", metav1.GetOptions{})
232 if err != nil {
233 return fmt.Errorf("failed to get pod: %w", err)
234 }
235 if pod.Status.Phase == corev1.PodSucceeded {
236 return nil
237 }
238 events, err := clientSet.CoreV1().Events("default").List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.namespace=default", pod.Name)})
239 if err != nil || len(events.Items) == 0 {
240 return fmt.Errorf("pod is not ready: %v", pod.Status.Phase)
241 } else {
242 return fmt.Errorf("pod is not ready: %v", events.Items[len(events.Items)-1].Message)
243 }
244 })
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200245 })
246 })
Leopold Schabela013ffa2020-06-03 15:09:32 +0200247
248 // Cancel the main context and wait for our subprocesses to exit
249 // to avoid leaking them and blocking the parent.
250 cancel()
251 <-procExit
Lorenz Brunfc5dbc62020-05-28 12:18:07 +0200252}