blob: a7caf826438c5ea2ffd0667e0f5fe267d3df2a4f [file] [log] [blame]
Serge Bazanski9c09c4e2020-03-24 13:58:01 +01001// Copyright 2020 The Monogon Project Authors.
2//
3// SPDX-License-Identifier: Apache-2.0
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
17package supervisor
18
19import (
20 "context"
21 "fmt"
22 "regexp"
23 "strings"
24
25 "github.com/cenkalti/backoff/v4"
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010026)
27
28// node is a supervision tree node. It represents the state of a Runnable within this tree, its relation to other tree
29// elements, and contains supporting data needed to actually supervise it.
30type node struct {
31 // The name of this node. Opaque string. It's used to make up the 'dn' (distinguished name) of a node within
32 // the tree. When starting a runnable inside a tree, this is where that name gets used.
33 name string
34 runnable Runnable
35
36 // The supervisor managing this tree.
37 sup *supervisor
38 // The parent, within the tree, of this node. If this is the root node of the tree, this is nil.
39 parent *node
40 // Children of this tree. This is represented by a map keyed from child node names, for easy access.
41 children map[string]*node
42 // Supervision groups. Each group is a set of names of children. Sets, and as such groups, don't overlap between
43 // each other. A supervision group indicates that if any child within that group fails, all others should be
44 // canceled and restarted together.
45 groups []map[string]bool
46
47 // The current state of the runnable in this node.
48 state nodeState
49
50 // Backoff used to keep runnables from being restarted too fast.
51 bo *backoff.ExponentialBackOff
52
53 // Context passed to the runnable, and its cancel function.
54 ctx context.Context
55 ctxC context.CancelFunc
56}
57
58// nodeState is the state of a runnable within a node, and in a way the node itself.
59// This follows the state diagram from go/supervision.
60type nodeState int
61
62const (
63 // A node that has just been created, and whose runnable has been started already but hasn't signaled anything yet.
64 nodeStateNew nodeState = iota
65 // A node whose runnable has signaled being healthy - this means it's ready to serve/act.
66 nodeStateHealthy
67 // A node that has unexpectedly returned or panicked.
68 nodeStateDead
69 // A node that has declared that its done with its work and should not be restarted, unless a supervision tree
70 // failure requires that.
71 nodeStateDone
72 // A node that has returned after being requested to cancel.
73 nodeStateCanceled
74)
75
76func (s nodeState) String() string {
77 switch s {
78 case nodeStateNew:
79 return "NODE_STATE_NEW"
80 case nodeStateHealthy:
81 return "NODE_STATE_HEALTHY"
82 case nodeStateDead:
83 return "NODE_STATE_DEAD"
84 case nodeStateDone:
85 return "NODE_STATE_DONE"
86 case nodeStateCanceled:
87 return "NODE_STATE_CANCELED"
88 }
89 return "UNKNOWN"
90}
91
92func (n *node) String() string {
93 return fmt.Sprintf("%s (%s)", n.dn(), n.state.String())
94}
95
96// contextKey is a type used to keep data within context values.
97type contextKey string
98
99var (
100 supervisorKey = contextKey("supervisor")
101 dnKey = contextKey("dn")
102)
103
104// fromContext retrieves a tree node from a runnable context. It takes a lock on the tree and returns an unlock
105// function. This unlock function needs to be called once mutations on the tree/supervisor/node are done.
106func fromContext(ctx context.Context) (*node, func()) {
107 sup, ok := ctx.Value(supervisorKey).(*supervisor)
108 if !ok {
109 panic("supervisor function called from non-runnable context")
110 }
111
112 sup.mu.Lock()
113
114 dnParent, ok := ctx.Value(dnKey).(string)
115 if !ok {
116 sup.mu.Unlock()
117 panic("supervisor function called from non-runnable context")
118 }
119
120 return sup.nodeByDN(dnParent), sup.mu.Unlock
121}
122
123// All the following 'internal' supervisor functions must only be called with the supervisor lock taken. Getting a lock
124// via fromContext is enough.
125
126// dn returns the distinguished name of a node. This distinguished name is a period-separated, inverse-DNS-like name.
127// For instance, the runnable 'foo' within the runnable 'bar' will be called 'root.bar.foo'. The root of the tree is
128// always named, and has the dn, 'root'.
129func (n *node) dn() string {
130 if n.parent != nil {
131 return fmt.Sprintf("%s.%s", n.parent.dn(), n.name)
132 }
133 return n.name
134}
135
136// groupSiblings is a helper function to get all runnable group siblings of a given runnable name within this node.
137// All children are always in a group, even if that group is unary.
138func (n *node) groupSiblings(name string) map[string]bool {
139 for _, m := range n.groups {
140 if _, ok := m[name]; ok {
141 return m
142 }
143 }
144 return nil
145}
146
147// newNode creates a new node with a given parent. It does not register it with the parent (as that depends on group
148// placement).
149func newNode(name string, runnable Runnable, sup *supervisor, parent *node) *node {
Serge Bazanskib9431c92020-08-24 18:16:51 +0200150 // We use exponential backoff for failed runnables, but at some point we cap at a given backoff time.
151 // To achieve this, we set MaxElapsedTime to 0, which will cap the backoff at MaxInterval.
152 bo := backoff.NewExponentialBackOff()
153 bo.MaxElapsedTime = 0
154
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100155 n := &node{
156 name: name,
157 runnable: runnable,
158
Serge Bazanskib9431c92020-08-24 18:16:51 +0200159 bo: bo,
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100160
161 sup: sup,
162 parent: parent,
163 }
164 n.reset()
165 return n
166}
167
168// resetNode sets up all the dynamic fields of the node, in preparation of starting a runnable. It clears the node's
169// children, groups and resets its context.
170func (n *node) reset() {
171 // Make new context. First, acquire parent context. For the root node that's Background, otherwise it's the
172 // parent's context.
173 var pCtx context.Context
174 if n.parent == nil {
175 pCtx = context.Background()
176 } else {
177 pCtx = n.parent.ctx
178 }
179 // Mark DN and supervisor in context.
180 ctx := context.WithValue(pCtx, dnKey, n.dn())
181 ctx = context.WithValue(ctx, supervisorKey, n.sup)
182 ctx, ctxC := context.WithCancel(ctx)
183 // Set context
184 n.ctx = ctx
185 n.ctxC = ctxC
186
187 // Clear children and state
188 n.state = nodeStateNew
189 n.children = make(map[string]*node)
190 n.groups = nil
191
192 // The node is now ready to be scheduled.
193}
194
195// nodeByDN returns a node by given DN from the supervisor.
196func (s *supervisor) nodeByDN(dn string) *node {
197 parts := strings.Split(dn, ".")
198 if parts[0] != "root" {
199 panic("DN does not start with root.")
200 }
201 parts = parts[1:]
202 cur := s.root
203 for {
204 if len(parts) == 0 {
205 return cur
206 }
207
208 next, ok := cur.children[parts[0]]
209 if !ok {
210 panic(fmt.Errorf("could not find %v (%s) in %s", parts, dn, cur))
211 }
212 cur = next
213 parts = parts[1:]
214 }
215}
216
217// reNodeName validates a node name against constraints.
218var reNodeName = regexp.MustCompile(`[a-z90-9_]{1,64}`)
219
220// runGroup schedules a new group of runnables to run on a node.
221func (n *node) runGroup(runnables map[string]Runnable) error {
222 // Check that the parent node is in the right state.
223 if n.state != nodeStateNew {
224 return fmt.Errorf("cannot run new runnable on non-NEW node")
225 }
226
227 // Check the requested runnable names.
228 for name, _ := range runnables {
229 if !reNodeName.MatchString(name) {
230 return fmt.Errorf("runnable name %q is invalid", name)
231 }
232 if _, ok := n.children[name]; ok {
233 return fmt.Errorf("runnable %q already exists", name)
234 }
235 }
236
237 // Create child nodes.
238 dns := make(map[string]string)
239 group := make(map[string]bool)
240 for name, runnable := range runnables {
241 if g := n.groupSiblings(name); g != nil {
242 return fmt.Errorf("duplicate child name %q", name)
243 }
244 node := newNode(name, runnable, n.sup, n)
245 n.children[name] = node
246
247 dns[name] = node.dn()
248 group[name] = true
249 }
250 // Add group.
251 n.groups = append(n.groups, group)
252
253 // Schedule execution of group members.
254 go func() {
255 for name, _ := range runnables {
256 n.sup.pReq <- &processorRequest{
257 schedule: &processorRequestSchedule{
258 dn: dns[name],
259 },
260 }
261 }
262 }()
263 return nil
264}
265
266// signal sequences state changes by signals received from runnables and updates a node's status accordingly.
267func (n *node) signal(signal SignalType) {
268 switch signal {
269 case SignalHealthy:
270 if n.state != nodeStateNew {
271 panic(fmt.Errorf("node %s signaled healthy", n))
272 }
273 n.state = nodeStateHealthy
274 n.bo.Reset()
275 case SignalDone:
276 if n.state != nodeStateHealthy {
277 panic(fmt.Errorf("node %s signaled done", n))
278 }
279 n.state = nodeStateDone
280 n.bo.Reset()
281 }
282}