blob: a3bf5e43fc2f51373bb2791d18a953dbe739383f [file] [log] [blame]
Serge Bazanski9c09c4e2020-03-24 13:58:01 +01001// Copyright 2020 The Monogon Project Authors.
2//
3// SPDX-License-Identifier: Apache-2.0
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
17package supervisor
18
19import (
20 "context"
21 "fmt"
22 "regexp"
23 "strings"
24
25 "github.com/cenkalti/backoff/v4"
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010026)
27
Serge Bazanski216fe7b2021-05-21 18:36:16 +020028// node is a supervision tree node. It represents the state of a Runnable
29// within this tree, its relation to other tree elements, and contains
30// supporting data needed to actually supervise it.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010031type node struct {
Serge Bazanski216fe7b2021-05-21 18:36:16 +020032 // The name of this node. Opaque string. It's used to make up the 'dn'
33 // (distinguished name) of a node within the tree. When starting a runnable
34 // inside a tree, this is where that name gets used.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010035 name string
36 runnable Runnable
37
38 // The supervisor managing this tree.
39 sup *supervisor
Serge Bazanski216fe7b2021-05-21 18:36:16 +020040 // The parent, within the tree, of this node. If this is the root node of
41 // the tree, this is nil.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010042 parent *node
Serge Bazanski216fe7b2021-05-21 18:36:16 +020043 // Children of this tree. This is represented by a map keyed from child
44 // node names, for easy access.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010045 children map[string]*node
Serge Bazanski216fe7b2021-05-21 18:36:16 +020046 // Supervision groups. Each group is a set of names of children. Sets, and
47 // as such groups, don't overlap between each other. A supervision group
48 // indicates that if any child within that group fails, all others should
49 // be canceled and restarted together.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010050 groups []map[string]bool
51
52 // The current state of the runnable in this node.
53 state nodeState
54
55 // Backoff used to keep runnables from being restarted too fast.
56 bo *backoff.ExponentialBackOff
57
58 // Context passed to the runnable, and its cancel function.
59 ctx context.Context
60 ctxC context.CancelFunc
61}
62
Serge Bazanski216fe7b2021-05-21 18:36:16 +020063// nodeState is the state of a runnable within a node, and in a way the node
64// itself. This follows the state diagram from go/supervision.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010065type nodeState int
66
67const (
Serge Bazanski216fe7b2021-05-21 18:36:16 +020068 // A node that has just been created, and whose runnable has been started
69 // already but hasn't signaled anything yet.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010070 nodeStateNew nodeState = iota
Serge Bazanski216fe7b2021-05-21 18:36:16 +020071 // A node whose runnable has signaled being healthy - this means it's ready
72 // to serve/act.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010073 nodeStateHealthy
74 // A node that has unexpectedly returned or panicked.
75 nodeStateDead
Serge Bazanski216fe7b2021-05-21 18:36:16 +020076 // A node that has declared that its done with its work and should not be
77 // restarted, unless a supervision tree failure requires that.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010078 nodeStateDone
79 // A node that has returned after being requested to cancel.
80 nodeStateCanceled
81)
82
83func (s nodeState) String() string {
84 switch s {
85 case nodeStateNew:
86 return "NODE_STATE_NEW"
87 case nodeStateHealthy:
88 return "NODE_STATE_HEALTHY"
89 case nodeStateDead:
90 return "NODE_STATE_DEAD"
91 case nodeStateDone:
92 return "NODE_STATE_DONE"
93 case nodeStateCanceled:
94 return "NODE_STATE_CANCELED"
95 }
96 return "UNKNOWN"
97}
98
99func (n *node) String() string {
100 return fmt.Sprintf("%s (%s)", n.dn(), n.state.String())
101}
102
103// contextKey is a type used to keep data within context values.
104type contextKey string
105
106var (
107 supervisorKey = contextKey("supervisor")
108 dnKey = contextKey("dn")
109)
110
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200111// fromContext retrieves a tree node from a runnable context. It takes a lock
112// on the tree and returns an unlock function. This unlock function needs to be
113// called once mutations on the tree/supervisor/node are done.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100114func fromContext(ctx context.Context) (*node, func()) {
115 sup, ok := ctx.Value(supervisorKey).(*supervisor)
116 if !ok {
117 panic("supervisor function called from non-runnable context")
118 }
119
120 sup.mu.Lock()
121
122 dnParent, ok := ctx.Value(dnKey).(string)
123 if !ok {
124 sup.mu.Unlock()
125 panic("supervisor function called from non-runnable context")
126 }
127
128 return sup.nodeByDN(dnParent), sup.mu.Unlock
129}
130
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200131// All the following 'internal' supervisor functions must only be called with
132// the supervisor lock taken. Getting a lock via fromContext is enough.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100133
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200134// dn returns the distinguished name of a node. This distinguished name is a
135// period-separated, inverse-DNS-like name. For instance, the runnable 'foo'
136// within the runnable 'bar' will be called 'root.bar.foo'. The root of the
137// tree is always named, and has the dn, 'root'.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100138func (n *node) dn() string {
139 if n.parent != nil {
140 return fmt.Sprintf("%s.%s", n.parent.dn(), n.name)
141 }
142 return n.name
143}
144
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200145// groupSiblings is a helper function to get all runnable group siblings of a
146// given runnable name within this node. All children are always in a group,
147// even if that group is unary.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100148func (n *node) groupSiblings(name string) map[string]bool {
149 for _, m := range n.groups {
150 if _, ok := m[name]; ok {
151 return m
152 }
153 }
154 return nil
155}
156
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200157// newNode creates a new node with a given parent. It does not register it with
158// the parent (as that depends on group placement).
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100159func newNode(name string, runnable Runnable, sup *supervisor, parent *node) *node {
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200160 // We use exponential backoff for failed runnables, but at some point we
161 // cap at a given backoff time. To achieve this, we set MaxElapsedTime to
162 // 0, which will cap the backoff at MaxInterval.
Serge Bazanskib9431c92020-08-24 18:16:51 +0200163 bo := backoff.NewExponentialBackOff()
164 bo.MaxElapsedTime = 0
165
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100166 n := &node{
167 name: name,
168 runnable: runnable,
169
Serge Bazanskib9431c92020-08-24 18:16:51 +0200170 bo: bo,
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100171
172 sup: sup,
173 parent: parent,
174 }
175 n.reset()
176 return n
177}
178
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200179// resetNode sets up all the dynamic fields of the node, in preparation of
180// starting a runnable. It clears the node's children, groups and resets its
181// context.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100182func (n *node) reset() {
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200183 // Make new context. First, acquire parent context. For the root node
184 // that's Background, otherwise it's the parent's context.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100185 var pCtx context.Context
186 if n.parent == nil {
187 pCtx = context.Background()
188 } else {
189 pCtx = n.parent.ctx
190 }
191 // Mark DN and supervisor in context.
192 ctx := context.WithValue(pCtx, dnKey, n.dn())
193 ctx = context.WithValue(ctx, supervisorKey, n.sup)
194 ctx, ctxC := context.WithCancel(ctx)
195 // Set context
196 n.ctx = ctx
197 n.ctxC = ctxC
198
199 // Clear children and state
200 n.state = nodeStateNew
201 n.children = make(map[string]*node)
202 n.groups = nil
203
204 // The node is now ready to be scheduled.
205}
206
207// nodeByDN returns a node by given DN from the supervisor.
208func (s *supervisor) nodeByDN(dn string) *node {
209 parts := strings.Split(dn, ".")
210 if parts[0] != "root" {
211 panic("DN does not start with root.")
212 }
213 parts = parts[1:]
214 cur := s.root
215 for {
216 if len(parts) == 0 {
217 return cur
218 }
219
220 next, ok := cur.children[parts[0]]
221 if !ok {
222 panic(fmt.Errorf("could not find %v (%s) in %s", parts, dn, cur))
223 }
224 cur = next
225 parts = parts[1:]
226 }
227}
228
229// reNodeName validates a node name against constraints.
230var reNodeName = regexp.MustCompile(`[a-z90-9_]{1,64}`)
231
232// runGroup schedules a new group of runnables to run on a node.
233func (n *node) runGroup(runnables map[string]Runnable) error {
234 // Check that the parent node is in the right state.
235 if n.state != nodeStateNew {
236 return fmt.Errorf("cannot run new runnable on non-NEW node")
237 }
238
239 // Check the requested runnable names.
240 for name, _ := range runnables {
241 if !reNodeName.MatchString(name) {
242 return fmt.Errorf("runnable name %q is invalid", name)
243 }
244 if _, ok := n.children[name]; ok {
245 return fmt.Errorf("runnable %q already exists", name)
246 }
247 }
248
249 // Create child nodes.
250 dns := make(map[string]string)
251 group := make(map[string]bool)
252 for name, runnable := range runnables {
253 if g := n.groupSiblings(name); g != nil {
254 return fmt.Errorf("duplicate child name %q", name)
255 }
256 node := newNode(name, runnable, n.sup, n)
257 n.children[name] = node
258
259 dns[name] = node.dn()
260 group[name] = true
261 }
262 // Add group.
263 n.groups = append(n.groups, group)
264
265 // Schedule execution of group members.
266 go func() {
267 for name, _ := range runnables {
268 n.sup.pReq <- &processorRequest{
269 schedule: &processorRequestSchedule{
270 dn: dns[name],
271 },
272 }
273 }
274 }()
275 return nil
276}
277
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200278// signal sequences state changes by signals received from runnables and
279// updates a node's status accordingly.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100280func (n *node) signal(signal SignalType) {
281 switch signal {
282 case SignalHealthy:
283 if n.state != nodeStateNew {
284 panic(fmt.Errorf("node %s signaled healthy", n))
285 }
286 n.state = nodeStateHealthy
287 n.bo.Reset()
288 case SignalDone:
289 if n.state != nodeStateHealthy {
290 panic(fmt.Errorf("node %s signaled done", n))
291 }
292 n.state = nodeStateDone
293 n.bo.Reset()
294 }
295}