blob: 4f4d1807b5c2c9e7812495913d22f626bbce224b [file] [log] [blame]
Serge Bazanski9c09c4e2020-03-24 13:58:01 +01001// Copyright 2020 The Monogon Project Authors.
2//
3// SPDX-License-Identifier: Apache-2.0
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
17package supervisor
18
19import (
20 "context"
21 "fmt"
22 "regexp"
23 "strings"
24
25 "github.com/cenkalti/backoff/v4"
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010026)
27
Serge Bazanski216fe7b2021-05-21 18:36:16 +020028// node is a supervision tree node. It represents the state of a Runnable
29// within this tree, its relation to other tree elements, and contains
30// supporting data needed to actually supervise it.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010031type node struct {
Serge Bazanski216fe7b2021-05-21 18:36:16 +020032 // The name of this node. Opaque string. It's used to make up the 'dn'
33 // (distinguished name) of a node within the tree. When starting a runnable
34 // inside a tree, this is where that name gets used.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010035 name string
36 runnable Runnable
37
38 // The supervisor managing this tree.
39 sup *supervisor
Serge Bazanski216fe7b2021-05-21 18:36:16 +020040 // The parent, within the tree, of this node. If this is the root node of
41 // the tree, this is nil.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010042 parent *node
Serge Bazanski216fe7b2021-05-21 18:36:16 +020043 // Children of this tree. This is represented by a map keyed from child
44 // node names, for easy access.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010045 children map[string]*node
Serge Bazanski26d52252022-02-07 15:57:54 +010046 // Reserved nodes that may not be used as child names. This is currently
47 // used by sub-loggers (see SubLogger function), preventing a sub-logger
48 // name from colliding with a node name.
49 reserved map[string]bool
Serge Bazanski216fe7b2021-05-21 18:36:16 +020050 // Supervision groups. Each group is a set of names of children. Sets, and
51 // as such groups, don't overlap between each other. A supervision group
52 // indicates that if any child within that group fails, all others should
53 // be canceled and restarted together.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010054 groups []map[string]bool
55
56 // The current state of the runnable in this node.
Serge Bazanskieca8ee32024-07-30 14:32:19 +000057 state NodeState
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010058
Jan Schär08c1c722024-12-19 12:03:17 +010059 // signaledDone is set when the runnable has signaled Done. The transition to
60 // DONE state only happens after the runnable returns.
61 signaledDone bool
62
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010063 // Backoff used to keep runnables from being restarted too fast.
64 bo *backoff.ExponentialBackOff
65
66 // Context passed to the runnable, and its cancel function.
67 ctx context.Context
68 ctxC context.CancelFunc
69}
70
Serge Bazanskieca8ee32024-07-30 14:32:19 +000071// NodeState is the state of a runnable within a node, and in a way the node
Serge Bazanski216fe7b2021-05-21 18:36:16 +020072// itself. This follows the state diagram from go/supervision.
Serge Bazanskieca8ee32024-07-30 14:32:19 +000073type NodeState int
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010074
75const (
Serge Bazanski216fe7b2021-05-21 18:36:16 +020076 // A node that has just been created, and whose runnable has been started
77 // already but hasn't signaled anything yet.
Serge Bazanskieca8ee32024-07-30 14:32:19 +000078 NodeStateNew NodeState = iota
Serge Bazanski216fe7b2021-05-21 18:36:16 +020079 // A node whose runnable has signaled being healthy - this means it's ready
80 // to serve/act.
Serge Bazanskieca8ee32024-07-30 14:32:19 +000081 NodeStateHealthy
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010082 // A node that has unexpectedly returned or panicked.
Serge Bazanskieca8ee32024-07-30 14:32:19 +000083 NodeStateDead
Serge Bazanski216fe7b2021-05-21 18:36:16 +020084 // A node that has declared that its done with its work and should not be
85 // restarted, unless a supervision tree failure requires that.
Serge Bazanskieca8ee32024-07-30 14:32:19 +000086 NodeStateDone
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010087 // A node that has returned after being requested to cancel.
Serge Bazanskieca8ee32024-07-30 14:32:19 +000088 NodeStateCanceled
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010089)
90
Serge Bazanskif9a8dcd2024-07-31 14:46:06 +000091// NodeStates is a list of all possible values of a NodeState.
92var NodeStates = []NodeState{
93 NodeStateNew,
94 NodeStateHealthy,
95 NodeStateDead,
96 NodeStateDone,
97 NodeStateCanceled,
98}
99
Serge Bazanskieca8ee32024-07-30 14:32:19 +0000100func (s NodeState) String() string {
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100101 switch s {
Serge Bazanskieca8ee32024-07-30 14:32:19 +0000102 case NodeStateNew:
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100103 return "NODE_STATE_NEW"
Serge Bazanskieca8ee32024-07-30 14:32:19 +0000104 case NodeStateHealthy:
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100105 return "NODE_STATE_HEALTHY"
Serge Bazanskieca8ee32024-07-30 14:32:19 +0000106 case NodeStateDead:
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100107 return "NODE_STATE_DEAD"
Serge Bazanskieca8ee32024-07-30 14:32:19 +0000108 case NodeStateDone:
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100109 return "NODE_STATE_DONE"
Serge Bazanskieca8ee32024-07-30 14:32:19 +0000110 case NodeStateCanceled:
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100111 return "NODE_STATE_CANCELED"
112 }
113 return "UNKNOWN"
114}
115
116func (n *node) String() string {
117 return fmt.Sprintf("%s (%s)", n.dn(), n.state.String())
118}
119
120// contextKey is a type used to keep data within context values.
121type contextKey string
122
123var (
124 supervisorKey = contextKey("supervisor")
125 dnKey = contextKey("dn")
126)
127
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200128// fromContext retrieves a tree node from a runnable context. It takes a lock
129// on the tree and returns an unlock function. This unlock function needs to be
130// called once mutations on the tree/supervisor/node are done.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100131func fromContext(ctx context.Context) (*node, func()) {
132 sup, ok := ctx.Value(supervisorKey).(*supervisor)
133 if !ok {
134 panic("supervisor function called from non-runnable context")
135 }
136
137 sup.mu.Lock()
138
139 dnParent, ok := ctx.Value(dnKey).(string)
140 if !ok {
141 sup.mu.Unlock()
142 panic("supervisor function called from non-runnable context")
143 }
144
145 return sup.nodeByDN(dnParent), sup.mu.Unlock
146}
147
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200148// All the following 'internal' supervisor functions must only be called with
149// the supervisor lock taken. Getting a lock via fromContext is enough.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100150
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200151// dn returns the distinguished name of a node. This distinguished name is a
152// period-separated, inverse-DNS-like name. For instance, the runnable 'foo'
153// within the runnable 'bar' will be called 'root.bar.foo'. The root of the
154// tree is always named, and has the dn, 'root'.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100155func (n *node) dn() string {
156 if n.parent != nil {
157 return fmt.Sprintf("%s.%s", n.parent.dn(), n.name)
158 }
159 return n.name
160}
161
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200162// groupSiblings is a helper function to get all runnable group siblings of a
163// given runnable name within this node. All children are always in a group,
164// even if that group is unary.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100165func (n *node) groupSiblings(name string) map[string]bool {
166 for _, m := range n.groups {
167 if _, ok := m[name]; ok {
168 return m
169 }
170 }
171 return nil
172}
173
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200174// newNode creates a new node with a given parent. It does not register it with
175// the parent (as that depends on group placement).
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100176func newNode(name string, runnable Runnable, sup *supervisor, parent *node) *node {
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200177 // We use exponential backoff for failed runnables, but at some point we
178 // cap at a given backoff time. To achieve this, we set MaxElapsedTime to
179 // 0, which will cap the backoff at MaxInterval.
Serge Bazanskib9431c92020-08-24 18:16:51 +0200180 bo := backoff.NewExponentialBackOff()
181 bo.MaxElapsedTime = 0
182
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100183 n := &node{
184 name: name,
185 runnable: runnable,
186
Serge Bazanskib9431c92020-08-24 18:16:51 +0200187 bo: bo,
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100188
189 sup: sup,
190 parent: parent,
191 }
192 n.reset()
193 return n
194}
195
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200196// resetNode sets up all the dynamic fields of the node, in preparation of
197// starting a runnable. It clears the node's children, groups and resets its
198// context.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100199func (n *node) reset() {
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200200 // Make new context. First, acquire parent context. For the root node
201 // that's Background, otherwise it's the parent's context.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100202 var pCtx context.Context
203 if n.parent == nil {
204 pCtx = context.Background()
205 } else {
206 pCtx = n.parent.ctx
207 }
208 // Mark DN and supervisor in context.
209 ctx := context.WithValue(pCtx, dnKey, n.dn())
210 ctx = context.WithValue(ctx, supervisorKey, n.sup)
211 ctx, ctxC := context.WithCancel(ctx)
212 // Set context
213 n.ctx = ctx
214 n.ctxC = ctxC
215
216 // Clear children and state
Serge Bazanskieca8ee32024-07-30 14:32:19 +0000217 n.state = NodeStateNew
Jan Schär08c1c722024-12-19 12:03:17 +0100218 n.signaledDone = false
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100219 n.children = make(map[string]*node)
Serge Bazanski26d52252022-02-07 15:57:54 +0100220 n.reserved = make(map[string]bool)
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100221 n.groups = nil
222
223 // The node is now ready to be scheduled.
224}
225
226// nodeByDN returns a node by given DN from the supervisor.
227func (s *supervisor) nodeByDN(dn string) *node {
228 parts := strings.Split(dn, ".")
229 if parts[0] != "root" {
230 panic("DN does not start with root.")
231 }
232 parts = parts[1:]
233 cur := s.root
234 for {
235 if len(parts) == 0 {
236 return cur
237 }
238
239 next, ok := cur.children[parts[0]]
240 if !ok {
241 panic(fmt.Errorf("could not find %v (%s) in %s", parts, dn, cur))
242 }
243 cur = next
244 parts = parts[1:]
245 }
246}
247
248// reNodeName validates a node name against constraints.
249var reNodeName = regexp.MustCompile(`[a-z90-9_]{1,64}`)
250
251// runGroup schedules a new group of runnables to run on a node.
252func (n *node) runGroup(runnables map[string]Runnable) error {
253 // Check that the parent node is in the right state.
Serge Bazanskieca8ee32024-07-30 14:32:19 +0000254 if n.state != NodeStateNew {
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100255 return fmt.Errorf("cannot run new runnable on non-NEW node")
256 }
257
258 // Check the requested runnable names.
Tim Windelschmidt6b6428d2024-04-11 01:35:41 +0200259 for name := range runnables {
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100260 if !reNodeName.MatchString(name) {
261 return fmt.Errorf("runnable name %q is invalid", name)
262 }
263 if _, ok := n.children[name]; ok {
264 return fmt.Errorf("runnable %q already exists", name)
265 }
Serge Bazanski26d52252022-02-07 15:57:54 +0100266 if _, ok := n.reserved[name]; ok {
267 return fmt.Errorf("runnable %q would shadow reserved name (eg. sub-logger)", name)
268 }
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100269 }
270
271 // Create child nodes.
272 dns := make(map[string]string)
273 group := make(map[string]bool)
274 for name, runnable := range runnables {
275 if g := n.groupSiblings(name); g != nil {
276 return fmt.Errorf("duplicate child name %q", name)
277 }
278 node := newNode(name, runnable, n.sup, n)
279 n.children[name] = node
280
281 dns[name] = node.dn()
282 group[name] = true
283 }
284 // Add group.
285 n.groups = append(n.groups, group)
286
287 // Schedule execution of group members.
288 go func() {
Tim Windelschmidt6b6428d2024-04-11 01:35:41 +0200289 for name := range runnables {
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100290 n.sup.pReq <- &processorRequest{
291 schedule: &processorRequestSchedule{
292 dn: dns[name],
293 },
294 }
295 }
296 }()
297 return nil
298}
299
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200300// signal sequences state changes by signals received from runnables and
301// updates a node's status accordingly.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100302func (n *node) signal(signal SignalType) {
303 switch signal {
304 case SignalHealthy:
Serge Bazanskieca8ee32024-07-30 14:32:19 +0000305 if n.state != NodeStateNew {
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100306 panic(fmt.Errorf("node %s signaled healthy", n))
307 }
Serge Bazanskieca8ee32024-07-30 14:32:19 +0000308 n.state = NodeStateHealthy
Serge Bazanskicf864da2024-07-31 11:23:34 +0000309 n.sup.metrics.NotifyNodeState(n.dn(), n.state)
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100310 n.bo.Reset()
311 case SignalDone:
Serge Bazanskieca8ee32024-07-30 14:32:19 +0000312 if n.state != NodeStateHealthy {
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100313 panic(fmt.Errorf("node %s signaled done", n))
314 }
Jan Schär08c1c722024-12-19 12:03:17 +0100315 if n.signaledDone {
316 panic(fmt.Errorf("node %s signaled done twice", n))
317 }
318 n.signaledDone = true
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100319 n.bo.Reset()
320 }
321}