blob: 272b6505293a3b9c2b6924bf17d57269852fe251 [file] [log] [blame]
Serge Bazanski9c09c4e2020-03-24 13:58:01 +01001// Copyright 2020 The Monogon Project Authors.
2//
3// SPDX-License-Identifier: Apache-2.0
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
17package supervisor
18
19import (
20 "context"
21 "fmt"
22 "regexp"
23 "strings"
24
25 "github.com/cenkalti/backoff/v4"
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010026)
27
Serge Bazanski216fe7b2021-05-21 18:36:16 +020028// node is a supervision tree node. It represents the state of a Runnable
29// within this tree, its relation to other tree elements, and contains
30// supporting data needed to actually supervise it.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010031type node struct {
Serge Bazanski216fe7b2021-05-21 18:36:16 +020032 // The name of this node. Opaque string. It's used to make up the 'dn'
33 // (distinguished name) of a node within the tree. When starting a runnable
34 // inside a tree, this is where that name gets used.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010035 name string
36 runnable Runnable
37
38 // The supervisor managing this tree.
39 sup *supervisor
Serge Bazanski216fe7b2021-05-21 18:36:16 +020040 // The parent, within the tree, of this node. If this is the root node of
41 // the tree, this is nil.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010042 parent *node
Serge Bazanski216fe7b2021-05-21 18:36:16 +020043 // Children of this tree. This is represented by a map keyed from child
44 // node names, for easy access.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010045 children map[string]*node
Serge Bazanski26d52252022-02-07 15:57:54 +010046 // Reserved nodes that may not be used as child names. This is currently
47 // used by sub-loggers (see SubLogger function), preventing a sub-logger
48 // name from colliding with a node name.
49 reserved map[string]bool
Serge Bazanski216fe7b2021-05-21 18:36:16 +020050 // Supervision groups. Each group is a set of names of children. Sets, and
51 // as such groups, don't overlap between each other. A supervision group
52 // indicates that if any child within that group fails, all others should
53 // be canceled and restarted together.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010054 groups []map[string]bool
55
56 // The current state of the runnable in this node.
Serge Bazanskieca8ee32024-07-30 14:32:19 +000057 state NodeState
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010058
59 // Backoff used to keep runnables from being restarted too fast.
60 bo *backoff.ExponentialBackOff
61
62 // Context passed to the runnable, and its cancel function.
63 ctx context.Context
64 ctxC context.CancelFunc
65}
66
Serge Bazanskieca8ee32024-07-30 14:32:19 +000067// NodeState is the state of a runnable within a node, and in a way the node
Serge Bazanski216fe7b2021-05-21 18:36:16 +020068// itself. This follows the state diagram from go/supervision.
Serge Bazanskieca8ee32024-07-30 14:32:19 +000069type NodeState int
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010070
71const (
Serge Bazanski216fe7b2021-05-21 18:36:16 +020072 // A node that has just been created, and whose runnable has been started
73 // already but hasn't signaled anything yet.
Serge Bazanskieca8ee32024-07-30 14:32:19 +000074 NodeStateNew NodeState = iota
Serge Bazanski216fe7b2021-05-21 18:36:16 +020075 // A node whose runnable has signaled being healthy - this means it's ready
76 // to serve/act.
Serge Bazanskieca8ee32024-07-30 14:32:19 +000077 NodeStateHealthy
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010078 // A node that has unexpectedly returned or panicked.
Serge Bazanskieca8ee32024-07-30 14:32:19 +000079 NodeStateDead
Serge Bazanski216fe7b2021-05-21 18:36:16 +020080 // A node that has declared that its done with its work and should not be
81 // restarted, unless a supervision tree failure requires that.
Serge Bazanskieca8ee32024-07-30 14:32:19 +000082 NodeStateDone
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010083 // A node that has returned after being requested to cancel.
Serge Bazanskieca8ee32024-07-30 14:32:19 +000084 NodeStateCanceled
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010085)
86
Serge Bazanskif9a8dcd2024-07-31 14:46:06 +000087// NodeStates is a list of all possible values of a NodeState.
88var NodeStates = []NodeState{
89 NodeStateNew,
90 NodeStateHealthy,
91 NodeStateDead,
92 NodeStateDone,
93 NodeStateCanceled,
94}
95
Serge Bazanskieca8ee32024-07-30 14:32:19 +000096func (s NodeState) String() string {
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010097 switch s {
Serge Bazanskieca8ee32024-07-30 14:32:19 +000098 case NodeStateNew:
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010099 return "NODE_STATE_NEW"
Serge Bazanskieca8ee32024-07-30 14:32:19 +0000100 case NodeStateHealthy:
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100101 return "NODE_STATE_HEALTHY"
Serge Bazanskieca8ee32024-07-30 14:32:19 +0000102 case NodeStateDead:
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100103 return "NODE_STATE_DEAD"
Serge Bazanskieca8ee32024-07-30 14:32:19 +0000104 case NodeStateDone:
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100105 return "NODE_STATE_DONE"
Serge Bazanskieca8ee32024-07-30 14:32:19 +0000106 case NodeStateCanceled:
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100107 return "NODE_STATE_CANCELED"
108 }
109 return "UNKNOWN"
110}
111
112func (n *node) String() string {
113 return fmt.Sprintf("%s (%s)", n.dn(), n.state.String())
114}
115
116// contextKey is a type used to keep data within context values.
117type contextKey string
118
119var (
120 supervisorKey = contextKey("supervisor")
121 dnKey = contextKey("dn")
122)
123
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200124// fromContext retrieves a tree node from a runnable context. It takes a lock
125// on the tree and returns an unlock function. This unlock function needs to be
126// called once mutations on the tree/supervisor/node are done.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100127func fromContext(ctx context.Context) (*node, func()) {
128 sup, ok := ctx.Value(supervisorKey).(*supervisor)
129 if !ok {
130 panic("supervisor function called from non-runnable context")
131 }
132
133 sup.mu.Lock()
134
135 dnParent, ok := ctx.Value(dnKey).(string)
136 if !ok {
137 sup.mu.Unlock()
138 panic("supervisor function called from non-runnable context")
139 }
140
141 return sup.nodeByDN(dnParent), sup.mu.Unlock
142}
143
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200144// All the following 'internal' supervisor functions must only be called with
145// the supervisor lock taken. Getting a lock via fromContext is enough.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100146
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200147// dn returns the distinguished name of a node. This distinguished name is a
148// period-separated, inverse-DNS-like name. For instance, the runnable 'foo'
149// within the runnable 'bar' will be called 'root.bar.foo'. The root of the
150// tree is always named, and has the dn, 'root'.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100151func (n *node) dn() string {
152 if n.parent != nil {
153 return fmt.Sprintf("%s.%s", n.parent.dn(), n.name)
154 }
155 return n.name
156}
157
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200158// groupSiblings is a helper function to get all runnable group siblings of a
159// given runnable name within this node. All children are always in a group,
160// even if that group is unary.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100161func (n *node) groupSiblings(name string) map[string]bool {
162 for _, m := range n.groups {
163 if _, ok := m[name]; ok {
164 return m
165 }
166 }
167 return nil
168}
169
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200170// newNode creates a new node with a given parent. It does not register it with
171// the parent (as that depends on group placement).
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100172func newNode(name string, runnable Runnable, sup *supervisor, parent *node) *node {
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200173 // We use exponential backoff for failed runnables, but at some point we
174 // cap at a given backoff time. To achieve this, we set MaxElapsedTime to
175 // 0, which will cap the backoff at MaxInterval.
Serge Bazanskib9431c92020-08-24 18:16:51 +0200176 bo := backoff.NewExponentialBackOff()
177 bo.MaxElapsedTime = 0
178
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100179 n := &node{
180 name: name,
181 runnable: runnable,
182
Serge Bazanskib9431c92020-08-24 18:16:51 +0200183 bo: bo,
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100184
185 sup: sup,
186 parent: parent,
187 }
188 n.reset()
189 return n
190}
191
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200192// resetNode sets up all the dynamic fields of the node, in preparation of
193// starting a runnable. It clears the node's children, groups and resets its
194// context.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100195func (n *node) reset() {
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200196 // Make new context. First, acquire parent context. For the root node
197 // that's Background, otherwise it's the parent's context.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100198 var pCtx context.Context
199 if n.parent == nil {
200 pCtx = context.Background()
201 } else {
202 pCtx = n.parent.ctx
203 }
204 // Mark DN and supervisor in context.
205 ctx := context.WithValue(pCtx, dnKey, n.dn())
206 ctx = context.WithValue(ctx, supervisorKey, n.sup)
207 ctx, ctxC := context.WithCancel(ctx)
208 // Set context
209 n.ctx = ctx
210 n.ctxC = ctxC
211
212 // Clear children and state
Serge Bazanskieca8ee32024-07-30 14:32:19 +0000213 n.state = NodeStateNew
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100214 n.children = make(map[string]*node)
Serge Bazanski26d52252022-02-07 15:57:54 +0100215 n.reserved = make(map[string]bool)
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100216 n.groups = nil
217
218 // The node is now ready to be scheduled.
219}
220
221// nodeByDN returns a node by given DN from the supervisor.
222func (s *supervisor) nodeByDN(dn string) *node {
223 parts := strings.Split(dn, ".")
224 if parts[0] != "root" {
225 panic("DN does not start with root.")
226 }
227 parts = parts[1:]
228 cur := s.root
229 for {
230 if len(parts) == 0 {
231 return cur
232 }
233
234 next, ok := cur.children[parts[0]]
235 if !ok {
236 panic(fmt.Errorf("could not find %v (%s) in %s", parts, dn, cur))
237 }
238 cur = next
239 parts = parts[1:]
240 }
241}
242
243// reNodeName validates a node name against constraints.
244var reNodeName = regexp.MustCompile(`[a-z90-9_]{1,64}`)
245
246// runGroup schedules a new group of runnables to run on a node.
247func (n *node) runGroup(runnables map[string]Runnable) error {
248 // Check that the parent node is in the right state.
Serge Bazanskieca8ee32024-07-30 14:32:19 +0000249 if n.state != NodeStateNew {
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100250 return fmt.Errorf("cannot run new runnable on non-NEW node")
251 }
252
253 // Check the requested runnable names.
Tim Windelschmidt6b6428d2024-04-11 01:35:41 +0200254 for name := range runnables {
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100255 if !reNodeName.MatchString(name) {
256 return fmt.Errorf("runnable name %q is invalid", name)
257 }
258 if _, ok := n.children[name]; ok {
259 return fmt.Errorf("runnable %q already exists", name)
260 }
Serge Bazanski26d52252022-02-07 15:57:54 +0100261 if _, ok := n.reserved[name]; ok {
262 return fmt.Errorf("runnable %q would shadow reserved name (eg. sub-logger)", name)
263 }
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100264 }
265
266 // Create child nodes.
267 dns := make(map[string]string)
268 group := make(map[string]bool)
269 for name, runnable := range runnables {
270 if g := n.groupSiblings(name); g != nil {
271 return fmt.Errorf("duplicate child name %q", name)
272 }
273 node := newNode(name, runnable, n.sup, n)
274 n.children[name] = node
275
276 dns[name] = node.dn()
277 group[name] = true
278 }
279 // Add group.
280 n.groups = append(n.groups, group)
281
282 // Schedule execution of group members.
283 go func() {
Tim Windelschmidt6b6428d2024-04-11 01:35:41 +0200284 for name := range runnables {
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100285 n.sup.pReq <- &processorRequest{
286 schedule: &processorRequestSchedule{
287 dn: dns[name],
288 },
289 }
290 }
291 }()
292 return nil
293}
294
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200295// signal sequences state changes by signals received from runnables and
296// updates a node's status accordingly.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100297func (n *node) signal(signal SignalType) {
298 switch signal {
299 case SignalHealthy:
Serge Bazanskieca8ee32024-07-30 14:32:19 +0000300 if n.state != NodeStateNew {
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100301 panic(fmt.Errorf("node %s signaled healthy", n))
302 }
Serge Bazanskieca8ee32024-07-30 14:32:19 +0000303 n.state = NodeStateHealthy
Serge Bazanskicf864da2024-07-31 11:23:34 +0000304 n.sup.metrics.NotifyNodeState(n.dn(), n.state)
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100305 n.bo.Reset()
306 case SignalDone:
Serge Bazanskieca8ee32024-07-30 14:32:19 +0000307 if n.state != NodeStateHealthy {
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100308 panic(fmt.Errorf("node %s signaled done", n))
309 }
Serge Bazanskieca8ee32024-07-30 14:32:19 +0000310 n.state = NodeStateDone
Serge Bazanskicf864da2024-07-31 11:23:34 +0000311 n.sup.metrics.NotifyNodeState(n.dn(), n.state)
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100312 n.bo.Reset()
313 }
314}