blob: df7492c150113067cde805d59f39528c4ba065c3 [file] [log] [blame]
Serge Bazanski9c09c4e2020-03-24 13:58:01 +01001// Copyright 2020 The Monogon Project Authors.
2//
3// SPDX-License-Identifier: Apache-2.0
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
17package supervisor
18
// The service supervision library allows writing reliable, service-style software within Smalltown.
// It builds upon the Erlang/OTP supervision tree system, adapted to be more Go-ish.
// For the detailed design, see go/supervision.
22
23import (
24 "context"
Serge Bazanskic7359672020-10-30 16:38:57 +010025 "io"
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010026 "sync"
27
Serge Bazanski77cb6c52020-12-19 00:09:22 +010028 "git.monogon.dev/source/nexantic.git/metropolis/node/core/logtree"
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010029)
30
// Runnable is the unit of work managed by the supervisor: a function that is run in a goroutine and supervised
// throughout its lifetime. A runnable may in turn start more runnables as its children, and those become part of the
// supervision tree.
// The context handed to a runnable is very important and must be handled properly. It stays live (non-errored) for as
// long as the runnable should be running, and is canceled (ctx.Err() becomes non-nil) once the supervisor wants the
// runnable to exit. The same context is therefore perfectly usable for any blocking operation the runnable performs.
type Runnable func(ctx context.Context) error
37
38// RunGroup starts a set of runnables as a group. These runnables will run together, and if any one of them quits
39// unexpectedly, the result will be canceled and restarted.
40// The context here must be an existing Runnable context, and the spawned runnables will run under the node that this
41// context represents.
42func RunGroup(ctx context.Context, runnables map[string]Runnable) error {
43 node, unlock := fromContext(ctx)
44 defer unlock()
45 return node.runGroup(runnables)
46}
47
48// Run starts a single runnable in its own group.
49func Run(ctx context.Context, name string, runnable Runnable) error {
50 return RunGroup(ctx, map[string]Runnable{
51 name: runnable,
52 })
53}
54
55// Signal tells the supervisor that the calling runnable has reached a certain state of its lifecycle. All runnables
56// should SignalHealthy when they are ready with set up, running other child runnables and are now 'serving'.
57func Signal(ctx context.Context, signal SignalType) {
58 node, unlock := fromContext(ctx)
59 defer unlock()
60 node.signal(signal)
61}
62
// SignalType enumerates the lifecycle states that a runnable can report to the supervisor through Signal.
type SignalType int

const (
	// SignalHealthy reports that the runnable is healthy: it is done with setup, done with spawning more Runnables,
	// and ready to serve in a loop. From then on the runnable needs to watch its parent context and exit as soon as
	// that context is done.
	SignalHealthy SignalType = iota
	// SignalDone reports that the runnable is done and does not need to run any loop. This is useful for Runnables
	// whose only job is to set up other child runnables. Such a runnable will still be restarted if a related
	// failure happens somewhere in the supervision tree.
	SignalDone
)
73
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010074// supervisor represents and instance of the supervision system. It keeps track of a supervision tree and a request
75// channel to its internal processor goroutine.
76type supervisor struct {
77 // mu guards the entire state of the supervisor.
78 mu sync.RWMutex
79 // root is the root node of the supervision tree, named 'root'. It represents the Runnable started with the
80 // supervisor.New call.
81 root *node
Serge Bazanskic7359672020-10-30 16:38:57 +010082 // logtree is the main logtree exposed to runnables and used internally.
83 logtree *logtree.LogTree
84 // ilogger is the internal logger logging to "supervisor" in the logtree.
85 ilogger logtree.LeveledLogger
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010086
87 // pReq is an interface channel to the lifecycle processor of the supervisor.
88 pReq chan *processorRequest
Serge Bazanski19bb4122020-05-04 17:57:50 +020089
90 // propagate panics, ie. don't catch them.
91 propagatePanic bool
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010092}
93
Serge Bazanski19bb4122020-05-04 17:57:50 +020094// SupervisorOpt are runtime configurable options for the supervisor.
95type SupervisorOpt func(s *supervisor)
96
97var (
98 // WithPropagatePanic prevents the Supervisor from catching panics in runnables and treating them as failures.
99 // This is useful to enable for testing and local debugging.
100 WithPropagatePanic = func(s *supervisor) {
101 s.propagatePanic = true
102 }
103)
104
Serge Bazanskic7359672020-10-30 16:38:57 +0100105func WithExistingLogtree(lt *logtree.LogTree) SupervisorOpt {
106 return func(s *supervisor) {
107 s.logtree = lt
108 }
109}
110
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100111// New creates a new supervisor with its root running the given root runnable.
112// The given context can be used to cancel the entire supervision tree.
Serge Bazanskic7359672020-10-30 16:38:57 +0100113func New(ctx context.Context, rootRunnable Runnable, opts ...SupervisorOpt) *supervisor {
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100114 sup := &supervisor{
Serge Bazanskic7359672020-10-30 16:38:57 +0100115 logtree: logtree.New(),
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100116 pReq: make(chan *processorRequest),
117 }
Serge Bazanski19bb4122020-05-04 17:57:50 +0200118
119 for _, o := range opts {
120 o(sup)
121 }
122
Serge Bazanskic7359672020-10-30 16:38:57 +0100123 sup.ilogger = sup.logtree.MustLeveledFor("supervisor")
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100124 sup.root = newNode("root", rootRunnable, sup, nil)
125
126 go sup.processor(ctx)
127
128 sup.pReq <- &processorRequest{
129 schedule: &processorRequestSchedule{dn: "root"},
130 }
Serge Bazanskiac6b6442020-05-06 19:13:43 +0200131
132 return sup
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100133}
Serge Bazanskic7359672020-10-30 16:38:57 +0100134
135func Logger(ctx context.Context) logtree.LeveledLogger {
136 node, unlock := fromContext(ctx)
137 defer unlock()
138 return node.sup.logtree.MustLeveledFor(logtree.DN(node.dn()))
139}
140
141func RawLogger(ctx context.Context) io.Writer {
142 node, unlock := fromContext(ctx)
143 defer unlock()
144 return node.sup.logtree.MustRawFor(logtree.DN(node.dn()))
145}