blob: 4d15a791b445f4f583e1bc263530b71afee43a16 [file] [log] [blame]
-- name: NewMachine :one
-- Insert a machine row whose creation timestamp is the database's current
-- time, and return the inserted row. No other column is set explicitly by
-- this statement.
INSERT INTO machines (machine_created_at)
VALUES (now())
RETURNING *;
8
-- name: NewSession :one
-- Insert a session row and return it. $1 is the component name, $2 the
-- runtime info and $3 the session interval in seconds. The creation time is
-- the current time, and the initial deadline is one interval after it.
INSERT INTO sessions (
    session_component_name,
    session_runtime_info,
    session_created_at,
    session_interval_seconds,
    session_deadline
)
VALUES (
    $1,
    $2,
    now(),
    $3,
    (now() + $3 * interval '1 second')
)
RETURNING *;
16
-- name: SessionPoke :exec
-- Move the deadline of session $1 to one session interval past the current
-- time. This statement does not verify that the session is still alive by
-- itself: run it in the same transaction as SessionCheck to get that
-- guarantee.
UPDATE sessions
SET
    session_deadline = now() + session_interval_seconds * interval '1 second'
WHERE
    session_id = $1;
23
-- name: SessionCheck :many
-- Look up session $1, but only while it is still valid, i.e. its deadline
-- lies in the future. An unknown or expired session yields no rows.
SELECT *
FROM sessions
WHERE
    session_deadline > now()
    AND session_id = $1;
31
-- name: StartWork :exec
-- Insert a work row tying together machine $1, session $2 and process $3.
INSERT INTO work (machine_id, session_id, process)
VALUES ($1, $2, $3);
38
-- name: FinishWork :exec
-- Delete the work row(s) matching machine $1, session $2 and process $3.
-- All three values must match for a row to be removed.
DELETE FROM work
WHERE
    machine_id = $1
    AND session_id = $2
    AND process = $3;
44
45-- Example tag processing queries follow.
46
-- name: MachineAddProvided :exec
-- Insert a machine_provided row for machine $1, recording provider $2 and
-- the provider-side identifier $3.
INSERT INTO machine_provided (machine_id, provider, provider_id)
VALUES ($1, $2, $3);
53
-- name: MachineSetAgentStarted :exec
-- Upsert the agent-started record of machine $1 with start time $2 and agent
-- public key $3. If the machine already has a record, both values are
-- overwritten with the ones given here.
INSERT INTO machine_agent_started (
    machine_id,
    agent_started_at,
    agent_public_key
)
VALUES ($1, $2, $3)
ON CONFLICT (machine_id) DO UPDATE
SET
    agent_started_at = $2,
    agent_public_key = $3;
63
-- name: MachineSetAgentHeartbeat :exec
-- Upsert the agent heartbeat record of machine $1 with heartbeat time $2.
-- If the machine already has a record, its heartbeat time is overwritten.
INSERT INTO machine_agent_heartbeat (
    machine_id,
    agent_heartbeat_at
)
VALUES ($1, $2)
ON CONFLICT (machine_id) DO UPDATE
SET
    agent_heartbeat_at = $2;
72
-- name: GetMachinesForAgentStart :many
-- Return up to $1 machine_provided rows for machines that need the agent
-- installed for the first time: machines that have no agent-started record and
-- no 'ShepherdInstall' work currently registered against them. Such machines
-- can be assumed to be 'new', with no previous attempts or failures.
SELECT
    machine_provided.*
FROM machines
INNER JOIN machine_provided
    ON machines.machine_id = machine_provided.machine_id
WHERE
    -- The agent was never reported as started on this machine.
    NOT EXISTS (
        SELECT 1
        FROM machine_agent_started
        WHERE machine_agent_started.machine_id = machines.machine_id
    )
    -- TODO(q3k): exclude machines which are not expected to run the agent (eg.
    -- are already exposed to a user).
    -- Nobody is currently performing an install on this machine.
    AND NOT EXISTS (
        SELECT 1
        FROM work
        WHERE work.machine_id = machines.machine_id
          AND work.process = 'ShepherdInstall'
    )
LIMIT $1;
88
-- name: GetMachineForAgentRecovery :many
-- Return up to $1 machine_provided rows for machines that need the agent
-- installed again after something went wrong: the agent was started but never
-- sent a heartbeat, or its heartbeats stopped at some point, or the machine is
-- being reinstalled after a failure. The shepherd is expected to diagnose and
-- recover whatever state the machine is truly in. Machines with
-- 'ShepherdInstall' work currently registered against them are skipped.
SELECT
    machine_provided.*
FROM machines
INNER JOIN machine_provided
    ON machines.machine_id = machine_provided.machine_id
-- Only act on machines where the agent is expected to have been started.
INNER JOIN machine_agent_started
    ON machines.machine_id = machine_agent_started.machine_id
-- A heartbeat record is optional; its absence is one of the failure cases.
LEFT JOIN machine_agent_heartbeat
    ON machines.machine_id = machine_agent_heartbeat.machine_id
WHERE
    (
        -- No heartbeat 30 minutes after starting the agent.
        (
            machine_agent_heartbeat.machine_id IS NULL
            AND (machine_agent_started.agent_started_at + interval '30 minutes') < now()
        )
        -- Heartbeats ceased for 10 minutes.
        OR (
            machine_agent_heartbeat.machine_id IS NOT NULL
            AND (machine_agent_heartbeat.agent_heartbeat_at + interval '10 minutes') < now()
        )
    )
    -- Nobody is currently performing an install on this machine.
    AND NOT EXISTS (
        SELECT 1
        FROM work
        WHERE work.machine_id = machines.machine_id
          AND work.process = 'ShepherdInstall'
    )
LIMIT $1;
118LIMIT $1;