c/bmaas/bmdb: implement backoff and history
This augments the existing Work mechanism with a Fail outcome/method
which allows insertion of a machine & process backoff until a deadline
expires.
We also add a history/audit table which contains information about the
work history of a machine - when some work started, finished, failed or
got cancelled.
Change-Id: If890a412977c1d3c7ff3baa69987fb74932818a0
Reviewed-on: https://review.monogon.dev/c/monogon/+/1086
Tested-by: Jenkins CI
Reviewed-by: Leopold Schabel <leo@monogon.tech>
diff --git a/cloud/bmaas/bmdb/model/queries_workflows.sql b/cloud/bmaas/bmdb/model/queries_workflows.sql
index d2e9966..b7b60fe 100644
--- a/cloud/bmaas/bmdb/model/queries_workflows.sql
+++ b/cloud/bmaas/bmdb/model/queries_workflows.sql
@@ -6,12 +6,14 @@
FROM machines
INNER JOIN machine_provided ON machines.machine_id = machine_provided.machine_id
LEFT JOIN work ON machines.machine_id = work.machine_id AND work.process = 'ShepherdInstall'
+LEFT JOIN work_backoff ON machines.machine_id = work_backoff.machine_id AND work_backoff.until > now() AND work_backoff.process = 'ShepherdInstall'
LEFT JOIN machine_agent_started ON machines.machine_id = machine_agent_started.machine_id
WHERE
machine_agent_started.machine_id IS NULL
-- TODO(q3k): exclude machines which are not expected to run the agent (eg.
-- are already exposed to a user).
AND work.machine_id IS NULL
+ AND work_backoff.machine_id IS NULL
LIMIT $1;
-- name: GetMachineForAgentRecovery :many
@@ -25,6 +27,7 @@
FROM machines
INNER JOIN machine_provided ON machines.machine_id = machine_provided.machine_id
LEFT JOIN work ON machines.machine_id = work.machine_id AND work.process = 'ShepherdInstall'
+LEFT JOIN work_backoff ON machines.machine_id = work_backoff.machine_id AND work_backoff.until > now() AND work_backoff.process = 'ShepherdInstall'
LEFT JOIN machine_agent_started ON machines.machine_id = machine_agent_started.machine_id
LEFT JOIN machine_agent_heartbeat ON machines.machine_id = machine_agent_heartbeat.machine_id
WHERE
@@ -43,6 +46,7 @@
)
)
AND work.machine_id IS NULL
+ AND work_backoff.machine_id IS NULL
LIMIT $1;
-- name: AuthenticateAgentConnection :many
@@ -75,5 +79,4 @@
machine_os_installation_report.machine_id IS NULL
OR machine_os_installation_report.generation != machine_os_installation_request.generation
)
-LIMIT $2;
-
+LIMIT $2;
\ No newline at end of file