m/p/watchdog: add watchdog package

This adds the watchdog package for interfacing with devices implementing
the Linux watchdog API. There is one alternative implementation at
github.com/mdlayher/watchdog but it is too minimal for our use case.

The kernel API also exposes a bunch of weird status bits which are only
relevant for physical watchdog cards; these are not made accessible
through the Go package.

Sadly there are no integration tests for this package: ktest wouldn't
work here because a) there is no watchdog HW and b) test success depends
on a condition outside the kernel.

Change-Id: If138a97bc655025da4426665f8cbf1f093cc3bb1
Reviewed-on: https://review.monogon.dev/c/monogon/+/2142
Reviewed-by: Serge Bazanski <serge@monogon.tech>
Tested-by: Jenkins CI
diff --git a/metropolis/pkg/watchdog/watchdog.go b/metropolis/pkg/watchdog/watchdog.go
new file mode 100644
index 0000000..d9a14bf
--- /dev/null
+++ b/metropolis/pkg/watchdog/watchdog.go
@@ -0,0 +1,215 @@
+// Package watchdog provides access to hardware watchdogs. These can be used to
+// automatically reset/reboot a system if they are no longer pinged.
+package watchdog
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"math"
+	"os"
+	"syscall"
+	"time"
+
+	"golang.org/x/sys/unix"
+)
+
+// Device represents a handle to a hardware watchdog.
+type Device struct {
+	// Type identifies the type of watchdog device. It corresponds to the Linux
+	// driver's watchdog_info.identity value.
+	Type string
+	// HasConfigurableTimeout indicates if the device supports the SetTimeout
+	// call.
+	HasConfigurableTimeout bool
+	// HasPretimeout indicates if the device supports notifying the system of
+	// an impending reset and the functions to control this
+	// (Get/SetPreTimeout).
+	HasPretimeout bool
+	// ReportsWatchdogReset indicates if the watchdog is capable of reporting
+	// that it is responsible for the last system reset.
+	ReportsWatchdogReset bool
+
+	raw syscall.RawConn // raw runs ioctls against f's file descriptor.
+	f   *os.File        // f is the open device file; nil once closed.
+}
+
+// Open opens a watchdog device identified by the path to its device inode.
+// The returned Device must be released with Close or CloseActive.
+func Open(name string) (*Device, error) {
+	// Write access is required so that Close can send the magic close byte.
+	f, err := os.OpenFile(name, os.O_WRONLY, 0)
+	if err != nil {
+		return nil, err // Already wrapped by PathError
+	}
+	raw, err := f.SyscallConn()
+	if err != nil {
+		f.Close()
+		return nil, fmt.Errorf("while obtaining RawConn: %w", err)
+	}
+	var wdInfo *unix.WatchdogInfo
+	ctrlErr := raw.Control(func(fd uintptr) {
+		wdInfo, err = unix.IoctlGetWatchdogInfo(int(fd))
+	})
+	switch {
+	case ctrlErr != nil:
+		f.Close()
+		return nil, fmt.Errorf("when calling RawConn.Control: %w", ctrlErr)
+	case errors.Is(err, unix.ENOTTY):
+		f.Close()
+		return nil, errors.New("device is not a watchdog")
+	case err != nil:
+		f.Close()
+		return nil, fmt.Errorf("while getting watchdog metadata: %w", err)
+	}
+	return &Device{
+		Type:                   string(bytes.Trim(wdInfo.Identity[:], "\x00")),
+		f:                      f,
+		raw:                    raw,
+		HasConfigurableTimeout: wdInfo.Options&unix.WDIOF_SETTIMEOUT != 0,
+		HasPretimeout:          wdInfo.Options&unix.WDIOF_PRETIMEOUT != 0,
+		ReportsWatchdogReset:   wdInfo.Options&unix.WDIOF_CARDRESET != 0,
+	}, nil
+}
+
+// SetTimeout sets the duration since the last ping after which the watchdog
+// performs a recovery action (usually a reset or reboot), rounded up to full
+// seconds. Hardware may approximate or lack it; GetTimeout reports the result.
+func (w *Device) SetTimeout(t time.Duration) error {
+	if !w.HasConfigurableTimeout {
+		return errors.New("watchdog does not have a configurable timeout, check HasConfigurableTimeout")
+	}
+	var err error
+	ctrlErr := w.raw.Control(func(fd uintptr) {
+		// WDIOC_SETTIMEOUT takes a pointer to the value, not the value itself.
+		err = unix.IoctlSetPointerInt(int(fd), unix.WDIOC_SETTIMEOUT, int(math.Ceil(t.Seconds())))
+	})
+	if ctrlErr != nil {
+		return fmt.Errorf("when calling RawConn.Control: %w", ctrlErr)
+	}
+	if err != nil {
+		return fmt.Errorf("ioctl(WDIOC_SETTIMEOUT): %w", err)
+	}
+	return nil
+}
+
+// GetTimeout returns the timeout currently configured on the watchdog.
+func (w *Device) GetTimeout() (time.Duration, error) {
+	var err error
+	var t int
+	ctrlErr := w.raw.Control(func(fd uintptr) {
+		t, err = unix.IoctlGetInt(int(fd), unix.WDIOC_GETTIMEOUT)
+	})
+	if ctrlErr != nil {
+		return 0, fmt.Errorf("when calling RawConn.Control: %w", ctrlErr)
+	}
+	if err != nil {
+		return 0, fmt.Errorf("ioctl(WDIOC_GETTIMEOUT): %w", err)
+	}
+	return time.Duration(t) * time.Second, nil
+}
+
+// SetPreTimeout sets the pre-timeout: once the time left on the expiry timer
+// drops below it, the system is notified (via some high-priority interrupt,
+// usually an NMI). A system that is still in a semi-working state can use this
+// to recover or dump diagnostics before the watchdog forcibly resets it. Only
+// available if HasPretimeout is true; a duration of zero disables the feature.
+func (w *Device) SetPreTimeout(t time.Duration) error {
+	if !w.HasPretimeout {
+		return errors.New("watchdog does not have a pretimeout, check HasPretimeout")
+	}
+	var err error
+	ctrlErr := w.raw.Control(func(fd uintptr) {
+		// WDIOC_SETPRETIMEOUT takes a pointer to the value, not the value itself.
+		err = unix.IoctlSetPointerInt(int(fd), unix.WDIOC_SETPRETIMEOUT, int(math.Ceil(t.Seconds())))
+	})
+	if ctrlErr != nil {
+		return fmt.Errorf("when calling RawConn.Control: %w", ctrlErr)
+	}
+	if err != nil {
+		return fmt.Errorf("ioctl(WDIOC_SETPRETIMEOUT): %w", err)
+	}
+	return nil
+}
+
+// GetPreTimeout gets the current pre-timeout (see SetPreTimeout for more).
+// It is only available if HasPretimeout is true.
+func (w *Device) GetPreTimeout() (time.Duration, error) {
+	if !w.HasPretimeout {
+		return 0, errors.New("watchdog does not have a pretimeout, check HasPretimeout")
+	}
+	var err error
+	var t int
+	ctrlErr := w.raw.Control(func(fd uintptr) {
+		t, err = unix.IoctlGetInt(int(fd), unix.WDIOC_GETPRETIMEOUT)
+	})
+	if ctrlErr != nil {
+		return 0, fmt.Errorf("when calling RawConn.Control: %w", ctrlErr)
+	}
+	if err != nil {
+		return 0, fmt.Errorf("ioctl(WDIOC_GETPRETIMEOUT): %w", err)
+	}
+	return time.Duration(t) * time.Second, nil
+}
+
+// Ping sends a keepalive to the watchdog. This needs to be called regularly
+// before the watchdog timeout expires, otherwise the system resets.
+func (w *Device) Ping() error {
+	var err error
+	ctrlErr := w.raw.Control(func(fd uintptr) {
+		err = unix.IoctlWatchdogKeepalive(int(fd))
+	})
+	if ctrlErr != nil {
+		return fmt.Errorf("when calling RawConn.Control: %w", ctrlErr)
+	}
+	if err != nil {
+		return fmt.Errorf("ioctl(WDIOC_KEEPALIVE): %w", err)
+	}
+	return nil
+}
+
+// LastResetByWatchdog returns true if the last system reset was caused by
+// this watchdog. It fails if ReportsWatchdogReset is false; even then, not all watchdogs report this accurately.
+func (w *Device) LastResetByWatchdog() (bool, error) {
+	if !w.ReportsWatchdogReset {
+		return false, errors.New("watchdog does not report resets, check ReportsWatchdogReset")
+	}
+	var err error
+	var flags int
+	ctrlErr := w.raw.Control(func(fd uintptr) {
+		flags, err = unix.IoctlGetInt(int(fd), unix.WDIOC_GETBOOTSTATUS)
+	})
+	if ctrlErr != nil {
+		return false, fmt.Errorf("when calling RawConn.Control: %w", ctrlErr)
+	}
+	if err != nil {
+		return false, fmt.Errorf("ioctl(WDIOC_GETBOOTSTATUS): %w", err)
+	}
+	return flags&unix.WDIOF_CARDRESET != 0, nil
+}
+
+// Close disables the watchdog (magic close) and frees all held resources.
+func (w *Device) Close() error {
+	f := w.f
+	if f == nil {
+		return nil
+	}
+	w.f = nil
+	_, writeErr := f.Write([]byte{'V'}) // 'V' asks the driver to stop on close.
+	if closeErr := f.Close(); writeErr == nil {
+		return closeErr
+	}
+	return writeErr
+}
+
+// CloseActive frees all resources and file handles without sending the
+// disabling write, so the watchdog stays active. Some other system has to
+// reopen and ping it before it expires, otherwise a reset occurs.
+func (w *Device) CloseActive() error {
+	f := w.f
+	if f == nil {
+		return nil
+	}
+	w.f = nil
+	return f.Close()
+}