pkg/nvme: add NVMe package

This adds an NVMe package for performing various low-level operations on
NVMe devices. Only the most important (to us) calls are implemented as
NVMe has a vast API surface.

Change-Id: I532894c3c2eb780309993a1688226c92c91cdedf
Reviewed-on: https://review.monogon.dev/c/monogon/+/999
Reviewed-by: Mateusz Zalega <mateusz@monogon.tech>
Tested-by: Jenkins CI
diff --git a/metropolis/pkg/nvme/health.go b/metropolis/pkg/nvme/health.go
new file mode 100644
index 0000000..775742f
--- /dev/null
+++ b/metropolis/pkg/nvme/health.go
@@ -0,0 +1,196 @@
+package nvme
+
+import (
+	"bytes"
+	"encoding/binary"
+	"fmt"
+	"math/big"
+	"time"
+)
+
+// healthPage represents the raw data from an NVMe Health/SMART page as laid
+// out in Figure 93 of the spec; it is decoded sequentially by binary.Read.
+type healthPage struct {
+	CriticalWarning         uint8
+	CompositeTemperature    uint16
+	AvailableSpare          uint8
+	AvailableSpareThreshold uint8
+	PercentageUsed          uint8
+
+	_ [26]byte
+
+	DataUnitsRead               uint128le
+	DataUnitsWritten            uint128le
+	HostReadCommands            uint128le
+	HostWriteCommands           uint128le
+	ControllerBusyTime          uint128le
+	PowerCycles                 uint128le
+	PowerOnHours                uint128le
+	UnsafeSHutdowns             uint128le
+	MediaAndDataIntegrityErrors uint128le
+	ErrorInformationLogEntries  uint128le
+
+	WarningCompositeTemperatureTime  uint32
+	CriticalCompositeTemperatureTime uint32
+
+	TemperatureSensors [8]uint16
+
+	ThermalMgmtTemperature1TransitionCount uint32
+	ThermalMgmtTemperature2TransitionCount uint32
+
+	// The total time counters immediately follow the transition counts (bytes
+	// 224-231 of the page); the reserved area only starts at byte 232.
+	TotalTimeForThermalMgmtTemperature1 uint32
+	TotalTimeForThermalMgmtTemperature2 uint32
+}
+
+// HealthInfo contains information related to the health of the NVMe device.
+//
+// Note that some values might be clamped under highly abnormal circumstances
+// as they are reported as 128-bit integers which Go doesn't support.
+// For easier handling values which are very unlikely to exceed 64 bits are
+// exposed as 64 bit integers.
+type HealthInfo struct {
+	// AvailableSpareSpaceCritical is set if the available spare capacity has
+	// fallen below the threshold.
+	AvailableSpareSpaceCritical bool
+	// TemperatureCritical is set if a temperature is outside the acceptable
+	// operating thresholds.
+	TemperatureCritical bool
+	// MediaCritical is set if significant media or internal issues affect the
+	// operation of the device.
+	MediaCritical bool
+	// ForcedReadOnly is set if the device is forced into read-only mode due
+	// to an error.
+	ForcedReadOnly bool
+	// VolatileMemoryBackupFailed is set if the volatile memory backup device
+	// has failed.
+	VolatileMemoryBackupFailed bool
+	// CompositeTemperatureKelvin contains a derived value representing the
+	// composite state of controller and namespace/flash temperature.
+	// The exact mechanism used to derive it is vendor-specific.
+	CompositeTemperatureKelvin uint16
+	// AvailableSpare represents the relative amount (0-1) of spare capacity
+	// still unused.
+	AvailableSpare float32
+	// AvailableSpareThreshold represents the vendor-defined threshold which
+	// AvailableSpare should not fall under.
+	AvailableSpareThreshold float32
+	// LifeUsed represents the vendor-defined relative estimate of the life of
+	// the device which has been used up. It is allowed to exceed 1 and will
+	// be clamped by the device somewhere between 1.0 and 2.55.
+	LifeUsed float32
+	// BytesRead contains the number of bytes read from the device.
+	// This value is only updated in increments of 512,000 bytes.
+	BytesRead *big.Int
+	// BytesWritten contains the number of bytes written to the device.
+	// This value is only updated in increments of 512,000 bytes.
+	BytesWritten *big.Int
+	// HostReadCommands contains the number of read commands completed by the
+	// controller.
+	HostReadCommands *big.Int
+	// HostWriteCommands contains the number of write commands completed by the
+	// controller.
+	HostWriteCommands *big.Int
+	// ControllerBusyTime contains the cumulative amount of time the controller
+	// has spent being busy (i.e. having at least one command outstanding on an
+	// I/O queue). This value is only updated in 1 minute increments.
+	ControllerBusyTime time.Duration
+	// PowerCycles contains the number of power cycles.
+	PowerCycles uint64
+	// PowerOnHours contains the number of hours the controller has been
+	// powered on. Depending on the vendor implementation it may or may
+	// not contain time spent in a non-operational power state.
+	PowerOnHours uint64
+	// UnsafeShutdowns contains the number of power loss events without
+	// a prior shutdown notification from the host.
+	UnsafeShutdowns uint64
+	// MediaAndDataIntegrityErrors contains the number of occurrences where the
+	// controller detected an unrecovered data integrity error.
+	MediaAndDataIntegrityErrors uint64
+	// ErrorInformationLogEntriesCount contains the number of Error
+	// Information log entries over the life of the controller.
+	ErrorInformationLogEntriesCount uint64
+	// WarningCompositeTemperatureTime contains the amount of time the
+	// controller is operational while the composite temperature is greater
+	// than the warning composite threshold.
+	WarningCompositeTemperatureTime time.Duration
+	// CriticalCompositeTemperatureTime contains the amount of time the
+	// controller is operational while the composite temperature is greater
+	// than the critical composite threshold.
+	CriticalCompositeTemperatureTime time.Duration
+	// TemperatureSensorValues contains the current temperature in Kelvin as
+	// reported by up to 8 sensors on the device. A value of zero means that
+	// the given sensor is not available.
+	TemperatureSensorValues [8]uint16
+	// ThermalMgmtTemperature1TransitionCount contains the number of times the
+	// controller transitioned to lower power active power states or performed
+	// vendor-specific thermal management actions to reduce temperature.
+	ThermalMgmtTemperature1TransitionCount uint32
+	// ThermalMgmtTemperature2TransitionCount is the same as above, but
+	// for "heavier" thermal management actions including heavy throttling.
+	// The actual difference is vendor-specific.
+	ThermalMgmtTemperature2TransitionCount uint32
+	// TotalTimeForThermalMgmtTemperature1 contains the total time the
+	// controller spent under "light" thermal management.
+	TotalTimeForThermalMgmtTemperature1 time.Duration
+	// TotalTimeForThermalMgmtTemperature2 contains the total time the
+	// controller spent under "heavy" thermal management.
+	TotalTimeForThermalMgmtTemperature2 time.Duration
+}
+
+// HasCriticalWarning reports whether at least one of the critical warning
+// flags (AvailableSpareSpaceCritical, TemperatureCritical, MediaCritical,
+// ForcedReadOnly, VolatileMemoryBackupFailed) is set. A true result means
+// the device itself signals that data availability or integrity is at risk.
+func (h *HealthInfo) HasCriticalWarning() bool {
+	deviceFault := h.MediaCritical || h.ForcedReadOnly || h.VolatileMemoryBackupFailed
+	return h.AvailableSpareSpaceCritical || h.TemperatureCritical || deviceFault
+}
+
+// dataUnit is the number of bytes per data unit. See Figure 93 Data Units Read
+var dataUnit = big.NewInt(512 * 1000)
+
+const healthLogPage = 0x02 // log page identifier requested by GetHealthInfo
+
+// GetHealthInfo gets health information from the NVMe device's health log page.
+// The raw 512-byte page is requested with GlobalNamespace and decoded as
+// little endian; an error is returned if fetching or decoding it fails.
+func (d *Device) GetHealthInfo() (*HealthInfo, error) {
+	var buf [512]byte
+	if err := d.GetLogPage(GlobalNamespace, healthLogPage, 0, 0, buf[:]); err != nil {
+		return nil, fmt.Errorf("unable to get health log page: %w", err)
+	}
+	var page healthPage
+	if err := binary.Read(bytes.NewReader(buf[:]), binary.LittleEndian, &page); err != nil {
+		return nil, fmt.Errorf("unable to parse health log page: %w", err)
+	}
+	var res HealthInfo
+	res.AvailableSpareSpaceCritical = page.CriticalWarning&(1<<0) != 0 // CriticalWarning is a bit field
+	res.TemperatureCritical = page.CriticalWarning&(1<<1) != 0
+	res.MediaCritical = page.CriticalWarning&(1<<2) != 0
+	res.ForcedReadOnly = page.CriticalWarning&(1<<3) != 0
+	res.VolatileMemoryBackupFailed = page.CriticalWarning&(1<<4) != 0
+	res.CompositeTemperatureKelvin = page.CompositeTemperature
+	res.AvailableSpare = float32(page.AvailableSpare) / 100. // raw values are percentages
+	res.AvailableSpareThreshold = float32(page.AvailableSpareThreshold) / 100.
+	res.LifeUsed = float32(page.PercentageUsed) / 100.
+	res.BytesRead = new(big.Int).Mul(page.DataUnitsRead.BigInt(), dataUnit) // data units to bytes
+	res.BytesWritten = new(big.Int).Mul(page.DataUnitsWritten.BigInt(), dataUnit)
+	res.HostReadCommands = page.HostReadCommands.BigInt()
+	res.HostWriteCommands = page.HostWriteCommands.BigInt()
+	res.ControllerBusyTime = time.Duration(page.ControllerBusyTime.Uint64()) * time.Minute
+	res.PowerCycles = page.PowerCycles.Uint64()
+	res.PowerOnHours = page.PowerOnHours.Uint64()
+	res.UnsafeShutdowns = page.UnsafeSHutdowns.Uint64()
+	res.MediaAndDataIntegrityErrors = page.MediaAndDataIntegrityErrors.Uint64()
+	res.ErrorInformationLogEntriesCount = page.ErrorInformationLogEntries.Uint64()
+	res.WarningCompositeTemperatureTime = time.Duration(page.WarningCompositeTemperatureTime) * time.Minute
+	res.CriticalCompositeTemperatureTime = time.Duration(page.CriticalCompositeTemperatureTime) * time.Minute
+	res.TemperatureSensorValues = page.TemperatureSensors
+	res.ThermalMgmtTemperature1TransitionCount = page.ThermalMgmtTemperature1TransitionCount
+	res.ThermalMgmtTemperature2TransitionCount = page.ThermalMgmtTemperature2TransitionCount
+	res.TotalTimeForThermalMgmtTemperature1 = time.Duration(page.TotalTimeForThermalMgmtTemperature1) * time.Second
+	res.TotalTimeForThermalMgmtTemperature2 = time.Duration(page.TotalTimeForThermalMgmtTemperature2) * time.Second
+	return &res, nil
+}