pkg/nvme: add NVMe package
This adds an NVMe package for performing various low-level operations on
NVMe devices. Only the most important (to us) calls are implemented as
NVMe has a vast API surface.
Change-Id: I532894c3c2eb780309993a1688226c92c91cdedf
Reviewed-on: https://review.monogon.dev/c/monogon/+/999
Reviewed-by: Mateusz Zalega <mateusz@monogon.tech>
Tested-by: Jenkins CI
diff --git a/metropolis/pkg/nvme/health.go b/metropolis/pkg/nvme/health.go
new file mode 100644
index 0000000..775742f
--- /dev/null
+++ b/metropolis/pkg/nvme/health.go
@@ -0,0 +1,196 @@
+package nvme
+
+import (
+ "bytes"
+ "encoding/binary"
+ "fmt"
+ "math/big"
+ "time"
+)
+
+// healthPage mirrors the byte layout of the raw NVMe SMART / Health
+// Information log page so that it can be decoded with encoding/binary.
+// See Figure 93 in the spec. Field order and widths define the byte offsets,
+// so they must not be changed without consulting the spec.
+type healthPage struct {
+	CriticalWarning         uint8
+	CompositeTemperature    uint16
+	AvailableSpare          uint8
+	AvailableSpareThreshold uint8
+	PercentageUsed          uint8
+
+	_ [26]byte // bytes 6-31: padding up to the first 128-bit counter
+
+	DataUnitsRead               uint128le
+	DataUnitsWritten            uint128le
+	HostReadCommands            uint128le
+	HostWriteCommands           uint128le
+	ControllerBusyTime          uint128le
+	PowerCycles                 uint128le
+	PowerOnHours                uint128le
+	UnsafeSHutdowns             uint128le
+	MediaAndDataIntegrityErrors uint128le
+	ErrorInformationLogEntries  uint128le
+
+	WarningCompositeTemperatureTime  uint32
+	CriticalCompositeTemperatureTime uint32
+
+	TemperatureSensors [8]uint16
+
+	// The four thermal management fields are contiguous (bytes 216-231).
+	ThermalMgmtTemperature1TransitionCount uint32
+	ThermalMgmtTemperature2TransitionCount uint32
+	TotalTimeForThermalMgmtTemperature1    uint32
+	TotalTimeForThermalMgmtTemperature2    uint32
+}
+
+// HealthInfo contains information related to the health of the NVMe device.
+//
+// Note that some values might be clamped under highly abnormal circumstances
+// as they are reported as 128-bit integers which Go doesn't support.
+// For easier handling values which are very unlikely to exceed 64 bits are
+// exposed as 64 bit integers.
+type HealthInfo struct {
+ // AvailableSpareSpaceCritical is set if the available spare capacity has
+ // fallen below the threshold (see AvailableSpareThreshold).
+ AvailableSpareSpaceCritical bool
+ // TemperatureCritical is set if a temperature is outside the acceptable
+ // operating thresholds.
+ TemperatureCritical bool
+ // MediaCritical is set if significant media or internal issues affect the
+ // operation of the device.
+ MediaCritical bool
+ // ForcedReadOnly is set if the device is forced into read-only mode due
+ // to an error.
+ ForcedReadOnly bool
+ // VolatileMemoryBackupFailed is set if the volatile memory backup device
+ // has failed.
+ VolatileMemoryBackupFailed bool
+ // CompositeTemperatureKelvin contains a derived value representing the
+ // composite state of controller and namespace/flash temperature.
+ // The exact mechanism used to derive it is vendor-specific.
+ CompositeTemperatureKelvin uint16
+ // AvailableSpare represents the relative amount (0-1) of spare capacity
+ // still unused.
+ AvailableSpare float32
+ // AvailableSpareThreshold represents the vendor-defined threshold which
+ // AvailableSpare should not fall under.
+ AvailableSpareThreshold float32
+ // LifeUsed represents a vendor-defined relative estimate of the life of
+ // the device which has been used up. It is allowed to exceed 1 and will
+ // be clamped by the device somewhere between 1.0 and 2.55.
+ LifeUsed float32
+ // BytesRead contains the number of bytes read from the device.
+ // This value is only updated in increments of 512000 bytes (1000 * 512).
+ BytesRead *big.Int
+ // BytesWritten contains the number of bytes written to the device.
+ // This value is only updated in increments of 512000 bytes (1000 * 512).
+ BytesWritten *big.Int
+ // HostReadCommands contains the number of read commands completed by the
+ // controller.
+ HostReadCommands *big.Int
+ // HostWriteCommands contains the number of write commands completed by the
+ // controller.
+ HostWriteCommands *big.Int
+ // ControllerBusyTime contains the cumulative amount of time the controller
+ // has spent being busy (i.e. having at least one command outstanding on an
+ // I/O queue). This value is only updated in 1m increments.
+ ControllerBusyTime time.Duration
+ // PowerCycles contains the number of power cycles.
+ PowerCycles uint64
+ // PowerOnHours contains the number of hours the controller has been
+ // powered on. Depending on the vendor implementation it may or may
+ // not contain time spent in a non-operational power state.
+ PowerOnHours uint64
+ // UnsafeShutdowns contains the number of power loss events without
+ // a prior shutdown notification from the host.
+ UnsafeShutdowns uint64
+ // MediaAndDataIntegrityErrors contains the number of occurrences where the
+ // controller detected an unrecovered data integrity error.
+ MediaAndDataIntegrityErrors uint64
+ // ErrorInformationLogEntriesCount contains the number of Error
+ // Information log entries over the life of the controller.
+ ErrorInformationLogEntriesCount uint64
+ // WarningCompositeTemperatureTime contains the amount of time the
+ // controller is operational while the composite temperature is greater
+ // than the warning composite threshold.
+ WarningCompositeTemperatureTime time.Duration
+ // CriticalCompositeTemperatureTime contains the amount of time the
+ // controller is operational while the composite temperature is greater
+ // than the critical composite threshold.
+ CriticalCompositeTemperatureTime time.Duration
+ // TemperatureSensorValues contains the current temperature in Kelvin as
+ // reported by up to 8 sensors on the device. A value of zero means that
+ // the given sensor is not available.
+ TemperatureSensorValues [8]uint16
+ // ThermalMgmtTemperature1TransitionCount contains the number of times the
+ // controller transitioned to lower power active power states or performed
+ // vendor-specific thermal management actions to reduce temperature.
+ ThermalMgmtTemperature1TransitionCount uint32
+ // ThermalMgmtTemperature2TransitionCount is the same as above, but
+ // for "heavier" thermal management actions including heavy throttling.
+ // The actual difference is vendor-specific.
+ ThermalMgmtTemperature2TransitionCount uint32
+ // TotalTimeForThermalMgmtTemperature1 contains the total time the
+ // controller spent under "light" thermal management.
+ TotalTimeForThermalMgmtTemperature1 time.Duration
+ // TotalTimeForThermalMgmtTemperature2 contains the total time the
+ // controller spent under "heavy" thermal management.
+ TotalTimeForThermalMgmtTemperature2 time.Duration
+}
+
+// HasCriticalWarning reports whether at least one critical warning flag
+// (AvailableSpareSpaceCritical, TemperatureCritical, MediaCritical,
+// ForcedReadOnly, VolatileMemoryBackupFailed) is raised. A true result
+// means the device itself signals a risk to data availability or integrity.
+func (h *HealthInfo) HasCriticalWarning() bool {
+	degraded := h.AvailableSpareSpaceCritical || h.TemperatureCritical || h.MediaCritical
+	return degraded || h.ForcedReadOnly || h.VolatileMemoryBackupFailed
+}
+
+// dataUnit is the size in bytes of one data unit. See Figure 93 Data Units Read.
+var dataUnit = big.NewInt(512 * 1000) // scales DataUnitsRead/DataUnitsWritten to bytes
+
+const (
+ healthLogPage = 0x02 // page ID handed to GetLogPage when fetching the health log
+)
+
+// GetHealthInfo reads and decodes the device's health log page; it fails if either step fails.
+func (d *Device) GetHealthInfo() (*HealthInfo, error) {
+	var buf [512]byte // filled by GetLogPage with the raw log page
+	if err := d.GetLogPage(GlobalNamespace, healthLogPage, 0, 0, buf[:]); err != nil {
+		return nil, fmt.Errorf("unable to get health log page: %w", err)
+	}
+	var page healthPage
+	if err := binary.Read(bytes.NewReader(buf[:]), binary.LittleEndian, &page); err != nil {
+		return nil, fmt.Errorf("unable to decode health log page: %w", err)
+	}
+	var res HealthInfo
+	res.AvailableSpareSpaceCritical = page.CriticalWarning&(1<<0) != 0 // one flag per CriticalWarning bit
+	res.TemperatureCritical = page.CriticalWarning&(1<<1) != 0
+	res.MediaCritical = page.CriticalWarning&(1<<2) != 0
+	res.ForcedReadOnly = page.CriticalWarning&(1<<3) != 0
+	res.VolatileMemoryBackupFailed = page.CriticalWarning&(1<<4) != 0
+	res.CompositeTemperatureKelvin = page.CompositeTemperature
+	res.AvailableSpare = float32(page.AvailableSpare) / 100. // percent -> fraction
+	res.AvailableSpareThreshold = float32(page.AvailableSpareThreshold) / 100.
+	res.LifeUsed = float32(page.PercentageUsed) / 100.
+	res.BytesRead = new(big.Int).Mul(page.DataUnitsRead.BigInt(), dataUnit) // data units -> bytes
+	res.BytesWritten = new(big.Int).Mul(page.DataUnitsWritten.BigInt(), dataUnit)
+	res.HostReadCommands = page.HostReadCommands.BigInt()
+	res.HostWriteCommands = page.HostWriteCommands.BigInt()
+	res.ControllerBusyTime = time.Duration(page.ControllerBusyTime.Uint64()) * time.Minute // raw unit: minutes
+	res.PowerCycles = page.PowerCycles.Uint64()
+	res.PowerOnHours = page.PowerOnHours.Uint64()
+	res.UnsafeShutdowns = page.UnsafeSHutdowns.Uint64()
+	res.MediaAndDataIntegrityErrors = page.MediaAndDataIntegrityErrors.Uint64()
+	res.ErrorInformationLogEntriesCount = page.ErrorInformationLogEntries.Uint64()
+	res.WarningCompositeTemperatureTime = time.Duration(page.WarningCompositeTemperatureTime) * time.Minute
+	res.CriticalCompositeTemperatureTime = time.Duration(page.CriticalCompositeTemperatureTime) * time.Minute
+	res.TemperatureSensorValues = page.TemperatureSensors
+	res.ThermalMgmtTemperature1TransitionCount = page.ThermalMgmtTemperature1TransitionCount
+	res.ThermalMgmtTemperature2TransitionCount = page.ThermalMgmtTemperature2TransitionCount
+	res.TotalTimeForThermalMgmtTemperature1 = time.Duration(page.TotalTimeForThermalMgmtTemperature1) * time.Second // raw unit: seconds
+	res.TotalTimeForThermalMgmtTemperature2 = time.Duration(page.TotalTimeForThermalMgmtTemperature2) * time.Second
+	return &res, nil
+}