| package nvme | 
 |  | 
 | import ( | 
 | 	"bytes" | 
 | 	"encoding/binary" | 
 | 	"fmt" | 
 | 	"math/big" | 
 | 	"time" | 
 | ) | 
 |  | 
 | // healthPage represents the raw data from a NVMe Health/SMART page. | 
 | // See Figure 93 in the spec. | 
 | type healthPage struct { | 
 | 	CriticalWarning         uint8 | 
 | 	CompositeTemperature    uint16 | 
 | 	AvailableSpare          uint8 | 
 | 	AvailableSpareThreshold uint8 | 
 | 	PercentageUsed          uint8 | 
 |  | 
 | 	_ [26]byte | 
 |  | 
 | 	DataUnitsRead               uint128le | 
 | 	DataUnitsWritten            uint128le | 
 | 	HostReadCommands            uint128le | 
 | 	HostWriteCommands           uint128le | 
 | 	ControllerBusyTime          uint128le | 
 | 	PowerCycles                 uint128le | 
 | 	PowerOnHours                uint128le | 
 | 	UnsafeSHutdowns             uint128le | 
 | 	MediaAndDataIntegrityErrors uint128le | 
 | 	ErrorInformationLogEntries  uint128le | 
 |  | 
 | 	WarningCompositeTemperatureTime  uint32 | 
 | 	CriticalCompositeTemperatureTime uint32 | 
 |  | 
 | 	TemperatureSensors [8]uint16 | 
 |  | 
 | 	ThermalMgmtTemperature1TransitionCount uint32 | 
 | 	ThermalMgmtTemperature2TransitionCount uint32 | 
 |  | 
 | 	_ [8]byte | 
 |  | 
 | 	TotalTimeForThermalMgmtTemperature1 uint32 | 
 | 	TotalTimeForThermalMgmtTemperature2 uint32 | 
 | } | 
 |  | 
 | // HealthInfo contains information related to the health of the NVMe device. | 
 | // | 
 | // Note that some values might be clamped under highly abnormal circumstances | 
 | // as they are reported as 128-bit integers which Go doesn't support. | 
 | // For easier handling values which are very unlikely to exceed 64 bits are | 
 | // exposed as 64 bit integers. | 
 | type HealthInfo struct { | 
 | 	// AvailableSpareSpaceCritical is set if the avilable spare threshold has | 
 | 	// fallen below the critical threshold. | 
 | 	AvailableSpareSpaceCritical bool | 
 | 	// TemperatureCritical is set if a temperature is outside the acceptable | 
 | 	// operating thresholds. | 
 | 	TemperatureCritical bool | 
 | 	// MediaCritical is set if significant media or internal issues affect the | 
 | 	// operation of the device. | 
 | 	MediaCritical bool | 
 | 	// ForcedReadOnly is set if the device is forced into read-only mode due | 
 | 	// to an error. | 
 | 	ForcedReadOnly bool | 
 | 	// VolatileMemoryBackupFailed is set if the volatile memory backup device | 
 | 	// has failed. | 
 | 	VolatileMemoryBackupFailed bool | 
 | 	// CompositeTemperatureKelvin contains a derived value representing the | 
 | 	// composite state of controller and namespace/flash temperature. | 
 | 	// The exact mechanism used to derive it is vendor-specific. | 
 | 	CompositeTemperatureKelvin uint16 | 
 | 	// AvailableSpare represents the relative amount (0-1) of spare capacity | 
 | 	// still unnused. | 
 | 	AvailableSpare float32 | 
 | 	// AvailableSpareThreshold represents the vendor-defined threshold which | 
 | 	// AvailableSpare shuld not fall under. | 
 | 	AvailableSpareThreshold float32 | 
 | 	// LifeUsed represents vendor-defined relative estimate of the life of | 
 | 	// the device which has been used up. It is allowed to exceed 1 and will | 
 | 	// be clamped by the device somewhere between 1.0 and 2.55. | 
 | 	LifeUsed float32 | 
 | 	// BytesRead contains the number of bytes read from the device. | 
 | 	// This value is only updated in 512KiB increments. | 
 | 	BytesRead *big.Int | 
 | 	// BytesWritten contains the number of bytes written to the device. | 
 | 	// This value is only updated in 512KiB increments. | 
 | 	BytesWritten *big.Int | 
 | 	// HostReadCommands contains the number of read commands completed by the | 
 | 	// controller. | 
 | 	HostReadCommands *big.Int | 
 | 	// HostWriteCommands contains the number of write commands completed by the | 
 | 	// controller. | 
 | 	HostWriteCommands *big.Int | 
 | 	// ControllerBusyTime contains the cumulative amount of time the controller | 
 | 	// has spent being busy (i.e. having at least one command outstanding on an | 
 | 	// I/O queue). This value is only updated in 1m increments. | 
 | 	ControllerBusyTime time.Duration | 
 | 	// PowerCycles contains the number of power cycles. | 
 | 	PowerCycles uint64 | 
 | 	// PowerOnHours contains the number of hours the controller has been | 
 | 	// powered on. Depending on the vendor implementation it may or may | 
 | 	// not contain time spent in a non-operational power state. | 
 | 	PowerOnHours uint64 | 
 | 	// UnsafeShutdown contains the number of power loss events without | 
 | 	// a prior shutdown notification from the host. | 
 | 	UnsafeShutdowns uint64 | 
 | 	// MediaAndDataIntegrityErrors contains the number of occurrences where the | 
 | 	// controller detecte an unrecovered data integrity error. | 
 | 	MediaAndDataIntegrityErrors uint64 | 
 | 	// ErrorInformationLogEntriesCount contains the number of Error | 
 | 	// Information log entries over the life of the controller. | 
 | 	ErrorInformationLogEntriesCount uint64 | 
 | 	// WarningCompositeTemperatureTime contains the amount of time the | 
 | 	// controller is operational while the composite temperature is greater | 
 | 	// than the warning composite threshold. | 
 | 	WarningCompositeTemperatureTime time.Duration | 
 | 	// CriticalCompositeTemperatureTime contains the amount of time the | 
 | 	// controller is operational while the composite temperature is greater | 
 | 	// than the critical composite threshold. | 
 | 	CriticalCompositeTemperatureTime time.Duration | 
 | 	// TemperatureSensorValues contains the current temperature in Kelvin as | 
 | 	// reported by up to 8 sensors on the device. A value of zero means that | 
 | 	// the given sensor is not available. | 
 | 	TemperatureSensorValues [8]uint16 | 
 | 	// ThermalMgmtTemperature1TransitionCount contains the number of times the | 
 | 	// controller transitioned to lower power active power states or performed | 
 | 	// vendor-specific thermal management actions to reduce temperature. | 
 | 	ThermalMgmtTemperature1TransitionCount uint32 | 
 | 	// ThermalMgmtTemperature2TransitionCount is the same as above, but | 
 | 	// for "heavier" thermal management actions including heavy throttling. | 
 | 	// The actual difference is vendor-specific. | 
 | 	ThermalMgmtTemperature2TransitionCount uint32 | 
 | 	// TotalTimeForThermalMgmtTemperature1 contains the total time the | 
 | 	// controller spent under "light" thermal management. | 
 | 	TotalTimeForThermalMgmtTemperature1 time.Duration | 
 | 	// TotalTimeForThermalMgmtTemperature2 contains the total time the | 
 | 	// controller spent under "heavy" thermal management. | 
 | 	TotalTimeForThermalMgmtTemperature2 time.Duration | 
 | } | 
 |  | 
 | // HasCriticalWarning returns true if any of the critical warnings | 
 | // (AvailableSpareSpaceCritical, TemperatureCritical, MediaCritical, | 
 | // ForcedReadOnly, VolatileMemoryBackupFailed) are active. | 
 | // If this returns true the NVMe medium has reason to believe that | 
 | // data availability or integrity is endangered. | 
 | func (h *HealthInfo) HasCriticalWarning() bool { | 
 | 	return h.AvailableSpareSpaceCritical || h.TemperatureCritical || h.MediaCritical || h.ForcedReadOnly || h.VolatileMemoryBackupFailed | 
 | } | 
 |  | 
 | // See Figure 93 Data Units Read | 
 | var dataUnit = big.NewInt(512 * 1000) | 
 |  | 
 | const ( | 
 | 	healthLogPage = 0x02 | 
 | ) | 
 |  | 
 | // GetHealthInfo gets health information from the NVMe device's health log page. | 
 | func (d *Device) GetHealthInfo() (*HealthInfo, error) { | 
 | 	var buf [512]byte | 
 |  | 
 | 	if err := d.GetLogPage(GlobalNamespace, healthLogPage, 0, 0, buf[:]); err != nil { | 
 | 		return nil, fmt.Errorf("unable to get health log page: %w", err) | 
 | 	} | 
 |  | 
 | 	var page healthPage | 
 | 	binary.Read(bytes.NewReader(buf[:]), binary.LittleEndian, &page) | 
 | 	var res HealthInfo | 
 | 	res.AvailableSpareSpaceCritical = page.CriticalWarning&(1<<0) != 0 | 
 | 	res.TemperatureCritical = page.CriticalWarning&(1<<1) != 0 | 
 | 	res.MediaCritical = page.CriticalWarning&(1<<2) != 0 | 
 | 	res.ForcedReadOnly = page.CriticalWarning&(1<<3) != 0 | 
 | 	res.VolatileMemoryBackupFailed = page.CriticalWarning&(1<<4) != 0 | 
 | 	res.CompositeTemperatureKelvin = page.CompositeTemperature | 
 | 	res.AvailableSpare = float32(page.AvailableSpare) / 100. | 
 | 	res.AvailableSpareThreshold = float32(page.AvailableSpareThreshold) / 100. | 
 | 	res.LifeUsed = float32(page.PercentageUsed) / 100. | 
 | 	res.BytesRead = new(big.Int).Mul(page.DataUnitsRead.BigInt(), dataUnit) | 
 | 	res.BytesWritten = new(big.Int).Mul(page.DataUnitsWritten.BigInt(), dataUnit) | 
 | 	res.HostReadCommands = page.HostReadCommands.BigInt() | 
 | 	res.HostWriteCommands = page.HostWriteCommands.BigInt() | 
 | 	res.ControllerBusyTime = time.Duration(page.ControllerBusyTime.Uint64()) * time.Minute | 
 | 	res.PowerCycles = page.PowerCycles.Uint64() | 
 | 	res.PowerOnHours = page.PowerOnHours.Uint64() | 
 | 	res.UnsafeShutdowns = page.UnsafeSHutdowns.Uint64() | 
 | 	res.MediaAndDataIntegrityErrors = page.MediaAndDataIntegrityErrors.Uint64() | 
 | 	res.ErrorInformationLogEntriesCount = page.ErrorInformationLogEntries.Uint64() | 
 | 	res.WarningCompositeTemperatureTime = time.Duration(page.WarningCompositeTemperatureTime) * time.Minute | 
 | 	res.CriticalCompositeTemperatureTime = time.Duration(page.CriticalCompositeTemperatureTime) * time.Minute | 
 | 	res.TemperatureSensorValues = page.TemperatureSensors | 
 | 	res.ThermalMgmtTemperature1TransitionCount = page.ThermalMgmtTemperature1TransitionCount | 
 | 	res.ThermalMgmtTemperature2TransitionCount = page.ThermalMgmtTemperature2TransitionCount | 
 | 	res.TotalTimeForThermalMgmtTemperature1 = time.Duration(page.TotalTimeForThermalMgmtTemperature1) * time.Second | 
 | 	res.TotalTimeForThermalMgmtTemperature2 = time.Duration(page.TotalTimeForThermalMgmtTemperature2) * time.Second | 
 | 	return &res, nil | 
 | } |