m/p/blockdev: init

Adds blockdev, a package providing a Go interface for generic block
devices as well as an implementation of it for Linux and auxiliary
types.

This will replace most ad-hoc block device handling in the monorepo.

Change-Id: I3a4e3b7c31a8344f7859210bbb4942977d1ad1d2
Reviewed-on: https://review.monogon.dev/c/monogon/+/1871
Tested-by: Jenkins CI
Reviewed-by: Serge Bazanski <serge@monogon.tech>
diff --git a/metropolis/pkg/blockdev/blockdev.go b/metropolis/pkg/blockdev/blockdev.go
new file mode 100644
index 0000000..1cb9551
--- /dev/null
+++ b/metropolis/pkg/blockdev/blockdev.go
@@ -0,0 +1,200 @@
+package blockdev
+
+import (
+	"errors"
+	"fmt"
+	"io"
+)
+
+// ErrUnsupported is a sentinel error carrying the message "unsupported".
+// NOTE(review): presumably returned for operations an implementation cannot
+// perform; nothing in this part of the file returns it - confirm against the
+// implementations.
+//
+// Replace with errors.ErrUnsupported once we migrate to Go 1.21
+var ErrUnsupported = errors.New("unsupported")
+
+// ErrNotBlockDevice is a sentinel error carrying the message "not a block
+// device". NOTE(review): presumably returned when something that is not a
+// block device is opened as one; nothing in this part of the file returns
+// it - confirm against the implementations.
+var ErrNotBlockDevice = errors.New("not a block device")
+
+// BlockDev represents a generic block device made up of equally-sized blocks.
+// All offsets and intervals are expressed in bytes and must be aligned to
+// BlockSize and are recommended to be aligned to OptimalBlockSize if feasible.
+// Unless stated otherwise, intervals are inclusive-exclusive, i.e. the
+// start byte is included but the end byte is not.
+type BlockDev interface {
+	io.ReaderAt
+	io.WriterAt
+	// BlockSize returns the block size of the block device in bytes. This must
+	// be a power of two and is commonly (but not always) either 512 or 4096.
+	BlockSize() int64
+
+	// BlockCount returns the number of blocks on the block device or -1 if it
+	// is an image with an undefined size.
+	BlockCount() int64
+
+	// OptimalBlockSize returns the optimal block size in bytes for aligning
+	// to as well as issuing I/O. I/O operations with block sizes below this
+	// one might incur read-write overhead. This is the larger of the physical
+	// block size and a device-reported value if available.
+	OptimalBlockSize() int64
+
+	// Discard discards a continuous set of blocks. Discarding means the
+	// underlying device gets notified that the data in these blocks is no
+	// longer needed. This can improve performance of the device (as it no
+	// longer needs to preserve the unused data) as well as bulk erase
+	// operations. This command is advisory and not all implementations support
+	// it. The contents of discarded blocks are implementation-defined.
+	Discard(startByte int64, endByte int64) error
+
+	// Zero zeroes a continuous set of blocks. On certain implementations this
+	// can be significantly faster than just calling Write with zeroes.
+	Zero(startByte, endByte int64) error
+}
+
+// NewRWS wraps b in a ReadWriteSeeker whose position starts at byte 0.
+func NewRWS(b BlockDev) *ReadWriteSeeker {
+	rws := new(ReadWriteSeeker)
+	rws.b = b
+	return rws
+}
+
+// ReadWriteSeeker provides an adapter implementing io.ReadWriteSeeker on top
+// of a BlockDev. b is the device all reads and writes are forwarded to and
+// currPos is the byte offset at which the next Read or Write starts.
+type ReadWriteSeeker struct {
+	b       BlockDev
+	currPos int64
+}
+
+// Read fills p from the underlying BlockDev starting at the current position
+// and advances the position by the number of bytes read. The byte count and
+// error reported by ReadAt are passed through unchanged.
+func (s *ReadWriteSeeker) Read(p []byte) (n int, err error) {
+	read, readErr := s.b.ReadAt(p, s.currPos)
+	s.currPos += int64(read)
+	return read, readErr
+}
+
+// Write stores p on the underlying BlockDev starting at the current position
+// and advances the position by the number of bytes written. The byte count
+// and error reported by WriteAt are passed through unchanged.
+func (s *ReadWriteSeeker) Write(p []byte) (n int, err error) {
+	written, writeErr := s.b.WriteAt(p, s.currPos)
+	s.currPos += int64(written)
+	return written, writeErr
+}
+
+// Seek sets the position used by the next Read or Write and returns the new
+// position in bytes from the start of the device. It follows the io.Seeker
+// contract: offset is interpreted relative to the start of the device
+// (io.SeekStart), the current position (io.SeekCurrent) or the end of the
+// device (io.SeekEnd, where a negative offset addresses bytes before the
+// end).
+//
+// An unknown whence, a resulting negative position, or io.SeekEnd on a device
+// whose BlockCount is negative (undefined size) is reported as an error and
+// leaves the current position untouched.
+func (s *ReadWriteSeeker) Seek(offset int64, whence int) (int64, error) {
+	var base int64
+	switch whence {
+	case io.SeekStart:
+		base = 0
+	case io.SeekCurrent:
+		base = s.currPos
+	case io.SeekEnd:
+		blockCount := s.b.BlockCount()
+		if blockCount < 0 {
+			// BlockCount reports -1 for images without a defined size, so
+			// there is no end to seek relative to.
+			return 0, fmt.Errorf("cannot seek relative to end of device with undefined size: %w", ErrUnsupported)
+		}
+		// The offset is added to the device size, not subtracted from it.
+		base = blockCount * s.b.BlockSize()
+	default:
+		return 0, fmt.Errorf("invalid whence (%d)", whence)
+	}
+	newPos := base + offset
+	if newPos < 0 {
+		return 0, fmt.Errorf("invalid offset (%d): resulting position (%d) is negative", offset, newPos)
+	}
+	s.currPos = newPos
+	return newPos, nil
+}
+
+// ErrOutOfBounds is returned by Section.WriteAt when a write starts at or
+// beyond the end of the section, or when it had to be cut short because it
+// extends past the end of the section.
+var ErrOutOfBounds = errors.New("write out of bounds")
+
+// NewSection returns a Section exposing the blocks from startBlock up to,
+// but not including, endBlock of b as a BlockDev of its own.
+func NewSection(b BlockDev, startBlock, endBlock int64) *Section {
+	sec := new(Section)
+	sec.b = b
+	sec.startBlock, sec.endBlock = startBlock, endBlock
+	return sec
+}
+
+// Section implements BlockDev on a slice of another BlockDev given a startBlock
+// and endBlock. b is the underlying device; startBlock (inclusive) and
+// endBlock (exclusive) are block indices on b, which the methods convert to
+// byte offsets by multiplying with b's block size.
+type Section struct {
+	b                    BlockDev
+	startBlock, endBlock int64
+}
+
+// ReadAt reads up to len(p) bytes into p starting at byte offset off, which
+// is relative to the start of the section, and returns the number of bytes
+// read.
+//
+// A negative off is rejected with an error so that data in front of the
+// section cannot be read. Reads starting at or beyond the end of the section
+// return 0 and io.EOF. Reads extending past the end are truncated to the
+// section; as required by io.ReaderAt, such a short read always comes with a
+// non-nil error (io.EOF unless the underlying device reported its own).
+func (s *Section) ReadAt(p []byte, off int64) (n int, err error) {
+	if off < 0 {
+		return 0, fmt.Errorf("negative offset (%d)", off)
+	}
+	blockSize := s.b.BlockSize()
+	// bOff is the offset on the underlying device.
+	bOff := off + (s.startBlock * blockSize)
+	bytesToEnd := (s.endBlock * blockSize) - bOff
+	if bytesToEnd <= 0 {
+		return 0, io.EOF
+	}
+	if bytesToEnd < int64(len(p)) {
+		n, err = s.b.ReadAt(p[:bytesToEnd], bOff)
+		if err == nil {
+			// Fewer than len(p) bytes were read, explain why.
+			err = io.EOF
+		}
+		return n, err
+	}
+	return s.b.ReadAt(p, bOff)
+}
+
+// WriteAt writes p to the section starting at byte offset off, which is
+// relative to the start of the section, and returns the number of bytes
+// written.
+//
+// A negative off is rejected with an error wrapping ErrOutOfBounds so that
+// data in front of the section cannot be overwritten. Writes starting at or
+// beyond the end of the section return 0 and ErrOutOfBounds. Writes extending
+// past the end are truncated to the section and return ErrOutOfBounds, unless
+// the underlying device reported an error of its own, which takes priority.
+func (s *Section) WriteAt(p []byte, off int64) (n int, err error) {
+	if off < 0 {
+		return 0, fmt.Errorf("negative offset (%d): %w", off, ErrOutOfBounds)
+	}
+	blockSize := s.b.BlockSize()
+	// bOff is the offset on the underlying device.
+	bOff := off + (s.startBlock * blockSize)
+	bytesToEnd := (s.endBlock * blockSize) - bOff
+	if bytesToEnd <= 0 {
+		return 0, ErrOutOfBounds
+	}
+	if bytesToEnd < int64(len(p)) {
+		n, err = s.b.WriteAt(p[:bytesToEnd], bOff)
+		if err != nil {
+			// If an error happened, prioritize that error
+			return n, err
+		}
+		// Otherwise, return ErrOutOfBounds as even short writes must return an
+		// error.
+		return n, ErrOutOfBounds
+	}
+	return s.b.WriteAt(p, bOff)
+}
+
+// BlockCount returns the number of blocks in the section, i.e. the distance
+// between its endBlock and its startBlock.
+func (s *Section) BlockCount() int64 {
+	return s.endBlock - s.startBlock
+}
+
+// BlockSize returns the block size in bytes as reported by the underlying
+// BlockDev.
+func (s *Section) BlockSize() int64 {
+	return s.b.BlockSize()
+}
+
+// inRange checks that the byte interval from startByte (inclusive) to
+// endByte (exclusive), given relative to the start of the section, lies
+// inside the section. It returns nil if it does and a descriptive error
+// otherwise.
+//
+// The interval is rejected if startByte is negative, if startByte is bigger
+// than endByte, if startByte is at or beyond the end of the section, or if
+// endByte is beyond the end of the section.
+func (s *Section) inRange(startByte, endByte int64) error {
+	if startByte < 0 {
+		// Without this check a negative start would address data in front of
+		// the section on the underlying device.
+		return fmt.Errorf("startByte (%d) out of range: must not be negative", startByte)
+	}
+	if startByte > endByte {
+		return fmt.Errorf("invalid range: startByte (%d) bigger than endByte (%d)", startByte, endByte)
+	}
+	sectionLen := s.BlockCount() * s.BlockSize()
+	if startByte >= sectionLen {
+		return fmt.Errorf("startByte (%d) out of range (%d)", startByte, sectionLen)
+	}
+	if endByte > sectionLen {
+		return fmt.Errorf("endByte (%d) out of range (%d)", endByte, sectionLen)
+	}
+	return nil
+}
+
+// Discard discards the byte interval from startByte (inclusive) to endByte
+// (exclusive), given relative to the start of the section, on the underlying
+// BlockDev. The interval is validated against the section bounds first; an
+// invalid interval is returned as an error without calling the underlying
+// device.
+func (s *Section) Discard(startByte, endByte int64) error {
+	if err := s.inRange(startByte, endByte); err != nil {
+		return err
+	}
+	// startBlock is a block index, so it has to be converted to a byte offset
+	// before it can be added to the section-relative byte positions.
+	base := s.startBlock * s.b.BlockSize()
+	return s.b.Discard(base+startByte, base+endByte)
+}
+
+// OptimalBlockSize returns the optimal block size in bytes as reported by
+// the underlying BlockDev.
+func (s *Section) OptimalBlockSize() int64 {
+	return s.b.OptimalBlockSize()
+}
+
+// Zero zeroes the byte interval from startByte (inclusive) to endByte
+// (exclusive), given relative to the start of the section, on the underlying
+// BlockDev. The interval is validated against the section bounds first; an
+// invalid interval is returned as an error without calling the underlying
+// device.
+func (s *Section) Zero(startByte, endByte int64) error {
+	if err := s.inRange(startByte, endByte); err != nil {
+		return err
+	}
+	// startBlock is a block index, so it has to be converted to a byte offset
+	// before it can be added to the section-relative byte positions.
+	base := s.startBlock * s.b.BlockSize()
+	return s.b.Zero(base+startByte, base+endByte)
+}
+
+// GenericZero implements software-based zeroing. This can be used to implement
+// Zero when no acceleration is available or desired.
+//
+// It writes zeroes to the byte interval from startByte (inclusive) to endByte
+// (exclusive) of b using WriteAt. Both bounds need to be aligned to the block
+// size of b and startByte must not be bigger than endByte, otherwise an error
+// is returned and nothing is written. An empty interval is a no-op. Errors
+// from WriteAt are wrapped and returned; blocks written before the failure
+// stay zeroed.
+func GenericZero(b BlockDev, startByte, endByte int64) error {
+	blockSize := b.BlockSize()
+	if startByte%blockSize != 0 {
+		return fmt.Errorf("startByte (%d) needs to be aligned to block size (%d)", startByte, blockSize)
+	}
+	if endByte%blockSize != 0 {
+		return fmt.Errorf("endByte (%d) needs to be aligned to block size (%d)", endByte, blockSize)
+	}
+	if startByte > endByte {
+		// A reversed interval would otherwise lead to a negative buffer size.
+		return fmt.Errorf("invalid range: startByte (%d) bigger than endByte (%d)", startByte, endByte)
+	}
+	if startByte == endByte {
+		return nil
+	}
+	// Choose buffer size close to 16MiB or the range to be zeroed, whatever
+	// is smaller.
+	bufSizeTarget := int64(16 * 1024 * 1024)
+	if endByte-startByte < bufSizeTarget {
+		bufSizeTarget = endByte - startByte
+	}
+	bufSize := (bufSizeTarget / blockSize) * blockSize
+	if bufSize < blockSize {
+		// Only reachable for block sizes above the 16MiB target. The interval
+		// is aligned and non-empty, so it spans at least one block; use one
+		// block per write to guarantee the loop below makes progress.
+		bufSize = blockSize
+	}
+	buf := make([]byte, bufSize)
+	for i := startByte; i < endByte; i += bufSize {
+		if endByte-i < bufSize {
+			// Last chunk: shrink the buffer to the remaining bytes.
+			buf = buf[:endByte-i]
+		}
+		if _, err := b.WriteAt(buf, i); err != nil {
+			return fmt.Errorf("while writing zeroes: %w", err)
+		}
+	}
+	return nil
+}