blob: a8e6a8a511e3a86c1cc03fa921f014c25846fae8 [file] [log] [blame]
//go:build linux
package blockdev
import (
"errors"
"fmt"
"math/bits"
"os"
"syscall"
"unsafe"
"golang.org/x/sys/unix"
)
// Device is a handle to a Linux block device. Values are constructed by
// FromFileHandle, which fills in every field.
type Device struct {
	// backend is the open device file; reads, writes and Close go through it.
	backend *os.File
	// rawConn exposes the file descriptor of backend for ioctl and
	// fallocate calls.
	rawConn syscall.RawConn
	// blockSize is the block size in bytes as reported by BLKSSZGET;
	// blockCount is the device size in bytes divided by blockSize.
	blockSize, blockCount int64
}
// ReadAt reads into p starting at byte offset off by delegating to the
// backing file's ReadAt. It returns the number of bytes read and any error
// reported by the backing file.
func (d *Device) ReadAt(p []byte, off int64) (n int, err error) {
	n, err = d.backend.ReadAt(p, off)
	return n, err
}
// WriteAt writes p starting at byte offset off by delegating to the backing
// file's WriteAt. It returns the number of bytes written and any error
// reported by the backing file.
func (d *Device) WriteAt(p []byte, off int64) (n int, err error) {
	n, err = d.backend.WriteAt(p, off)
	return n, err
}
// Close closes the backing file handle and returns the resulting error, if
// any.
func (d *Device) Close() error {
	closeErr := d.backend.Close()
	return closeErr
}
// BlockCount returns the number of blocks stored in the Device's blockCount
// field.
func (d *Device) BlockCount() int64 {
	count := d.blockCount
	return count
}
// BlockSize returns the block size in bytes stored in the Device's blockSize
// field.
func (d *Device) BlockSize() int64 {
	size := d.blockSize
	return size
}
// Discard issues a BLKDISCARD ioctl for the byte range [startByte, endByte).
//
// It returns an error if endByte lies before startByte, ErrUnsupported if the
// kernel reports EOPNOTSUPP, the error from rawConn.Control if the file
// descriptor cannot be accessed, and a wrapped errno for any other ioctl
// failure.
func (d *Device) Discard(startByte int64, endByte int64) error {
	if endByte < startByte {
		// A negative length would wrap around to a huge unsigned value below.
		return fmt.Errorf("failed to discard: end offset %d is before start offset %d", endByte, startByte)
	}
	// BLKDISCARD expects a {start, length} pair in bytes, not {start, end}.
	args := [2]uint64{uint64(startByte), uint64(endByte - startByte)}
	var errno unix.Errno
	if ctrlErr := d.rawConn.Control(func(fd uintptr) {
		_, _, errno = unix.Syscall(unix.SYS_IOCTL, fd, unix.BLKDISCARD, uintptr(unsafe.Pointer(&args[0])))
	}); ctrlErr != nil {
		return ctrlErr
	}
	switch errno {
	case 0:
		return nil
	case unix.EOPNOTSUPP:
		return ErrUnsupported
	default:
		return fmt.Errorf("failed to discard: %w", errno)
	}
}
// OptimalBlockSize returns the same value as BlockSize; no separate optimal
// I/O size is tracked for a Device.
func (d *Device) OptimalBlockSize() int64 {
	return d.BlockSize()
}
// Zero zeroes the byte range [startByte, endByte) of the device.
//
// It first tries fallocate with FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE and,
// only if that reports EOPNOTSUPP, falls back to the BLKZEROOUT ioctl. It
// returns the error from rawConn.Control if the file descriptor cannot be
// accessed, and otherwise wraps whichever syscall error remained.
func (d *Device) Zero(startByte int64, endByte int64) error {
	length := endByte - startByte
	// BLKZEROOUT expects a {start, length} pair in bytes, not {start, end}.
	args := [2]uint64{uint64(startByte), uint64(length)}
	var err error
	if ctrlErr := d.rawConn.Control(func(fd uintptr) {
		// Attempts to leverage discard guarantees to provide extremely quick
		// metadata-only zeroing.
		err = unix.Fallocate(int(fd), unix.FALLOC_FL_PUNCH_HOLE|unix.FALLOC_FL_KEEP_SIZE, startByte, length)
		if !errors.Is(err, unix.EOPNOTSUPP) {
			return
		}
		// Tries Write Same and friends and then just falls back to writing
		// zeroes.
		err = nil
		if _, _, errno := unix.Syscall(unix.SYS_IOCTL, fd, unix.BLKZEROOUT, uintptr(unsafe.Pointer(&args[0]))); errno != 0 {
			err = errno
		}
	}); ctrlErr != nil {
		return ctrlErr
	}
	if err != nil {
		return fmt.Errorf("failed to zero out: %w", err)
	}
	return nil
}
// RefreshPartitionTable issues the BLKRRPART ioctl so that the kernel's view
// of the partition table is refreshed after changes made from userspace. It
// returns the error from rawConn.Control if the file descriptor cannot be
// accessed, or a wrapped errno if the ioctl itself fails.
func (d *Device) RefreshPartitionTable() error {
	var errno unix.Errno
	ctrlErr := d.rawConn.Control(func(fd uintptr) {
		_, _, errno = unix.Syscall(unix.SYS_IOCTL, fd, unix.BLKRRPART, 0)
	})
	switch {
	case ctrlErr != nil:
		return ctrlErr
	case errno != 0:
		return fmt.Errorf("ioctl(BLKRRPART): %w", errno)
	}
	return nil
}
// Open opens the block device at path for reading and writing and wraps the
// resulting handle in a Device via FromFileHandle. If FromFileHandle rejects
// the handle, the file is closed again before the error is returned.
// TODO: exclusive, O_DIRECT
func Open(path string) (*Device, error) {
	outFile, err := os.OpenFile(path, os.O_RDWR, 0640)
	if err != nil {
		return nil, fmt.Errorf("failed to open block device: %w", err)
	}
	dev, err := FromFileHandle(outFile)
	if err != nil {
		// No Device owns the handle on failure, so close it here to avoid
		// leaking the descriptor. The Close error is deliberately dropped in
		// favour of the more informative FromFileHandle error.
		_ = outFile.Close()
		return nil, err
	}
	return dev, nil
}
// FromFileHandle creates a blockdev from a device handle. The device handle is
// not duplicated, closing the returned Device will close it. If the handle is
// not a block device, i.e does not implement block device ioctls, an error is
// returned.
//
// The block size is queried with BLKSSZGET and the total size with
// BLKGETSIZE64. ErrNotBlockDevice is returned when BLKSSZGET fails with ENOTTY
// or EINVAL. On any error the handle is left open and remains owned by the
// caller.
func FromFileHandle(handle *os.File) (*Device, error) {
	outFileC, err := handle.SyscallConn()
	if err != nil {
		return nil, fmt.Errorf("error getting SyscallConn: %w", err)
	}

	var blockSize uint32
	if ctrlErr := outFileC.Control(func(fd uintptr) {
		blockSize, err = unix.IoctlGetUint32(int(fd), unix.BLKSSZGET)
	}); ctrlErr != nil {
		// The callback did not run, so blockSize was never populated.
		return nil, fmt.Errorf("when querying disk block size: %w", ctrlErr)
	}
	if errors.Is(err, unix.ENOTTY) || errors.Is(err, unix.EINVAL) {
		return nil, ErrNotBlockDevice
	}
	if err != nil {
		return nil, fmt.Errorf("when querying disk block size: %w", err)
	}
	if blockSize == 0 {
		// Guards the modulo and division below against a divide-by-zero panic.
		return nil, errors.New("when querying disk block size: device reported a block size of zero")
	}

	var sizeBytes uint64
	var getSizeErr unix.Errno
	if ctrlErr := outFileC.Control(func(fd uintptr) {
		_, _, getSizeErr = unix.Syscall(unix.SYS_IOCTL, fd, unix.BLKGETSIZE64, uintptr(unsafe.Pointer(&sizeBytes)))
	}); ctrlErr != nil {
		return nil, fmt.Errorf("when querying disk block count: %w", ctrlErr)
	}
	if getSizeErr != 0 {
		return nil, fmt.Errorf("when querying disk block count: %w", getSizeErr)
	}
	if sizeBytes%uint64(blockSize) != 0 {
		return nil, fmt.Errorf("block device size is not an integer multiple of its block size (%d %% %d = %d)", sizeBytes, blockSize, sizeBytes%uint64(blockSize))
	}
	return &Device{
		backend:    handle,
		rawConn:    outFileC,
		blockSize:  int64(blockSize),
		blockCount: int64(sizeBytes) / int64(blockSize),
	}, nil
}
// File is a block-device-style wrapper around a regular file. Values are
// constructed by CreateFile, which fills in every field.
type File struct {
	// backend is the open backing file; reads, writes and Close go through it.
	backend *os.File
	// rawConn exposes the file descriptor of backend for fallocate calls.
	rawConn syscall.RawConn
	// blockSize and blockCount are the geometry values passed to CreateFile.
	blockSize, blockCount int64
}
// CreateFile creates (or truncates) the file at name and wraps it in a File
// with the given block geometry.
//
// blockSize must be a power of two and at least 512 bytes; otherwise an error
// is returned before the file is touched. If the raw connection for the newly
// created file cannot be obtained, the file handle is closed before the error
// is returned.
//
// NOTE(review): the backing file is not resized to blockSize*blockCount here;
// confirm that callers do not rely on reading blocks that were never written.
func CreateFile(name string, blockSize int64, blockCount int64) (*File, error) {
	if blockSize < 512 {
		return nil, fmt.Errorf("blockSize must be at least 512 bytes")
	}
	if bits.OnesCount64(uint64(blockSize)) != 1 {
		return nil, fmt.Errorf("blockSize must be a power of two")
	}
	out, err := os.Create(name)
	if err != nil {
		return nil, fmt.Errorf("when creating backing file: %w", err)
	}
	rawConn, err := out.SyscallConn()
	if err != nil {
		// No File takes ownership of the handle on this path, so close it to
		// avoid leaking the descriptor. The Close error is deliberately
		// dropped in favour of the SyscallConn error.
		_ = out.Close()
		return nil, fmt.Errorf("unable to get SyscallConn: %w", err)
	}
	return &File{
		backend:    out,
		blockSize:  blockSize,
		rawConn:    rawConn,
		blockCount: blockCount,
	}, nil
}
// ReadAt reads into p starting at byte offset off by delegating to the
// backing file's ReadAt. It returns the number of bytes read and any error
// reported by the backing file.
func (d *File) ReadAt(p []byte, off int64) (n int, err error) {
	n, err = d.backend.ReadAt(p, off)
	return n, err
}
// WriteAt writes p starting at byte offset off by delegating to the backing
// file's WriteAt. It returns the number of bytes written and any error
// reported by the backing file.
func (d *File) WriteAt(p []byte, off int64) (n int, err error) {
	n, err = d.backend.WriteAt(p, off)
	return n, err
}
// Close closes the backing file handle and returns the resulting error, if
// any.
func (d *File) Close() error {
	closeErr := d.backend.Close()
	return closeErr
}
// BlockCount returns the number of blocks stored in the File's blockCount
// field.
func (d *File) BlockCount() int64 {
	count := d.blockCount
	return count
}
// BlockSize returns the block size in bytes stored in the File's blockSize
// field.
func (d *File) BlockSize() int64 {
	size := d.blockSize
	return size
}
// Discard punches a hole over the byte range [startByte, endByte) of the
// backing file using fallocate with FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE.
//
// It returns ErrUnsupported if fallocate reports EOPNOTSUPP, the error from
// rawConn.Control if the file descriptor cannot be accessed, and a wrapped
// error for any other fallocate failure.
func (d *File) Discard(startByte int64, endByte int64) error {
	var err error
	if ctrlErr := d.rawConn.Control(func(fd uintptr) {
		// There is FALLOC_FL_NO_HIDE_STALE, but it's not implemented by
		// any filesystem right now, so let's not attempt it for the time being.
		err = unix.Fallocate(int(fd), unix.FALLOC_FL_PUNCH_HOLE|unix.FALLOC_FL_KEEP_SIZE, startByte, endByte-startByte)
	}); ctrlErr != nil {
		return ctrlErr
	}
	if errors.Is(err, unix.EOPNOTSUPP) {
		return ErrUnsupported
	}
	// Fallocate returns a nil error interface on success. Comparing against
	// unix.Errno(0) would never match nil and would report success as failure.
	if err != nil {
		return fmt.Errorf("failed to discard: %w", err)
	}
	return nil
}
// OptimalBlockSize returns the same value as BlockSize; no separate optimal
// I/O size is tracked for a File.
func (d *File) OptimalBlockSize() int64 {
	return d.BlockSize()
}
// Zero zeroes the byte range [startByte, endByte) of the backing file. It
// first asks the filesystem to punch a hole over the range; when that is
// unsupported (EOPNOTSUPP) or the syscall is unavailable (ENOSYS, for example
// in a sandbox) it falls back to GenericZero. Any other fallocate error is
// wrapped and returned, as is a failure to access the file descriptor.
func (d *File) Zero(startByte int64, endByte int64) error {
	var punchErr error
	ctrlErr := d.rawConn.Control(func(fd uintptr) {
		// Tell the filesystem to punch out the given blocks.
		punchErr = unix.Fallocate(int(fd), unix.FALLOC_FL_PUNCH_HOLE|unix.FALLOC_FL_KEEP_SIZE, startByte, endByte-startByte)
	})
	if ctrlErr != nil {
		return ctrlErr
	}
	switch {
	case punchErr == nil:
		return nil
	case errors.Is(punchErr, unix.EOPNOTSUPP), errors.Is(punchErr, unix.ENOSYS):
		// Generic software implementation.
		return GenericZero(d, startByte, endByte)
	default:
		return fmt.Errorf("failed to zero out: %w", punchErr)
	}
}