| //go:build linux | 
 |  | 
 | package blockdev | 
 |  | 
 | import ( | 
 | 	"errors" | 
 | 	"fmt" | 
 | 	"math/bits" | 
 | 	"os" | 
 | 	"syscall" | 
 | 	"unsafe" | 
 |  | 
 | 	"golang.org/x/sys/unix" | 
 | ) | 
 |  | 
 | type Device struct { | 
 | 	backend    *os.File | 
 | 	rawConn    syscall.RawConn | 
 | 	blockSize  int64 | 
 | 	blockCount int64 | 
 | } | 
 |  | 
 | func (d *Device) ReadAt(p []byte, off int64) (n int, err error) { | 
 | 	return d.backend.ReadAt(p, off) | 
 | } | 
 |  | 
 | func (d *Device) WriteAt(p []byte, off int64) (n int, err error) { | 
 | 	return d.backend.WriteAt(p, off) | 
 | } | 
 |  | 
 | func (d *Device) Close() error { | 
 | 	return d.backend.Close() | 
 | } | 
 |  | 
 | func (d *Device) BlockCount() int64 { | 
 | 	return d.blockCount | 
 | } | 
 |  | 
 | func (d *Device) BlockSize() int64 { | 
 | 	return d.blockSize | 
 | } | 
 |  | 
 | func (d *Device) Discard(startByte int64, endByte int64) error { | 
 | 	var args [2]uint64 | 
 | 	var err unix.Errno | 
 | 	args[0] = uint64(startByte) | 
 | 	args[1] = uint64(endByte) | 
 | 	if ctrlErr := d.rawConn.Control(func(fd uintptr) { | 
 | 		_, _, err = unix.Syscall(unix.SYS_IOCTL, fd, unix.BLKDISCARD, uintptr(unsafe.Pointer(&args[0]))) | 
 | 	}); ctrlErr != nil { | 
 | 		return ctrlErr | 
 | 	} | 
 | 	if err == unix.EOPNOTSUPP { | 
 | 		return errors.ErrUnsupported | 
 | 	} | 
 | 	if err != unix.Errno(0) { | 
 | 		return fmt.Errorf("failed to discard: %w", err) | 
 | 	} | 
 | 	return nil | 
 | } | 
 |  | 
 | func (d *Device) OptimalBlockSize() int64 { | 
 | 	return d.blockSize | 
 | } | 
 |  | 
 | func (d *Device) Zero(startByte int64, endByte int64) error { | 
 | 	var args [2]uint64 | 
 | 	var err error | 
 | 	args[0] = uint64(startByte) | 
 | 	args[1] = uint64(endByte) | 
 | 	if ctrlErr := d.rawConn.Control(func(fd uintptr) { | 
 | 		// Attempts to leverage discard guarantees to provide extremely quick | 
 | 		// metadata-only zeroing. | 
 | 		err = unix.Fallocate(int(fd), unix.FALLOC_FL_PUNCH_HOLE|unix.FALLOC_FL_KEEP_SIZE, startByte, endByte-startByte) | 
 | 		if err == unix.EOPNOTSUPP { | 
 | 			// Tries Write Same and friends and then just falls back to writing | 
 | 			// zeroes. | 
 | 			_, _, err = unix.Syscall(unix.SYS_IOCTL, fd, unix.BLKZEROOUT, uintptr(unsafe.Pointer(&args[0]))) | 
 | 			if err == unix.Errno(0) { | 
 | 				err = nil | 
 | 			} | 
 | 		} | 
 | 	}); ctrlErr != nil { | 
 | 		return ctrlErr | 
 | 	} | 
 | 	if err != nil { | 
 | 		return fmt.Errorf("failed to zero out: %w", err) | 
 | 	} | 
 | 	return nil | 
 | } | 
 |  | 
 | // RefreshPartitionTable refreshes the kernel's view of the partition table | 
 | // after changes made from userspace. | 
 | func (d *Device) RefreshPartitionTable() error { | 
 | 	var err unix.Errno | 
 | 	if ctrlErr := d.rawConn.Control(func(fd uintptr) { | 
 | 		_, _, err = unix.Syscall(unix.SYS_IOCTL, fd, unix.BLKRRPART, 0) | 
 | 	}); ctrlErr != nil { | 
 | 		return ctrlErr | 
 | 	} | 
 | 	if err != unix.Errno(0) { | 
 | 		return fmt.Errorf("ioctl(BLKRRPART): %w", err) | 
 | 	} | 
 | 	return nil | 
 | } | 
 |  | 
 | // Open opens a block device given a path to its inode. | 
 | // TODO: exclusive, O_DIRECT | 
 | func Open(path string) (*Device, error) { | 
 | 	outFile, err := os.OpenFile(path, os.O_RDWR, 0640) | 
 | 	if err != nil { | 
 | 		return nil, fmt.Errorf("failed to open block device: %w", err) | 
 | 	} | 
 | 	return FromFileHandle(outFile) | 
 | } | 
 |  | 
 | // FromFileHandle creates a blockdev from a device handle. The device handle is | 
 | // not duplicated, closing the returned Device will close it. If the handle is | 
 | // not a block device, i.e does not implement block device ioctls, an error is | 
 | // returned. | 
 | func FromFileHandle(handle *os.File) (*Device, error) { | 
 | 	outFileC, err := handle.SyscallConn() | 
 | 	if err != nil { | 
 | 		return nil, fmt.Errorf("error getting SyscallConn: %w", err) | 
 | 	} | 
 | 	var blockSize uint32 | 
 | 	outFileC.Control(func(fd uintptr) { | 
 | 		blockSize, err = unix.IoctlGetUint32(int(fd), unix.BLKSSZGET) | 
 | 	}) | 
 | 	if errors.Is(err, unix.ENOTTY) || errors.Is(err, unix.EINVAL) { | 
 | 		return nil, ErrNotBlockDevice | 
 | 	} else if err != nil { | 
 | 		return nil, fmt.Errorf("when querying disk block size: %w", err) | 
 | 	} | 
 |  | 
 | 	var sizeBytes uint64 | 
 | 	var getSizeErr error | 
 | 	outFileC.Control(func(fd uintptr) { | 
 | 		_, _, getSizeErr = unix.Syscall(unix.SYS_IOCTL, fd, unix.BLKGETSIZE64, uintptr(unsafe.Pointer(&sizeBytes))) | 
 | 	}) | 
 |  | 
 | 	if getSizeErr != unix.Errno(0) { | 
 | 		return nil, fmt.Errorf("when querying disk block count: %w", err) | 
 | 	} | 
 | 	if sizeBytes%uint64(blockSize) != 0 { | 
 | 		return nil, fmt.Errorf("block device size is not an integer multiple of its block size (%d %% %d = %d)", sizeBytes, blockSize, sizeBytes%uint64(blockSize)) | 
 | 	} | 
 | 	return &Device{ | 
 | 		backend:    handle, | 
 | 		rawConn:    outFileC, | 
 | 		blockSize:  int64(blockSize), | 
 | 		blockCount: int64(sizeBytes) / int64(blockSize), | 
 | 	}, nil | 
 | } | 
 |  | 
 | type File struct { | 
 | 	backend    *os.File | 
 | 	rawConn    syscall.RawConn | 
 | 	blockSize  int64 | 
 | 	blockCount int64 | 
 | } | 
 |  | 
 | func CreateFile(name string, blockSize int64, blockCount int64) (*File, error) { | 
 | 	if blockSize < 512 { | 
 | 		return nil, fmt.Errorf("blockSize must be bigger than 512 bytes") | 
 | 	} | 
 | 	if bits.OnesCount64(uint64(blockSize)) != 1 { | 
 | 		return nil, fmt.Errorf("blockSize must be a power of two") | 
 | 	} | 
 | 	out, err := os.Create(name) | 
 | 	if err != nil { | 
 | 		return nil, fmt.Errorf("when creating backing file: %w", err) | 
 | 	} | 
 | 	rawConn, err := out.SyscallConn() | 
 | 	if err != nil { | 
 | 		return nil, fmt.Errorf("unable to get SyscallConn: %w", err) | 
 | 	} | 
 | 	return &File{ | 
 | 		backend:    out, | 
 | 		blockSize:  blockSize, | 
 | 		rawConn:    rawConn, | 
 | 		blockCount: blockCount, | 
 | 	}, nil | 
 | } | 
 |  | 
 | func (d *File) ReadAt(p []byte, off int64) (n int, err error) { | 
 | 	return d.backend.ReadAt(p, off) | 
 | } | 
 |  | 
 | func (d *File) WriteAt(p []byte, off int64) (n int, err error) { | 
 | 	return d.backend.WriteAt(p, off) | 
 | } | 
 |  | 
 | func (d *File) Close() error { | 
 | 	return d.backend.Close() | 
 | } | 
 |  | 
 | func (d *File) BlockCount() int64 { | 
 | 	return d.blockCount | 
 | } | 
 |  | 
 | func (d *File) BlockSize() int64 { | 
 | 	return d.blockSize | 
 | } | 
 |  | 
 | func (d *File) Discard(startByte int64, endByte int64) error { | 
 | 	var err error | 
 | 	if ctrlErr := d.rawConn.Control(func(fd uintptr) { | 
 | 		// There is FALLOC_FL_NO_HIDE_STALE, but it's not implemented by | 
 | 		// any filesystem right now, so let's not attempt it for the time being. | 
 | 		err = unix.Fallocate(int(fd), unix.FALLOC_FL_PUNCH_HOLE|unix.FALLOC_FL_KEEP_SIZE, startByte, endByte-startByte) | 
 | 	}); ctrlErr != nil { | 
 | 		return ctrlErr | 
 | 	} | 
 | 	if errors.Is(err, unix.EOPNOTSUPP) { | 
 | 		return errors.ErrUnsupported | 
 | 	} | 
 | 	if err != unix.Errno(0) { | 
 | 		return fmt.Errorf("failed to discard: %w", err) | 
 | 	} | 
 | 	return nil | 
 | } | 
 |  | 
 | func (d *File) OptimalBlockSize() int64 { | 
 | 	return d.blockSize | 
 | } | 
 |  | 
 | func (d *File) Zero(startByte int64, endByte int64) error { | 
 | 	var err error | 
 | 	if ctrlErr := d.rawConn.Control(func(fd uintptr) { | 
 | 		// Tell the filesystem to punch out the given blocks. | 
 | 		err = unix.Fallocate(int(fd), unix.FALLOC_FL_PUNCH_HOLE|unix.FALLOC_FL_KEEP_SIZE, startByte, endByte-startByte) | 
 | 	}); ctrlErr != nil { | 
 | 		return ctrlErr | 
 | 	} | 
 | 	// If unsupported or the syscall is not available (for example in a sandbox) | 
 | 	// fall back to the generic software implementation. | 
 | 	if errors.Is(err, unix.EOPNOTSUPP) || errors.Is(err, unix.ENOSYS) { | 
 | 		return GenericZero(d, startByte, endByte) | 
 | 	} | 
 | 	if err != nil { | 
 | 		return fmt.Errorf("failed to zero out: %w", err) | 
 | 	} | 
 | 	return nil | 
 | } |