blob: c5fa7842acf3309d6317c56729d3e646d2fe1d0b [file] [log] [blame]
Lorenz Brun1e0e3a42023-06-28 16:40:18 +02001//go:build linux
2
3package blockdev
4
5import (
6 "errors"
7 "fmt"
8 "math/bits"
9 "os"
10 "syscall"
11 "unsafe"
12
13 "golang.org/x/sys/unix"
14)
15
// Device is a handle to a Linux block device. It bundles the open device
// file with the geometry (block size and block count) recorded when the
// Device was constructed.
type Device struct {
	// backend is the open device file; ReadAt, WriteAt and Close are
	// forwarded to it.
	backend *os.File
	// rawConn exposes the raw file descriptor of backend for ioctl and
	// fallocate calls.
	rawConn syscall.RawConn
	// blockSize is the size of one block in bytes, blockCount the number of
	// such blocks.
	blockSize, blockCount int64
}
22
23func (d *Device) ReadAt(p []byte, off int64) (n int, err error) {
24 return d.backend.ReadAt(p, off)
25}
26
27func (d *Device) WriteAt(p []byte, off int64) (n int, err error) {
28 return d.backend.WriteAt(p, off)
29}
30
31func (d *Device) Close() error {
32 return d.backend.Close()
33}
34
35func (d *Device) BlockCount() int64 {
36 return d.blockCount
37}
38
39func (d *Device) BlockSize() int64 {
40 return d.blockSize
41}
42
43func (d *Device) Discard(startByte int64, endByte int64) error {
44 var args [2]uint64
45 var err unix.Errno
46 args[0] = uint64(startByte)
Jan Schär0ea961c2024-04-11 13:41:40 +020047 args[1] = uint64(endByte - startByte)
Lorenz Brun1e0e3a42023-06-28 16:40:18 +020048 if ctrlErr := d.rawConn.Control(func(fd uintptr) {
49 _, _, err = unix.Syscall(unix.SYS_IOCTL, fd, unix.BLKDISCARD, uintptr(unsafe.Pointer(&args[0])))
50 }); ctrlErr != nil {
51 return ctrlErr
52 }
53 if err == unix.EOPNOTSUPP {
Lorenz Brun65b1c682023-09-14 15:49:39 +020054 return errors.ErrUnsupported
Lorenz Brun1e0e3a42023-06-28 16:40:18 +020055 }
56 if err != unix.Errno(0) {
57 return fmt.Errorf("failed to discard: %w", err)
58 }
59 return nil
60}
61
62func (d *Device) OptimalBlockSize() int64 {
63 return d.blockSize
64}
65
66func (d *Device) Zero(startByte int64, endByte int64) error {
67 var args [2]uint64
68 var err error
69 args[0] = uint64(startByte)
Jan Schär0ea961c2024-04-11 13:41:40 +020070 args[1] = uint64(endByte - startByte)
Tim Windelschmidt06c19642024-04-23 15:07:40 +020071 ctrlErr := d.rawConn.Control(func(fd uintptr) {
Lorenz Brun1e0e3a42023-06-28 16:40:18 +020072 // Attempts to leverage discard guarantees to provide extremely quick
73 // metadata-only zeroing.
74 err = unix.Fallocate(int(fd), unix.FALLOC_FL_PUNCH_HOLE|unix.FALLOC_FL_KEEP_SIZE, startByte, endByte-startByte)
Tim Windelschmidtd5f851b2024-04-23 14:59:37 +020075 if errors.Is(err, unix.EOPNOTSUPP) {
Lorenz Brun1e0e3a42023-06-28 16:40:18 +020076 // Tries Write Same and friends and then just falls back to writing
77 // zeroes.
Tim Windelschmidt06c19642024-04-23 15:07:40 +020078 _, _, errNo := unix.Syscall(unix.SYS_IOCTL, fd, unix.BLKZEROOUT, uintptr(unsafe.Pointer(&args[0])))
79 if errNo == unix.Errno(0) {
Lorenz Brun1e0e3a42023-06-28 16:40:18 +020080 err = nil
Tim Windelschmidt06c19642024-04-23 15:07:40 +020081 } else {
82 err = errNo
Lorenz Brun1e0e3a42023-06-28 16:40:18 +020083 }
84 }
Tim Windelschmidt06c19642024-04-23 15:07:40 +020085 })
86 if ctrlErr != nil {
Lorenz Brun1e0e3a42023-06-28 16:40:18 +020087 return ctrlErr
88 }
89 if err != nil {
90 return fmt.Errorf("failed to zero out: %w", err)
91 }
92 return nil
93}
94
95// RefreshPartitionTable refreshes the kernel's view of the partition table
96// after changes made from userspace.
97func (d *Device) RefreshPartitionTable() error {
98 var err unix.Errno
99 if ctrlErr := d.rawConn.Control(func(fd uintptr) {
100 _, _, err = unix.Syscall(unix.SYS_IOCTL, fd, unix.BLKRRPART, 0)
101 }); ctrlErr != nil {
102 return ctrlErr
103 }
104 if err != unix.Errno(0) {
105 return fmt.Errorf("ioctl(BLKRRPART): %w", err)
106 }
107 return nil
108}
109
110// Open opens a block device given a path to its inode.
111// TODO: exclusive, O_DIRECT
112func Open(path string) (*Device, error) {
113 outFile, err := os.OpenFile(path, os.O_RDWR, 0640)
114 if err != nil {
115 return nil, fmt.Errorf("failed to open block device: %w", err)
116 }
117 return FromFileHandle(outFile)
118}
119
120// FromFileHandle creates a blockdev from a device handle. The device handle is
121// not duplicated, closing the returned Device will close it. If the handle is
122// not a block device, i.e does not implement block device ioctls, an error is
123// returned.
124func FromFileHandle(handle *os.File) (*Device, error) {
125 outFileC, err := handle.SyscallConn()
126 if err != nil {
127 return nil, fmt.Errorf("error getting SyscallConn: %w", err)
128 }
129 var blockSize uint32
130 outFileC.Control(func(fd uintptr) {
131 blockSize, err = unix.IoctlGetUint32(int(fd), unix.BLKSSZGET)
132 })
133 if errors.Is(err, unix.ENOTTY) || errors.Is(err, unix.EINVAL) {
134 return nil, ErrNotBlockDevice
135 } else if err != nil {
136 return nil, fmt.Errorf("when querying disk block size: %w", err)
137 }
138
139 var sizeBytes uint64
Tim Windelschmidt06c19642024-04-23 15:07:40 +0200140 var getSizeErr syscall.Errno
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200141 outFileC.Control(func(fd uintptr) {
142 _, _, getSizeErr = unix.Syscall(unix.SYS_IOCTL, fd, unix.BLKGETSIZE64, uintptr(unsafe.Pointer(&sizeBytes)))
143 })
144
145 if getSizeErr != unix.Errno(0) {
146 return nil, fmt.Errorf("when querying disk block count: %w", err)
147 }
148 if sizeBytes%uint64(blockSize) != 0 {
149 return nil, fmt.Errorf("block device size is not an integer multiple of its block size (%d %% %d = %d)", sizeBytes, blockSize, sizeBytes%uint64(blockSize))
150 }
151 return &Device{
152 backend: handle,
153 rawConn: outFileC,
154 blockSize: int64(blockSize),
155 blockCount: int64(sizeBytes) / int64(blockSize),
156 }, nil
157}
158
// File is a block-device-like object backed by a regular file. It carries
// the block size and block count it was created with.
type File struct {
	// backend is the backing file; ReadAt, WriteAt and Close are forwarded
	// to it.
	backend *os.File
	// rawConn exposes the raw file descriptor of backend for fallocate
	// calls.
	rawConn syscall.RawConn
	// blockSize is the size of one block in bytes.
	blockSize int64
	// blockCount is the number of blocks.
	blockCount int64
}

// CreateFile creates (or truncates) the file called name and returns a File
// using it as backing storage with the given block geometry.
//
// blockSize must be a power of two and at least 512 bytes; otherwise an
// error is returned and no file is created. blockCount is recorded as given.
// If the backing file was created but setting up the File fails, the file
// handle is closed again before returning the error.
func CreateFile(name string, blockSize int64, blockCount int64) (*File, error) {
	if blockSize < 512 {
		return nil, errors.New("blockSize must be at least 512 bytes")
	}
	if bits.OnesCount64(uint64(blockSize)) != 1 {
		return nil, errors.New("blockSize must be a power of two")
	}
	out, err := os.Create(name)
	if err != nil {
		return nil, fmt.Errorf("when creating backing file: %w", err)
	}
	rawConn, err := out.SyscallConn()
	if err != nil {
		// The caller never receives out, so release it here to avoid
		// leaking the descriptor. The SyscallConn error is the one worth
		// reporting.
		out.Close()
		return nil, fmt.Errorf("unable to get SyscallConn: %w", err)
	}
	return &File{
		backend:    out,
		blockSize:  blockSize,
		rawConn:    rawConn,
		blockCount: blockCount,
	}, nil
}
188
189func (d *File) ReadAt(p []byte, off int64) (n int, err error) {
190 return d.backend.ReadAt(p, off)
191}
192
193func (d *File) WriteAt(p []byte, off int64) (n int, err error) {
194 return d.backend.WriteAt(p, off)
195}
196
197func (d *File) Close() error {
198 return d.backend.Close()
199}
200
201func (d *File) BlockCount() int64 {
202 return d.blockCount
203}
204
205func (d *File) BlockSize() int64 {
206 return d.blockSize
207}
208
209func (d *File) Discard(startByte int64, endByte int64) error {
210 var err error
211 if ctrlErr := d.rawConn.Control(func(fd uintptr) {
212 // There is FALLOC_FL_NO_HIDE_STALE, but it's not implemented by
213 // any filesystem right now, so let's not attempt it for the time being.
214 err = unix.Fallocate(int(fd), unix.FALLOC_FL_PUNCH_HOLE|unix.FALLOC_FL_KEEP_SIZE, startByte, endByte-startByte)
215 }); ctrlErr != nil {
216 return ctrlErr
217 }
218 if errors.Is(err, unix.EOPNOTSUPP) {
Lorenz Brun65b1c682023-09-14 15:49:39 +0200219 return errors.ErrUnsupported
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200220 }
Jan Schär0ea961c2024-04-11 13:41:40 +0200221 if err != nil {
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200222 return fmt.Errorf("failed to discard: %w", err)
223 }
224 return nil
225}
226
227func (d *File) OptimalBlockSize() int64 {
228 return d.blockSize
229}
230
231func (d *File) Zero(startByte int64, endByte int64) error {
232 var err error
233 if ctrlErr := d.rawConn.Control(func(fd uintptr) {
234 // Tell the filesystem to punch out the given blocks.
235 err = unix.Fallocate(int(fd), unix.FALLOC_FL_PUNCH_HOLE|unix.FALLOC_FL_KEEP_SIZE, startByte, endByte-startByte)
236 }); ctrlErr != nil {
237 return ctrlErr
238 }
239 // If unsupported or the syscall is not available (for example in a sandbox)
240 // fall back to the generic software implementation.
241 if errors.Is(err, unix.EOPNOTSUPP) || errors.Is(err, unix.ENOSYS) {
242 return GenericZero(d, startByte, endByte)
243 }
244 if err != nil {
245 return fmt.Errorf("failed to zero out: %w", err)
246 }
247 return nil
248}