blob: 7bc31fd0517e38cc4c02cbc9df3af98e655aba1b [file] [log] [blame]
Lorenz Brun1e0e3a42023-06-28 16:40:18 +02001//go:build linux
2
3package blockdev
4
5import (
6 "errors"
7 "fmt"
Jan Schära6da1712024-08-21 15:12:11 +02008 "io"
Lorenz Brun1e0e3a42023-06-28 16:40:18 +02009 "math/bits"
10 "os"
Jan Schär5c82e0d2024-08-26 17:06:13 +020011 "runtime"
Lorenz Brun1e0e3a42023-06-28 16:40:18 +020012 "syscall"
13 "unsafe"
14
15 "golang.org/x/sys/unix"
16)
17
// Device is a handle to a Linux block device. Instances are produced by Open
// or FromFileHandle.
type Device struct {
	// backend is the open device node; ReadAt, WriteAt, Sync and Close are
	// forwarded to it.
	backend *os.File
	// rawConn exposes the file descriptor of backend for issuing ioctls.
	rawConn syscall.RawConn
	// blockSize is the block size in bytes and blockCount the number of such
	// blocks; their product is the size used for bounds checks.
	blockSize, blockCount int64
}
24
25func (d *Device) ReadAt(p []byte, off int64) (n int, err error) {
Jan Schära6da1712024-08-21 15:12:11 +020026 size := d.blockSize * d.blockCount
27 if off > size {
28 return 0, io.EOF
29 }
30 if int64(len(p)) > size-off {
31 n, err = d.backend.ReadAt(p[:size-off], off)
32 if err == nil {
33 err = io.EOF
34 }
35 return
36 }
Lorenz Brun1e0e3a42023-06-28 16:40:18 +020037 return d.backend.ReadAt(p, off)
38}
39
40func (d *Device) WriteAt(p []byte, off int64) (n int, err error) {
Jan Schära6da1712024-08-21 15:12:11 +020041 size := d.blockSize * d.blockCount
42 if off > size {
43 return 0, ErrOutOfBounds
44 }
45 if int64(len(p)) > size-off {
46 n, err = d.backend.WriteAt(p[:size-off], off)
47 if err == nil {
48 err = ErrOutOfBounds
49 }
50 return
51 }
Lorenz Brun1e0e3a42023-06-28 16:40:18 +020052 return d.backend.WriteAt(p, off)
53}
54
55func (d *Device) Close() error {
56 return d.backend.Close()
57}
58
59func (d *Device) BlockCount() int64 {
60 return d.blockCount
61}
62
63func (d *Device) BlockSize() int64 {
64 return d.blockSize
65}
66
Jan Schära6da1712024-08-21 15:12:11 +020067func (d *Device) OptimalBlockSize() int64 {
68 return d.blockSize
69}
70
Lorenz Brun1e0e3a42023-06-28 16:40:18 +020071func (d *Device) Discard(startByte int64, endByte int64) error {
Jan Schära6da1712024-08-21 15:12:11 +020072 if err := validAlignedRange(d, startByte, endByte); err != nil {
73 return err
74 }
75 if startByte == endByte {
76 return nil
77 }
Lorenz Brun1e0e3a42023-06-28 16:40:18 +020078 var args [2]uint64
79 var err unix.Errno
80 args[0] = uint64(startByte)
Jan Schär0ea961c2024-04-11 13:41:40 +020081 args[1] = uint64(endByte - startByte)
Lorenz Brun1e0e3a42023-06-28 16:40:18 +020082 if ctrlErr := d.rawConn.Control(func(fd uintptr) {
83 _, _, err = unix.Syscall(unix.SYS_IOCTL, fd, unix.BLKDISCARD, uintptr(unsafe.Pointer(&args[0])))
84 }); ctrlErr != nil {
85 return ctrlErr
86 }
87 if err == unix.EOPNOTSUPP {
Lorenz Brun65b1c682023-09-14 15:49:39 +020088 return errors.ErrUnsupported
Lorenz Brun1e0e3a42023-06-28 16:40:18 +020089 }
90 if err != unix.Errno(0) {
91 return fmt.Errorf("failed to discard: %w", err)
92 }
93 return nil
94}
95
Lorenz Brun1e0e3a42023-06-28 16:40:18 +020096func (d *Device) Zero(startByte int64, endByte int64) error {
Jan Schära6da1712024-08-21 15:12:11 +020097 if err := validAlignedRange(d, startByte, endByte); err != nil {
98 return err
99 }
100 if startByte == endByte {
101 return nil
102 }
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200103 var args [2]uint64
104 var err error
105 args[0] = uint64(startByte)
Jan Schär0ea961c2024-04-11 13:41:40 +0200106 args[1] = uint64(endByte - startByte)
Tim Windelschmidt06c19642024-04-23 15:07:40 +0200107 ctrlErr := d.rawConn.Control(func(fd uintptr) {
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200108 // Attempts to leverage discard guarantees to provide extremely quick
109 // metadata-only zeroing.
110 err = unix.Fallocate(int(fd), unix.FALLOC_FL_PUNCH_HOLE|unix.FALLOC_FL_KEEP_SIZE, startByte, endByte-startByte)
Tim Windelschmidtd5f851b2024-04-23 14:59:37 +0200111 if errors.Is(err, unix.EOPNOTSUPP) {
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200112 // Tries Write Same and friends and then just falls back to writing
113 // zeroes.
Tim Windelschmidt06c19642024-04-23 15:07:40 +0200114 _, _, errNo := unix.Syscall(unix.SYS_IOCTL, fd, unix.BLKZEROOUT, uintptr(unsafe.Pointer(&args[0])))
115 if errNo == unix.Errno(0) {
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200116 err = nil
Tim Windelschmidt06c19642024-04-23 15:07:40 +0200117 } else {
118 err = errNo
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200119 }
120 }
Tim Windelschmidt06c19642024-04-23 15:07:40 +0200121 })
122 if ctrlErr != nil {
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200123 return ctrlErr
124 }
125 if err != nil {
126 return fmt.Errorf("failed to zero out: %w", err)
127 }
128 return nil
129}
130
Jan Schära6da1712024-08-21 15:12:11 +0200131func (d *Device) Sync() error {
132 return d.backend.Sync()
133}
134
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200135// RefreshPartitionTable refreshes the kernel's view of the partition table
136// after changes made from userspace.
137func (d *Device) RefreshPartitionTable() error {
138 var err unix.Errno
139 if ctrlErr := d.rawConn.Control(func(fd uintptr) {
140 _, _, err = unix.Syscall(unix.SYS_IOCTL, fd, unix.BLKRRPART, 0)
141 }); ctrlErr != nil {
142 return ctrlErr
143 }
144 if err != unix.Errno(0) {
145 return fmt.Errorf("ioctl(BLKRRPART): %w", err)
146 }
147 return nil
148}
149
Jan Schär5c82e0d2024-08-26 17:06:13 +0200150// ResizePartition updates the start and length of one partition in the kernel.
151// This can be used as an alternative to RefreshPartitionTable, which cannot
152// be used if any partition on this device is currently mounted.
153func (d *Device) ResizePartition(partitionNo int32, startByte, lengthBytes int64) error {
154 var ioctlPins runtime.Pinner
155 defer ioctlPins.Unpin()
156
157 partition := unix.BlkpgPartition{
158 Start: startByte,
159 Length: lengthBytes,
160 Pno: partitionNo,
161 }
162 ioctlPins.Pin(&partition)
163 arg := unix.BlkpgIoctlArg{
164 Op: unix.BLKPG_RESIZE_PARTITION,
165 Datalen: int32(unsafe.Sizeof(partition)),
166 Data: (*byte)(unsafe.Pointer(&partition)),
167 }
168
169 var err unix.Errno
170 if ctrlErr := d.rawConn.Control(func(fd uintptr) {
171 _, _, err = unix.Syscall(unix.SYS_IOCTL, fd, unix.BLKPG, uintptr(unsafe.Pointer(&arg)))
172 }); ctrlErr != nil {
173 return ctrlErr
174 }
175 if err != unix.Errno(0) {
176 return fmt.Errorf("ioctl(BLKPG): %w", err)
177 }
178 return nil
179}
180
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200181// Open opens a block device given a path to its inode.
182// TODO: exclusive, O_DIRECT
183func Open(path string) (*Device, error) {
184 outFile, err := os.OpenFile(path, os.O_RDWR, 0640)
185 if err != nil {
186 return nil, fmt.Errorf("failed to open block device: %w", err)
187 }
188 return FromFileHandle(outFile)
189}
190
191// FromFileHandle creates a blockdev from a device handle. The device handle is
192// not duplicated, closing the returned Device will close it. If the handle is
193// not a block device, i.e does not implement block device ioctls, an error is
194// returned.
195func FromFileHandle(handle *os.File) (*Device, error) {
196 outFileC, err := handle.SyscallConn()
197 if err != nil {
198 return nil, fmt.Errorf("error getting SyscallConn: %w", err)
199 }
200 var blockSize uint32
201 outFileC.Control(func(fd uintptr) {
202 blockSize, err = unix.IoctlGetUint32(int(fd), unix.BLKSSZGET)
203 })
204 if errors.Is(err, unix.ENOTTY) || errors.Is(err, unix.EINVAL) {
205 return nil, ErrNotBlockDevice
206 } else if err != nil {
207 return nil, fmt.Errorf("when querying disk block size: %w", err)
208 }
209
210 var sizeBytes uint64
Tim Windelschmidt06c19642024-04-23 15:07:40 +0200211 var getSizeErr syscall.Errno
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200212 outFileC.Control(func(fd uintptr) {
213 _, _, getSizeErr = unix.Syscall(unix.SYS_IOCTL, fd, unix.BLKGETSIZE64, uintptr(unsafe.Pointer(&sizeBytes)))
214 })
215
216 if getSizeErr != unix.Errno(0) {
217 return nil, fmt.Errorf("when querying disk block count: %w", err)
218 }
219 if sizeBytes%uint64(blockSize) != 0 {
220 return nil, fmt.Errorf("block device size is not an integer multiple of its block size (%d %% %d = %d)", sizeBytes, blockSize, sizeBytes%uint64(blockSize))
221 }
222 return &Device{
223 backend: handle,
224 rawConn: outFileC,
225 blockSize: int64(blockSize),
226 blockCount: int64(sizeBytes) / int64(blockSize),
227 }, nil
228}
229
// File is a block device emulated on top of a regular file. Instances are
// produced by CreateFile.
type File struct {
	// backend is the backing file; ReadAt, WriteAt, Sync and Close are
	// forwarded to it.
	backend *os.File
	// rawConn exposes the file descriptor of backend for fallocate calls.
	rawConn syscall.RawConn
	// blockSize is the block size in bytes and blockCount the number of such
	// blocks; their product is the size used for bounds checks.
	blockSize, blockCount int64
}
236
237func CreateFile(name string, blockSize int64, blockCount int64) (*File, error) {
238 if blockSize < 512 {
Jan Schära6da1712024-08-21 15:12:11 +0200239 return nil, fmt.Errorf("blockSize must be at least 512 bytes")
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200240 }
241 if bits.OnesCount64(uint64(blockSize)) != 1 {
242 return nil, fmt.Errorf("blockSize must be a power of two")
243 }
244 out, err := os.Create(name)
245 if err != nil {
246 return nil, fmt.Errorf("when creating backing file: %w", err)
247 }
248 rawConn, err := out.SyscallConn()
249 if err != nil {
250 return nil, fmt.Errorf("unable to get SyscallConn: %w", err)
251 }
252 return &File{
253 backend: out,
254 blockSize: blockSize,
255 rawConn: rawConn,
256 blockCount: blockCount,
257 }, nil
258}
259
260func (d *File) ReadAt(p []byte, off int64) (n int, err error) {
Jan Schära6da1712024-08-21 15:12:11 +0200261 size := d.blockSize * d.blockCount
262 if off > size {
263 return 0, io.EOF
264 }
265 if int64(len(p)) > size-off {
266 n, err = d.backend.ReadAt(p[:size-off], off)
267 if err == nil {
268 err = io.EOF
269 }
270 return
271 }
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200272 return d.backend.ReadAt(p, off)
273}
274
275func (d *File) WriteAt(p []byte, off int64) (n int, err error) {
Jan Schära6da1712024-08-21 15:12:11 +0200276 size := d.blockSize * d.blockCount
277 if off > size {
278 return 0, ErrOutOfBounds
279 }
280 if int64(len(p)) > size-off {
281 n, err = d.backend.WriteAt(p[:size-off], off)
282 if err == nil {
283 err = ErrOutOfBounds
284 }
285 return
286 }
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200287 return d.backend.WriteAt(p, off)
288}
289
290func (d *File) Close() error {
291 return d.backend.Close()
292}
293
294func (d *File) BlockCount() int64 {
295 return d.blockCount
296}
297
298func (d *File) BlockSize() int64 {
299 return d.blockSize
300}
301
Jan Schära6da1712024-08-21 15:12:11 +0200302func (d *File) OptimalBlockSize() int64 {
303 return d.blockSize
304}
305
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200306func (d *File) Discard(startByte int64, endByte int64) error {
Jan Schära6da1712024-08-21 15:12:11 +0200307 if err := validAlignedRange(d, startByte, endByte); err != nil {
308 return err
309 }
310 if startByte == endByte {
311 return nil
312 }
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200313 var err error
314 if ctrlErr := d.rawConn.Control(func(fd uintptr) {
315 // There is FALLOC_FL_NO_HIDE_STALE, but it's not implemented by
316 // any filesystem right now, so let's not attempt it for the time being.
317 err = unix.Fallocate(int(fd), unix.FALLOC_FL_PUNCH_HOLE|unix.FALLOC_FL_KEEP_SIZE, startByte, endByte-startByte)
318 }); ctrlErr != nil {
319 return ctrlErr
320 }
321 if errors.Is(err, unix.EOPNOTSUPP) {
Lorenz Brun65b1c682023-09-14 15:49:39 +0200322 return errors.ErrUnsupported
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200323 }
Jan Schär0ea961c2024-04-11 13:41:40 +0200324 if err != nil {
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200325 return fmt.Errorf("failed to discard: %w", err)
326 }
327 return nil
328}
329
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200330func (d *File) Zero(startByte int64, endByte int64) error {
Jan Schära6da1712024-08-21 15:12:11 +0200331 if err := validAlignedRange(d, startByte, endByte); err != nil {
332 return err
333 }
334 if startByte == endByte {
335 return nil
336 }
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200337 var err error
338 if ctrlErr := d.rawConn.Control(func(fd uintptr) {
339 // Tell the filesystem to punch out the given blocks.
340 err = unix.Fallocate(int(fd), unix.FALLOC_FL_PUNCH_HOLE|unix.FALLOC_FL_KEEP_SIZE, startByte, endByte-startByte)
341 }); ctrlErr != nil {
342 return ctrlErr
343 }
344 // If unsupported or the syscall is not available (for example in a sandbox)
345 // fall back to the generic software implementation.
346 if errors.Is(err, unix.EOPNOTSUPP) || errors.Is(err, unix.ENOSYS) {
347 return GenericZero(d, startByte, endByte)
348 }
349 if err != nil {
350 return fmt.Errorf("failed to zero out: %w", err)
351 }
352 return nil
353}
Jan Schära6da1712024-08-21 15:12:11 +0200354
355func (d *File) Sync() error {
356 return d.backend.Sync()
357}