blob: b8fb5583210f6b9fa072b89cb0868680c3b93cb7 [file] [log] [blame]
// Copyright The Monogon Project Authors.
// SPDX-License-Identifier: Apache-2.0
3
Lorenz Brun1e0e3a42023-06-28 16:40:18 +02004//go:build linux
5
6package blockdev
7
8import (
9 "errors"
10 "fmt"
Jan Schära6da1712024-08-21 15:12:11 +020011 "io"
Lorenz Brun1e0e3a42023-06-28 16:40:18 +020012 "math/bits"
13 "os"
Jan Schär5c82e0d2024-08-26 17:06:13 +020014 "runtime"
Lorenz Brun1e0e3a42023-06-28 16:40:18 +020015 "syscall"
16 "unsafe"
17
18 "golang.org/x/sys/unix"
19)
20
// Device is a handle to an open block device. Its geometry (block size and
// block count) is captured once when the handle is created.
type Device struct {
	// backend is the open file used for reads, writes, sync and close.
	backend *os.File
	// rawConn gives access to the raw file descriptor of backend for ioctls.
	rawConn syscall.RawConn
	// blockSize is the size of a single block in bytes.
	blockSize int64
	// blockCount is the number of blocks on the device.
	blockCount int64
}
27
28func (d *Device) ReadAt(p []byte, off int64) (n int, err error) {
Jan Schära6da1712024-08-21 15:12:11 +020029 size := d.blockSize * d.blockCount
30 if off > size {
31 return 0, io.EOF
32 }
33 if int64(len(p)) > size-off {
34 n, err = d.backend.ReadAt(p[:size-off], off)
35 if err == nil {
36 err = io.EOF
37 }
38 return
39 }
Lorenz Brun1e0e3a42023-06-28 16:40:18 +020040 return d.backend.ReadAt(p, off)
41}
42
43func (d *Device) WriteAt(p []byte, off int64) (n int, err error) {
Jan Schära6da1712024-08-21 15:12:11 +020044 size := d.blockSize * d.blockCount
45 if off > size {
46 return 0, ErrOutOfBounds
47 }
48 if int64(len(p)) > size-off {
49 n, err = d.backend.WriteAt(p[:size-off], off)
50 if err == nil {
51 err = ErrOutOfBounds
52 }
53 return
54 }
Lorenz Brun1e0e3a42023-06-28 16:40:18 +020055 return d.backend.WriteAt(p, off)
56}
57
58func (d *Device) Close() error {
59 return d.backend.Close()
60}
61
62func (d *Device) BlockCount() int64 {
63 return d.blockCount
64}
65
66func (d *Device) BlockSize() int64 {
67 return d.blockSize
68}
69
Jan Schära6da1712024-08-21 15:12:11 +020070func (d *Device) OptimalBlockSize() int64 {
71 return d.blockSize
72}
73
Lorenz Brun1e0e3a42023-06-28 16:40:18 +020074func (d *Device) Discard(startByte int64, endByte int64) error {
Jan Schära6da1712024-08-21 15:12:11 +020075 if err := validAlignedRange(d, startByte, endByte); err != nil {
76 return err
77 }
78 if startByte == endByte {
79 return nil
80 }
Lorenz Brun1e0e3a42023-06-28 16:40:18 +020081 var args [2]uint64
82 var err unix.Errno
83 args[0] = uint64(startByte)
Jan Schär0ea961c2024-04-11 13:41:40 +020084 args[1] = uint64(endByte - startByte)
Lorenz Brun1e0e3a42023-06-28 16:40:18 +020085 if ctrlErr := d.rawConn.Control(func(fd uintptr) {
86 _, _, err = unix.Syscall(unix.SYS_IOCTL, fd, unix.BLKDISCARD, uintptr(unsafe.Pointer(&args[0])))
87 }); ctrlErr != nil {
88 return ctrlErr
89 }
90 if err == unix.EOPNOTSUPP {
Lorenz Brun65b1c682023-09-14 15:49:39 +020091 return errors.ErrUnsupported
Lorenz Brun1e0e3a42023-06-28 16:40:18 +020092 }
93 if err != unix.Errno(0) {
94 return fmt.Errorf("failed to discard: %w", err)
95 }
96 return nil
97}
98
Lorenz Brun1e0e3a42023-06-28 16:40:18 +020099func (d *Device) Zero(startByte int64, endByte int64) error {
Jan Schära6da1712024-08-21 15:12:11 +0200100 if err := validAlignedRange(d, startByte, endByte); err != nil {
101 return err
102 }
103 if startByte == endByte {
104 return nil
105 }
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200106 var args [2]uint64
107 var err error
108 args[0] = uint64(startByte)
Jan Schär0ea961c2024-04-11 13:41:40 +0200109 args[1] = uint64(endByte - startByte)
Tim Windelschmidt06c19642024-04-23 15:07:40 +0200110 ctrlErr := d.rawConn.Control(func(fd uintptr) {
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200111 // Attempts to leverage discard guarantees to provide extremely quick
112 // metadata-only zeroing.
113 err = unix.Fallocate(int(fd), unix.FALLOC_FL_PUNCH_HOLE|unix.FALLOC_FL_KEEP_SIZE, startByte, endByte-startByte)
Tim Windelschmidtd5f851b2024-04-23 14:59:37 +0200114 if errors.Is(err, unix.EOPNOTSUPP) {
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200115 // Tries Write Same and friends and then just falls back to writing
116 // zeroes.
Tim Windelschmidt06c19642024-04-23 15:07:40 +0200117 _, _, errNo := unix.Syscall(unix.SYS_IOCTL, fd, unix.BLKZEROOUT, uintptr(unsafe.Pointer(&args[0])))
118 if errNo == unix.Errno(0) {
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200119 err = nil
Tim Windelschmidt06c19642024-04-23 15:07:40 +0200120 } else {
121 err = errNo
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200122 }
123 }
Tim Windelschmidt06c19642024-04-23 15:07:40 +0200124 })
125 if ctrlErr != nil {
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200126 return ctrlErr
127 }
128 if err != nil {
129 return fmt.Errorf("failed to zero out: %w", err)
130 }
131 return nil
132}
133
Jan Schära6da1712024-08-21 15:12:11 +0200134func (d *Device) Sync() error {
135 return d.backend.Sync()
136}
137
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200138// RefreshPartitionTable refreshes the kernel's view of the partition table
139// after changes made from userspace.
140func (d *Device) RefreshPartitionTable() error {
141 var err unix.Errno
142 if ctrlErr := d.rawConn.Control(func(fd uintptr) {
143 _, _, err = unix.Syscall(unix.SYS_IOCTL, fd, unix.BLKRRPART, 0)
144 }); ctrlErr != nil {
145 return ctrlErr
146 }
147 if err != unix.Errno(0) {
148 return fmt.Errorf("ioctl(BLKRRPART): %w", err)
149 }
150 return nil
151}
152
Jan Schär5c82e0d2024-08-26 17:06:13 +0200153// ResizePartition updates the start and length of one partition in the kernel.
154// This can be used as an alternative to RefreshPartitionTable, which cannot
155// be used if any partition on this device is currently mounted.
156func (d *Device) ResizePartition(partitionNo int32, startByte, lengthBytes int64) error {
157 var ioctlPins runtime.Pinner
158 defer ioctlPins.Unpin()
159
160 partition := unix.BlkpgPartition{
161 Start: startByte,
162 Length: lengthBytes,
163 Pno: partitionNo,
164 }
165 ioctlPins.Pin(&partition)
166 arg := unix.BlkpgIoctlArg{
167 Op: unix.BLKPG_RESIZE_PARTITION,
168 Datalen: int32(unsafe.Sizeof(partition)),
169 Data: (*byte)(unsafe.Pointer(&partition)),
170 }
171
172 var err unix.Errno
173 if ctrlErr := d.rawConn.Control(func(fd uintptr) {
174 _, _, err = unix.Syscall(unix.SYS_IOCTL, fd, unix.BLKPG, uintptr(unsafe.Pointer(&arg)))
175 }); ctrlErr != nil {
176 return ctrlErr
177 }
178 if err != unix.Errno(0) {
179 return fmt.Errorf("ioctl(BLKPG): %w", err)
180 }
181 return nil
182}
183
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200184// Open opens a block device given a path to its inode.
185// TODO: exclusive, O_DIRECT
186func Open(path string) (*Device, error) {
187 outFile, err := os.OpenFile(path, os.O_RDWR, 0640)
188 if err != nil {
189 return nil, fmt.Errorf("failed to open block device: %w", err)
190 }
191 return FromFileHandle(outFile)
192}
193
194// FromFileHandle creates a blockdev from a device handle. The device handle is
195// not duplicated, closing the returned Device will close it. If the handle is
196// not a block device, i.e does not implement block device ioctls, an error is
197// returned.
198func FromFileHandle(handle *os.File) (*Device, error) {
199 outFileC, err := handle.SyscallConn()
200 if err != nil {
201 return nil, fmt.Errorf("error getting SyscallConn: %w", err)
202 }
203 var blockSize uint32
204 outFileC.Control(func(fd uintptr) {
205 blockSize, err = unix.IoctlGetUint32(int(fd), unix.BLKSSZGET)
206 })
207 if errors.Is(err, unix.ENOTTY) || errors.Is(err, unix.EINVAL) {
208 return nil, ErrNotBlockDevice
209 } else if err != nil {
210 return nil, fmt.Errorf("when querying disk block size: %w", err)
211 }
212
213 var sizeBytes uint64
Tim Windelschmidt06c19642024-04-23 15:07:40 +0200214 var getSizeErr syscall.Errno
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200215 outFileC.Control(func(fd uintptr) {
216 _, _, getSizeErr = unix.Syscall(unix.SYS_IOCTL, fd, unix.BLKGETSIZE64, uintptr(unsafe.Pointer(&sizeBytes)))
217 })
218
219 if getSizeErr != unix.Errno(0) {
220 return nil, fmt.Errorf("when querying disk block count: %w", err)
221 }
222 if sizeBytes%uint64(blockSize) != 0 {
223 return nil, fmt.Errorf("block device size is not an integer multiple of its block size (%d %% %d = %d)", sizeBytes, blockSize, sizeBytes%uint64(blockSize))
224 }
225 return &Device{
226 backend: handle,
227 rawConn: outFileC,
228 blockSize: int64(blockSize),
229 blockCount: int64(sizeBytes) / int64(blockSize),
230 }, nil
231}
232
// File is a block-device-like view of a regular file, with a block size and
// block count chosen by the caller of CreateFile.
type File struct {
	// backend is the open backing file used for reads, writes, sync and close.
	backend *os.File
	// rawConn gives access to the raw file descriptor of backend.
	rawConn syscall.RawConn
	// blockSize is the size of a single block in bytes.
	blockSize int64
	// blockCount is the number of blocks exposed by this File.
	blockCount int64
}
239
240func CreateFile(name string, blockSize int64, blockCount int64) (*File, error) {
241 if blockSize < 512 {
Jan Schära6da1712024-08-21 15:12:11 +0200242 return nil, fmt.Errorf("blockSize must be at least 512 bytes")
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200243 }
244 if bits.OnesCount64(uint64(blockSize)) != 1 {
245 return nil, fmt.Errorf("blockSize must be a power of two")
246 }
247 out, err := os.Create(name)
248 if err != nil {
249 return nil, fmt.Errorf("when creating backing file: %w", err)
250 }
251 rawConn, err := out.SyscallConn()
252 if err != nil {
253 return nil, fmt.Errorf("unable to get SyscallConn: %w", err)
254 }
255 return &File{
256 backend: out,
257 blockSize: blockSize,
258 rawConn: rawConn,
259 blockCount: blockCount,
260 }, nil
261}
262
263func (d *File) ReadAt(p []byte, off int64) (n int, err error) {
Jan Schära6da1712024-08-21 15:12:11 +0200264 size := d.blockSize * d.blockCount
265 if off > size {
266 return 0, io.EOF
267 }
268 if int64(len(p)) > size-off {
269 n, err = d.backend.ReadAt(p[:size-off], off)
270 if err == nil {
271 err = io.EOF
272 }
273 return
274 }
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200275 return d.backend.ReadAt(p, off)
276}
277
278func (d *File) WriteAt(p []byte, off int64) (n int, err error) {
Jan Schära6da1712024-08-21 15:12:11 +0200279 size := d.blockSize * d.blockCount
280 if off > size {
281 return 0, ErrOutOfBounds
282 }
283 if int64(len(p)) > size-off {
284 n, err = d.backend.WriteAt(p[:size-off], off)
285 if err == nil {
286 err = ErrOutOfBounds
287 }
288 return
289 }
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200290 return d.backend.WriteAt(p, off)
291}
292
293func (d *File) Close() error {
294 return d.backend.Close()
295}
296
297func (d *File) BlockCount() int64 {
298 return d.blockCount
299}
300
301func (d *File) BlockSize() int64 {
302 return d.blockSize
303}
304
Jan Schära6da1712024-08-21 15:12:11 +0200305func (d *File) OptimalBlockSize() int64 {
306 return d.blockSize
307}
308
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200309func (d *File) Discard(startByte int64, endByte int64) error {
Jan Schära6da1712024-08-21 15:12:11 +0200310 if err := validAlignedRange(d, startByte, endByte); err != nil {
311 return err
312 }
313 if startByte == endByte {
314 return nil
315 }
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200316 var err error
317 if ctrlErr := d.rawConn.Control(func(fd uintptr) {
318 // There is FALLOC_FL_NO_HIDE_STALE, but it's not implemented by
319 // any filesystem right now, so let's not attempt it for the time being.
320 err = unix.Fallocate(int(fd), unix.FALLOC_FL_PUNCH_HOLE|unix.FALLOC_FL_KEEP_SIZE, startByte, endByte-startByte)
321 }); ctrlErr != nil {
322 return ctrlErr
323 }
324 if errors.Is(err, unix.EOPNOTSUPP) {
Lorenz Brun65b1c682023-09-14 15:49:39 +0200325 return errors.ErrUnsupported
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200326 }
Jan Schär0ea961c2024-04-11 13:41:40 +0200327 if err != nil {
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200328 return fmt.Errorf("failed to discard: %w", err)
329 }
330 return nil
331}
332
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200333func (d *File) Zero(startByte int64, endByte int64) error {
Jan Schära6da1712024-08-21 15:12:11 +0200334 if err := validAlignedRange(d, startByte, endByte); err != nil {
335 return err
336 }
337 if startByte == endByte {
338 return nil
339 }
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200340 var err error
341 if ctrlErr := d.rawConn.Control(func(fd uintptr) {
342 // Tell the filesystem to punch out the given blocks.
343 err = unix.Fallocate(int(fd), unix.FALLOC_FL_PUNCH_HOLE|unix.FALLOC_FL_KEEP_SIZE, startByte, endByte-startByte)
344 }); ctrlErr != nil {
345 return ctrlErr
346 }
347 // If unsupported or the syscall is not available (for example in a sandbox)
348 // fall back to the generic software implementation.
349 if errors.Is(err, unix.EOPNOTSUPP) || errors.Is(err, unix.ENOSYS) {
350 return GenericZero(d, startByte, endByte)
351 }
352 if err != nil {
353 return fmt.Errorf("failed to zero out: %w", err)
354 }
355 return nil
356}
Jan Schära6da1712024-08-21 15:12:11 +0200357
358func (d *File) Sync() error {
359 return d.backend.Sync()
360}