blob: a8e6a8a511e3a86c1cc03fa921f014c25846fae8 [file] [log] [blame]
Lorenz Brun1e0e3a42023-06-28 16:40:18 +02001//go:build linux
2
3package blockdev
4
5import (
6 "errors"
7 "fmt"
8 "math/bits"
9 "os"
10 "syscall"
11 "unsafe"
12
13 "golang.org/x/sys/unix"
14)
15
// Device is a handle on a Linux block device. It keeps the open file, a raw
// connection to it for issuing ioctls, and the geometry that was queried when
// the handle was created.
type Device struct {
	// backend is the open device node used for all reads and writes.
	backend *os.File
	// rawConn gives access to the underlying file descriptor for ioctls.
	rawConn syscall.RawConn

	// blockSize is the size of one block in bytes, blockCount the number of
	// such blocks on the device.
	blockSize, blockCount int64
}

// ReadAt reads len(p) bytes starting at byte offset off by delegating to the
// underlying file.
func (d *Device) ReadAt(p []byte, off int64) (n int, err error) {
	n, err = d.backend.ReadAt(p, off)
	return n, err
}

// WriteAt writes p starting at byte offset off by delegating to the
// underlying file.
func (d *Device) WriteAt(p []byte, off int64) (n int, err error) {
	n, err = d.backend.WriteAt(p, off)
	return n, err
}

// Close closes the underlying file.
func (d *Device) Close() error {
	closeErr := d.backend.Close()
	return closeErr
}

// BlockCount returns the number of blocks recorded for this device.
func (d *Device) BlockCount() int64 {
	count := d.blockCount
	return count
}

// BlockSize returns the size in bytes of a single block of this device.
func (d *Device) BlockSize() int64 {
	size := d.blockSize
	return size
}
42
43func (d *Device) Discard(startByte int64, endByte int64) error {
44 var args [2]uint64
45 var err unix.Errno
46 args[0] = uint64(startByte)
47 args[1] = uint64(endByte)
48 if ctrlErr := d.rawConn.Control(func(fd uintptr) {
49 _, _, err = unix.Syscall(unix.SYS_IOCTL, fd, unix.BLKDISCARD, uintptr(unsafe.Pointer(&args[0])))
50 }); ctrlErr != nil {
51 return ctrlErr
52 }
53 if err == unix.EOPNOTSUPP {
54 return ErrUnsupported
55 }
56 if err != unix.Errno(0) {
57 return fmt.Errorf("failed to discard: %w", err)
58 }
59 return nil
60}
61
62func (d *Device) OptimalBlockSize() int64 {
63 return d.blockSize
64}
65
66func (d *Device) Zero(startByte int64, endByte int64) error {
67 var args [2]uint64
68 var err error
69 args[0] = uint64(startByte)
70 args[1] = uint64(endByte)
71 if ctrlErr := d.rawConn.Control(func(fd uintptr) {
72 // Attempts to leverage discard guarantees to provide extremely quick
73 // metadata-only zeroing.
74 err = unix.Fallocate(int(fd), unix.FALLOC_FL_PUNCH_HOLE|unix.FALLOC_FL_KEEP_SIZE, startByte, endByte-startByte)
75 if err == unix.EOPNOTSUPP {
76 // Tries Write Same and friends and then just falls back to writing
77 // zeroes.
78 _, _, err = unix.Syscall(unix.SYS_IOCTL, fd, unix.BLKZEROOUT, uintptr(unsafe.Pointer(&args[0])))
79 if err == unix.Errno(0) {
80 err = nil
81 }
82 }
83 }); ctrlErr != nil {
84 return ctrlErr
85 }
86 if err != nil {
87 return fmt.Errorf("failed to zero out: %w", err)
88 }
89 return nil
90}
91
92// RefreshPartitionTable refreshes the kernel's view of the partition table
93// after changes made from userspace.
94func (d *Device) RefreshPartitionTable() error {
95 var err unix.Errno
96 if ctrlErr := d.rawConn.Control(func(fd uintptr) {
97 _, _, err = unix.Syscall(unix.SYS_IOCTL, fd, unix.BLKRRPART, 0)
98 }); ctrlErr != nil {
99 return ctrlErr
100 }
101 if err != unix.Errno(0) {
102 return fmt.Errorf("ioctl(BLKRRPART): %w", err)
103 }
104 return nil
105}
106
107// Open opens a block device given a path to its inode.
108// TODO: exclusive, O_DIRECT
109func Open(path string) (*Device, error) {
110 outFile, err := os.OpenFile(path, os.O_RDWR, 0640)
111 if err != nil {
112 return nil, fmt.Errorf("failed to open block device: %w", err)
113 }
114 return FromFileHandle(outFile)
115}
116
117// FromFileHandle creates a blockdev from a device handle. The device handle is
118// not duplicated, closing the returned Device will close it. If the handle is
119// not a block device, i.e does not implement block device ioctls, an error is
120// returned.
121func FromFileHandle(handle *os.File) (*Device, error) {
122 outFileC, err := handle.SyscallConn()
123 if err != nil {
124 return nil, fmt.Errorf("error getting SyscallConn: %w", err)
125 }
126 var blockSize uint32
127 outFileC.Control(func(fd uintptr) {
128 blockSize, err = unix.IoctlGetUint32(int(fd), unix.BLKSSZGET)
129 })
130 if errors.Is(err, unix.ENOTTY) || errors.Is(err, unix.EINVAL) {
131 return nil, ErrNotBlockDevice
132 } else if err != nil {
133 return nil, fmt.Errorf("when querying disk block size: %w", err)
134 }
135
136 var sizeBytes uint64
137 var getSizeErr error
138 outFileC.Control(func(fd uintptr) {
139 _, _, getSizeErr = unix.Syscall(unix.SYS_IOCTL, fd, unix.BLKGETSIZE64, uintptr(unsafe.Pointer(&sizeBytes)))
140 })
141
142 if getSizeErr != unix.Errno(0) {
143 return nil, fmt.Errorf("when querying disk block count: %w", err)
144 }
145 if sizeBytes%uint64(blockSize) != 0 {
146 return nil, fmt.Errorf("block device size is not an integer multiple of its block size (%d %% %d = %d)", sizeBytes, blockSize, sizeBytes%uint64(blockSize))
147 }
148 return &Device{
149 backend: handle,
150 rawConn: outFileC,
151 blockSize: int64(blockSize),
152 blockCount: int64(sizeBytes) / int64(blockSize),
153 }, nil
154}
155
// File is a block device emulated on top of a regular file. It keeps the open
// backing file, a raw connection to it for fallocate calls, and the geometry
// it was created with.
type File struct {
	// backend is the open backing file used for all reads and writes.
	backend *os.File
	// rawConn gives access to the underlying file descriptor.
	rawConn syscall.RawConn
	// blockSize is the size of one block in bytes.
	blockSize int64
	// blockCount is the number of blocks the file represents.
	blockCount int64
}

// CreateFile creates (or truncates) the file at name and returns a File
// representing blockCount blocks of blockSize bytes each.
//
// blockSize must be a power of two and at least 512, and blockCount must not
// be negative. The backing file is extended to blockSize*blockCount bytes so
// that every block can be read back right away instead of hitting EOF. If
// anything fails after the file has been opened, the file is closed again
// before the error is returned.
func CreateFile(name string, blockSize int64, blockCount int64) (*File, error) {
	if blockSize < 512 {
		return nil, fmt.Errorf("blockSize must be bigger than 512 bytes")
	}
	if bits.OnesCount64(uint64(blockSize)) != 1 {
		return nil, fmt.Errorf("blockSize must be a power of two")
	}
	if blockCount < 0 {
		return nil, fmt.Errorf("blockCount must not be negative")
	}
	// Compute the total size with a full-width multiply so that an overflow
	// of int64 is detected instead of silently wrapping around.
	hi, lo := bits.Mul64(uint64(blockSize), uint64(blockCount))
	if hi != 0 || lo > 1<<63-1 {
		return nil, fmt.Errorf("blockSize * blockCount overflows int64")
	}
	out, err := os.Create(name)
	if err != nil {
		return nil, fmt.Errorf("when creating backing file: %w", err)
	}
	if err := out.Truncate(int64(lo)); err != nil {
		_ = out.Close() // best effort, the truncate error is more useful
		return nil, fmt.Errorf("when sizing backing file: %w", err)
	}
	rawConn, err := out.SyscallConn()
	if err != nil {
		_ = out.Close() // best effort, the SyscallConn error is more useful
		return nil, fmt.Errorf("unable to get SyscallConn: %w", err)
	}
	return &File{
		backend:    out,
		blockSize:  blockSize,
		rawConn:    rawConn,
		blockCount: blockCount,
	}, nil
}

// ReadAt reads len(p) bytes starting at byte offset off by delegating to the
// backing file.
func (d *File) ReadAt(p []byte, off int64) (n int, err error) {
	return d.backend.ReadAt(p, off)
}

// WriteAt writes p starting at byte offset off by delegating to the backing
// file.
func (d *File) WriteAt(p []byte, off int64) (n int, err error) {
	return d.backend.WriteAt(p, off)
}

// Close closes the backing file.
func (d *File) Close() error {
	return d.backend.Close()
}

// BlockCount returns the number of blocks this File was created with.
func (d *File) BlockCount() int64 {
	return d.blockCount
}

// BlockSize returns the size in bytes of a single block of this File.
func (d *File) BlockSize() int64 {
	return d.blockSize
}
205
206func (d *File) Discard(startByte int64, endByte int64) error {
207 var err error
208 if ctrlErr := d.rawConn.Control(func(fd uintptr) {
209 // There is FALLOC_FL_NO_HIDE_STALE, but it's not implemented by
210 // any filesystem right now, so let's not attempt it for the time being.
211 err = unix.Fallocate(int(fd), unix.FALLOC_FL_PUNCH_HOLE|unix.FALLOC_FL_KEEP_SIZE, startByte, endByte-startByte)
212 }); ctrlErr != nil {
213 return ctrlErr
214 }
215 if errors.Is(err, unix.EOPNOTSUPP) {
216 return ErrUnsupported
217 }
218 if err != unix.Errno(0) {
219 return fmt.Errorf("failed to discard: %w", err)
220 }
221 return nil
222}
223
224func (d *File) OptimalBlockSize() int64 {
225 return d.blockSize
226}
227
228func (d *File) Zero(startByte int64, endByte int64) error {
229 var err error
230 if ctrlErr := d.rawConn.Control(func(fd uintptr) {
231 // Tell the filesystem to punch out the given blocks.
232 err = unix.Fallocate(int(fd), unix.FALLOC_FL_PUNCH_HOLE|unix.FALLOC_FL_KEEP_SIZE, startByte, endByte-startByte)
233 }); ctrlErr != nil {
234 return ctrlErr
235 }
236 // If unsupported or the syscall is not available (for example in a sandbox)
237 // fall back to the generic software implementation.
238 if errors.Is(err, unix.EOPNOTSUPP) || errors.Is(err, unix.ENOSYS) {
239 return GenericZero(d, startByte, endByte)
240 }
241 if err != nil {
242 return fmt.Errorf("failed to zero out: %w", err)
243 }
244 return nil
245}