blob: 5eb7fe80e42001625007d4fd54c1ed31f951abf4 [file] [log] [blame]
Tim Windelschmidt6d33a432025-02-04 14:34:25 +01001// Copyright The Monogon Project Authors.
2// SPDX-License-Identifier: Apache-2.0
3
Lorenz Brun1e0e3a42023-06-28 16:40:18 +02004package blockdev
5
6import (
7 "errors"
8 "fmt"
9 "io"
Lorenz Brun8eb02442025-02-25 16:57:52 +010010 "os"
Lorenz Brun1e0e3a42023-06-28 16:40:18 +020011)
12
// ErrNotBlockDevice is a sentinel error with the message "not a block
// device".
// NOTE(review): no code in this section returns it; presumably the
// platform-specific open code does when the target is not a block device —
// confirm against those files.
var ErrNotBlockDevice = errors.New("not a block device")
14
// options is the union of the open options of all platforms.
// Defining them once here, rather than per platform, avoids needing several
// per-platform files at every call site just to pick the right options.
//
// readOnly is set by WithReadonly, direct by WithDirect and exclusive by
// WithExclusive. The zero value has every option disabled.
type options struct {
	readOnly, direct, exclusive bool
}
23
24func (o *options) collect(opts []Option) {
25 for _, f := range opts {
26 f(o)
27 }
28}
29
30func (o *options) genericFlags() int {
31 if o.readOnly {
32 return os.O_RDONLY
33 } else {
34 return os.O_RDWR
35 }
36}
37
// Option is a function which modifies a set of open options. WithReadonly,
// WithDirect and WithExclusive have this signature; options.collect applies
// a slice of them in order.
type Option func(*options)
39
// WithReadonly opens the block device read-only. Any write calls will fail.
// Passed as an option to Open.
//
// It sets the readOnly option, which makes genericFlags return os.O_RDONLY
// instead of os.O_RDWR.
func WithReadonly(o *options) {
	o.readOnly = true
}
45
// WithDirect opens the block device bypassing any caching by the kernel.
// Note that additional alignment requirements might be imposed by the
// underlying device.
// Unsupported on non-Linux currently, will return an error.
//
// This function itself only sets the direct option; it has the Option
// signature so that it can be passed wherever an Option is accepted.
func WithDirect(o *options) {
	o.direct = true
}
53
// WithExclusive tries to acquire a pseudo-exclusive lock (only with other
// exclusive FDs) over the block device.
// Unsupported on non-Linux currently, will return an error.
//
// This function itself only sets the exclusive option; it has the Option
// signature so that it can be passed wherever an Option is accepted.
func WithExclusive(o *options) {
	o.exclusive = true
}
60
// BlockDev is a generic block device, i.e. a sequence of equally-sized
// blocks. Every offset and interval is given in bytes, must be aligned to
// BlockSize and should be aligned to OptimalBlockSize where feasible.
// Unless stated otherwise, intervals are inclusive-exclusive: the start byte
// is part of the interval, the end byte is not.
type BlockDev interface {
	io.ReaderAt
	io.WriterAt

	// BlockCount returns how many blocks the device has, or -1 if it is an
	// image with an undefined size.
	BlockCount() int64

	// BlockSize returns the size of one block in bytes. It must be a power
	// of two and is commonly (but not always) either 512 or 4096.
	BlockSize() int64

	// OptimalBlockSize returns the block size in bytes that is optimal both
	// for alignment and for issuing I/O. I/O operations with smaller block
	// sizes might incur read-write overhead. It is the larger of the
	// physical block size and a device-reported value if available.
	OptimalBlockSize() int64

	// Discard discards a contiguous set of blocks, which notifies the
	// underlying device that the data in these blocks is no longer needed.
	// This can improve the performance of the device (as it no longer needs
	// to preserve the unused data) as well as of bulk erase operations. The
	// command is advisory and not all implementations support it. The
	// contents of discarded blocks are implementation-defined.
	Discard(startByte, endByte int64) error

	// Zero zeroes a contiguous set of blocks. On certain implementations
	// this can be significantly faster than just calling Write with zeroes.
	Zero(startByte, endByte int64) error

	// Sync commits the current contents to stable storage.
	Sync() error
}
99
100func NewRWS(b BlockDev) *ReadWriteSeeker {
101 return &ReadWriteSeeker{b: b}
102}
103
// ReadWriteSeeker provides an adapter implementing io.ReadWriteSeeker on top
// of a BlockDev.
type ReadWriteSeeker struct {
	// b is the block device all reads and writes are forwarded to.
	b BlockDev
	// currPos is the current byte offset into b. Read and Write advance it by
	// the number of bytes transferred, Seek sets it.
	currPos int64
}
110
111func (s *ReadWriteSeeker) Read(p []byte) (n int, err error) {
112 n, err = s.b.ReadAt(p, s.currPos)
113 s.currPos += int64(n)
114 return
115}
116
117func (s *ReadWriteSeeker) Write(p []byte) (n int, err error) {
118 n, err = s.b.WriteAt(p, s.currPos)
119 s.currPos += int64(n)
120 return
121}
122
123func (s *ReadWriteSeeker) Seek(offset int64, whence int) (int64, error) {
124 switch whence {
Jan Schära6da1712024-08-21 15:12:11 +0200125 default:
126 return 0, errors.New("Seek: invalid whence")
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200127 case io.SeekStart:
Jan Schära6da1712024-08-21 15:12:11 +0200128 case io.SeekCurrent:
129 offset += s.currPos
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200130 case io.SeekEnd:
Jan Schära6da1712024-08-21 15:12:11 +0200131 offset += s.b.BlockCount() * s.b.BlockSize()
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200132 }
Jan Schära6da1712024-08-21 15:12:11 +0200133 if offset < 0 {
134 return 0, errors.New("Seek: invalid offset")
135 }
136 s.currPos = offset
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200137 return s.currPos, nil
138}
139
// ErrOutOfBounds is returned by Section.WriteAt when a write starts at a
// negative offset or past the end of the section, or when it had to be
// truncated because it would have run past the end of the section.
var ErrOutOfBounds = errors.New("write out of bounds")
141
142// NewSection returns a new Section, implementing BlockDev over that subset
143// of blocks. The interval is inclusive-exclusive.
Jan Schära6da1712024-08-21 15:12:11 +0200144func NewSection(b BlockDev, startBlock, endBlock int64) (*Section, error) {
145 if startBlock < 0 {
146 return nil, fmt.Errorf("invalid range: startBlock (%d) negative", startBlock)
147 }
148 if startBlock > endBlock {
149 return nil, fmt.Errorf("invalid range: startBlock (%d) bigger than endBlock (%d)", startBlock, endBlock)
150 }
151 if endBlock > b.BlockCount() {
152 return nil, fmt.Errorf("endBlock (%d) out of range (%d)", endBlock, b.BlockCount())
153 }
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200154 return &Section{
155 b: b,
156 startBlock: startBlock,
157 endBlock: endBlock,
Jan Schära6da1712024-08-21 15:12:11 +0200158 }, nil
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200159}
160
161// Section implements BlockDev on a slice of another BlockDev given a startBlock
162// and endBlock.
163type Section struct {
164 b BlockDev
165 startBlock, endBlock int64
166}
167
168func (s *Section) ReadAt(p []byte, off int64) (n int, err error) {
Jan Schära6da1712024-08-21 15:12:11 +0200169 if off < 0 {
170 return 0, errors.New("blockdev.Section.ReadAt: negative offset")
171 }
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200172 bOff := off + (s.startBlock * s.b.BlockSize())
173 bytesToEnd := (s.endBlock * s.b.BlockSize()) - bOff
Jan Schära6da1712024-08-21 15:12:11 +0200174 if bytesToEnd < 0 {
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200175 return 0, io.EOF
176 }
177 if bytesToEnd < int64(len(p)) {
Jan Schära6da1712024-08-21 15:12:11 +0200178 n, err := s.b.ReadAt(p[:bytesToEnd], bOff)
179 if err == nil {
180 err = io.EOF
181 }
182 return n, err
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200183 }
184 return s.b.ReadAt(p, bOff)
185}
186
187func (s *Section) WriteAt(p []byte, off int64) (n int, err error) {
188 bOff := off + (s.startBlock * s.b.BlockSize())
189 bytesToEnd := (s.endBlock * s.b.BlockSize()) - bOff
Jan Schära6da1712024-08-21 15:12:11 +0200190 if off < 0 || bytesToEnd < 0 {
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200191 return 0, ErrOutOfBounds
192 }
193 if bytesToEnd < int64(len(p)) {
Jan Schära6da1712024-08-21 15:12:11 +0200194 n, err := s.b.WriteAt(p[:bytesToEnd], bOff)
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200195 if err != nil {
196 // If an error happened, prioritize that error
197 return n, err
198 }
199 // Otherwise, return ErrOutOfBounds as even short writes must return an
200 // error.
201 return n, ErrOutOfBounds
202 }
Jan Schära6da1712024-08-21 15:12:11 +0200203 return s.b.WriteAt(p, bOff)
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200204}
205
// BlockCount returns the number of blocks in the section, which is endBlock
// minus startBlock.
func (s *Section) BlockCount() int64 {
	return s.endBlock - s.startBlock
}
209
// BlockSize returns the block size of the underlying BlockDev.
func (s *Section) BlockSize() int64 {
	return s.b.BlockSize()
}
213
// OptimalBlockSize returns the optimal block size of the underlying
// BlockDev.
func (s *Section) OptimalBlockSize() int64 {
	return s.b.OptimalBlockSize()
}
217
218func (s *Section) Discard(startByte, endByte int64) error {
Jan Schära6da1712024-08-21 15:12:11 +0200219 if err := validAlignedRange(s, startByte, endByte); err != nil {
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200220 return err
221 }
Jan Schär0ea961c2024-04-11 13:41:40 +0200222 offset := s.startBlock * s.b.BlockSize()
223 return s.b.Discard(offset+startByte, offset+endByte)
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200224}
225
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200226func (s *Section) Zero(startByte, endByte int64) error {
Jan Schära6da1712024-08-21 15:12:11 +0200227 if err := validAlignedRange(s, startByte, endByte); err != nil {
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200228 return err
229 }
Jan Schär0ea961c2024-04-11 13:41:40 +0200230 offset := s.startBlock * s.b.BlockSize()
231 return s.b.Zero(offset+startByte, offset+endByte)
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200232}
233
// Sync calls Sync on the underlying BlockDev and returns its result. No
// range is passed along, so the call is not restricted to the blocks of the
// section.
func (s *Section) Sync() error {
	return s.b.Sync()
}
237
238func validAlignedRange(b BlockDev, startByte, endByte int64) error {
239 if startByte < 0 {
240 return fmt.Errorf("invalid range: startByte (%d) negative", startByte)
241 }
242 if startByte > endByte {
243 return fmt.Errorf("invalid range: startByte (%d) bigger than endByte (%d)", startByte, endByte)
244 }
245 devLen := b.BlockCount() * b.BlockSize()
246 if endByte > devLen {
247 return fmt.Errorf("endByte (%d) out of range (%d)", endByte, devLen)
248 }
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200249 if startByte%b.BlockSize() != 0 {
250 return fmt.Errorf("startByte (%d) needs to be aligned to block size (%d)", startByte, b.BlockSize())
251 }
252 if endByte%b.BlockSize() != 0 {
253 return fmt.Errorf("endByte (%d) needs to be aligned to block size (%d)", endByte, b.BlockSize())
254 }
Jan Schära6da1712024-08-21 15:12:11 +0200255 return nil
256}
257
258// GenericZero implements software-based zeroing. This can be used to implement
259// Zero when no acceleration is available or desired.
260func GenericZero(b BlockDev, startByte, endByte int64) error {
261 if err := validAlignedRange(b, startByte, endByte); err != nil {
262 return err
263 }
Lorenz Brun1e0e3a42023-06-28 16:40:18 +0200264 // Choose buffer size close to 16MiB or the range to be zeroed, whatever
265 // is smaller.
266 bufSizeTarget := int64(16 * 1024 * 1024)
267 if endByte-startByte < bufSizeTarget {
268 bufSizeTarget = endByte - startByte
269 }
270 bufSize := (bufSizeTarget / b.BlockSize()) * b.BlockSize()
271 buf := make([]byte, bufSize)
272 for i := startByte; i < endByte; i += bufSize {
273 if endByte-i < bufSize {
274 buf = buf[:endByte-i]
275 }
276 if _, err := b.WriteAt(buf, i); err != nil {
277 return fmt.Errorf("while writing zeroes: %w", err)
278 }
279 }
280 return nil
281}