| Tim Windelschmidt | 6d33a43 | 2025-02-04 14:34:25 +0100 | [diff] [blame] | 1 | // Copyright The Monogon Project Authors. |
| 2 | // SPDX-License-Identifier: Apache-2.0 |
| 3 | |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 4 | package blockdev |
| 5 | |
| 6 | import ( |
| 7 | "errors" |
| 8 | "fmt" |
| 9 | "io" |
| Lorenz Brun | 8eb0244 | 2025-02-25 16:57:52 +0100 | [diff] [blame] | 10 | "os" |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 11 | ) |
| 12 | |
// ErrNotBlockDevice is a sentinel error carrying the message "not a block
// device". It is not referenced elsewhere in this file; presumably it is
// returned by platform-specific open code when the target is not a block
// device (verify against callers).
var ErrNotBlockDevice = errors.New("not a block device")
| 14 | |
// options aggregates all open options for all platforms.
// If these were defined per-platform, selecting the right ones per platform
// would require multiple per-platform files at each call site.
type options struct {
	// readOnly is set by WithReadonly and makes genericFlags return
	// os.O_RDONLY instead of os.O_RDWR.
	readOnly bool
	// direct is set by WithDirect. It is not consumed in this file;
	// presumably platform-specific open code reads it (verify).
	direct bool
	// exclusive is set by WithExclusive. It is not consumed in this file;
	// presumably platform-specific open code reads it (verify).
	exclusive bool
}
| 23 | |
| 24 | func (o *options) collect(opts []Option) { |
| 25 | for _, f := range opts { |
| 26 | f(o) |
| 27 | } |
| 28 | } |
| 29 | |
| 30 | func (o *options) genericFlags() int { |
| 31 | if o.readOnly { |
| 32 | return os.O_RDONLY |
| 33 | } else { |
| 34 | return os.O_RDWR |
| 35 | } |
| 36 | } |
| 37 | |
// Option is a functional option: a function which mutates the aggregated
// open options. WithReadonly, WithDirect and WithExclusive have this
// signature and can be passed wherever an Option is accepted.
type Option func(*options)
| 39 | |
// WithReadonly opens the block device read-only. Any write calls will fail.
// Passed as an option to Open.
func WithReadonly(o *options) {
	// Makes genericFlags select os.O_RDONLY instead of os.O_RDWR.
	o.readOnly = true
}
| 45 | |
// WithDirect opens the block device bypassing any caching by the kernel.
// Note that additional alignment requirements might be imposed by the
// underlying device.
// Currently unsupported on non-Linux platforms, where it will return an
// error.
func WithDirect(o *options) {
	// Only records the request; the flag is acted upon outside this file.
	o.direct = true
}
| 53 | |
// WithExclusive tries to acquire a pseudo-exclusive lock (only with other
// exclusive FDs) over the block device.
// Currently unsupported on non-Linux platforms, where it will return an
// error.
func WithExclusive(o *options) {
	// Only records the request; the flag is acted upon outside this file.
	o.exclusive = true
}
| 60 | |
// BlockDev represents a generic block device made up of equally-sized blocks.
// All offsets and intervals are expressed in bytes and must be aligned to
// BlockSize and are recommended to be aligned to OptimalBlockSize if feasible.
// Unless stated otherwise, intervals are inclusive-exclusive, i.e. the
// start byte is included but the end byte is not.
type BlockDev interface {
	io.ReaderAt
	io.WriterAt

	// BlockCount returns the number of blocks on the block device or -1 if it
	// is an image with an undefined size.
	BlockCount() int64

	// BlockSize returns the block size of the block device in bytes. This must
	// be a power of two and is commonly (but not always) either 512 or 4096.
	BlockSize() int64

	// OptimalBlockSize returns the optimal block size in bytes for aligning
	// to as well as issuing I/O. I/O operations with block sizes below this
	// one might incur read-write overhead. This is the larger of the physical
	// block size and a device-reported value if available.
	OptimalBlockSize() int64

	// Discard discards a contiguous set of blocks. Discarding means the
	// underlying device gets notified that the data in these blocks is no
	// longer needed. This can improve performance of the device (as it
	// no longer needs to preserve the unused data) as well as bulk erase
	// operations. This command is advisory and not all implementations support
	// it. The contents of discarded blocks are implementation-defined.
	Discard(startByte int64, endByte int64) error

	// Zero zeroes a contiguous set of blocks. On certain implementations this
	// can be significantly faster than just calling Write with zeroes.
	Zero(startByte, endByte int64) error

	// Sync commits the current contents to stable storage.
	Sync() error
}
| 99 | |
// ReaderFromAt is similar to [io.ReaderFrom], except that the write starts at
// offset off instead of using the file offset.
//
// It is an optional interface: code in this file type-asserts a BlockDev to
// ReaderFromAt and falls back to a generic copy when it is not implemented.
type ReaderFromAt interface {
	// ReadFromAt reads data from r and writes it starting at byte offset
	// off. By analogy with io.ReaderFrom, n is presumably the number of
	// bytes read from r (callers in this file treat it that way).
	ReadFromAt(r io.Reader, off int64) (n int64, err error)
}
| 105 | |
// writerOnly wraps an [io.Writer] and hides all methods other than Write
// (such as ReadFrom). Passing it as the destination of io.Copy keeps io.Copy
// from calling the wrapped value's ReadFrom, which the callers in this file
// rely on to avoid recursive calls.
type writerOnly struct {
	io.Writer
}
| 111 | |
| 112 | // genericReadFromAt is a generic implementation which does not use b.ReadFromAt |
| 113 | // to prevent recursive calls. |
| 114 | func genericReadFromAt(b BlockDev, r io.Reader, off int64) (int64, error) { |
| 115 | w := &writerOnly{Writer: &ReadWriteSeeker{b: b, currPos: off}} |
| 116 | return io.Copy(w, r) |
| 117 | } |
| 118 | |
// NewRWS returns a ReadWriteSeeker operating on b. The current position
// starts at byte offset 0 (the zero value of currPos).
func NewRWS(b BlockDev) *ReadWriteSeeker {
	return &ReadWriteSeeker{b: b}
}
| 122 | |
// ReadWriteSeeker provides an adapter offering Read, Write, Seek and
// ReadFrom on top of a BlockDev by tracking a current byte position.
type ReadWriteSeeker struct {
	// b is the underlying block device all I/O is forwarded to.
	b BlockDev
	// currPos is the current byte offset into b. It is advanced by Read,
	// Write and ReadFrom and set by Seek.
	currPos int64
}
| 129 | |
| 130 | func (s *ReadWriteSeeker) Read(p []byte) (n int, err error) { |
| 131 | n, err = s.b.ReadAt(p, s.currPos) |
| 132 | s.currPos += int64(n) |
| 133 | return |
| 134 | } |
| 135 | |
| 136 | func (s *ReadWriteSeeker) Write(p []byte) (n int, err error) { |
| 137 | n, err = s.b.WriteAt(p, s.currPos) |
| 138 | s.currPos += int64(n) |
| 139 | return |
| 140 | } |
| 141 | |
| Jan Schär | e0db72c | 2025-06-18 18:14:07 +0000 | [diff] [blame] | 142 | func (s *ReadWriteSeeker) ReadFrom(r io.Reader) (n int64, err error) { |
| 143 | rfa, rfaOK := s.b.(ReaderFromAt) |
| 144 | if !rfaOK { |
| 145 | w := &writerOnly{Writer: s} |
| 146 | return io.Copy(w, r) |
| 147 | } |
| 148 | n, err = rfa.ReadFromAt(r, s.currPos) |
| 149 | s.currPos += n |
| 150 | return |
| 151 | } |
| 152 | |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 153 | func (s *ReadWriteSeeker) Seek(offset int64, whence int) (int64, error) { |
| 154 | switch whence { |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 155 | default: |
| 156 | return 0, errors.New("Seek: invalid whence") |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 157 | case io.SeekStart: |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 158 | case io.SeekCurrent: |
| 159 | offset += s.currPos |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 160 | case io.SeekEnd: |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 161 | offset += s.b.BlockCount() * s.b.BlockSize() |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 162 | } |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 163 | if offset < 0 { |
| 164 | return 0, errors.New("Seek: invalid offset") |
| 165 | } |
| 166 | s.currPos = offset |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 167 | return s.currPos, nil |
| 168 | } |
| 169 | |
// ErrOutOfBounds is returned by Section.WriteAt and Section.ReadFromAt when a
// write starts at a negative offset, starts past the end of the section, or
// has to be cut short because it would extend past the end of the section.
var ErrOutOfBounds = errors.New("write out of bounds")
| 171 | |
| 172 | // NewSection returns a new Section, implementing BlockDev over that subset |
| 173 | // of blocks. The interval is inclusive-exclusive. |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 174 | func NewSection(b BlockDev, startBlock, endBlock int64) (*Section, error) { |
| 175 | if startBlock < 0 { |
| 176 | return nil, fmt.Errorf("invalid range: startBlock (%d) negative", startBlock) |
| 177 | } |
| 178 | if startBlock > endBlock { |
| 179 | return nil, fmt.Errorf("invalid range: startBlock (%d) bigger than endBlock (%d)", startBlock, endBlock) |
| 180 | } |
| 181 | if endBlock > b.BlockCount() { |
| 182 | return nil, fmt.Errorf("endBlock (%d) out of range (%d)", endBlock, b.BlockCount()) |
| 183 | } |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 184 | return &Section{ |
| 185 | b: b, |
| 186 | startBlock: startBlock, |
| 187 | endBlock: endBlock, |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 188 | }, nil |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 189 | } |
| 190 | |
// Section implements BlockDev on a slice of another BlockDev given a startBlock
// and endBlock. Offsets passed to its methods are relative to the start of
// the section and are translated to offsets on the underlying device.
type Section struct {
	// b is the underlying device the section is a window onto.
	b BlockDev
	// startBlock (inclusive) and endBlock (exclusive) delimit the section,
	// expressed in blocks of b.
	startBlock, endBlock int64
}
| 197 | |
| 198 | func (s *Section) ReadAt(p []byte, off int64) (n int, err error) { |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 199 | if off < 0 { |
| 200 | return 0, errors.New("blockdev.Section.ReadAt: negative offset") |
| 201 | } |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 202 | bOff := off + (s.startBlock * s.b.BlockSize()) |
| 203 | bytesToEnd := (s.endBlock * s.b.BlockSize()) - bOff |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 204 | if bytesToEnd < 0 { |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 205 | return 0, io.EOF |
| 206 | } |
| 207 | if bytesToEnd < int64(len(p)) { |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 208 | n, err := s.b.ReadAt(p[:bytesToEnd], bOff) |
| 209 | if err == nil { |
| 210 | err = io.EOF |
| 211 | } |
| 212 | return n, err |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 213 | } |
| 214 | return s.b.ReadAt(p, bOff) |
| 215 | } |
| 216 | |
| 217 | func (s *Section) WriteAt(p []byte, off int64) (n int, err error) { |
| 218 | bOff := off + (s.startBlock * s.b.BlockSize()) |
| 219 | bytesToEnd := (s.endBlock * s.b.BlockSize()) - bOff |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 220 | if off < 0 || bytesToEnd < 0 { |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 221 | return 0, ErrOutOfBounds |
| 222 | } |
| 223 | if bytesToEnd < int64(len(p)) { |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 224 | n, err := s.b.WriteAt(p[:bytesToEnd], bOff) |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 225 | if err != nil { |
| 226 | // If an error happened, prioritize that error |
| 227 | return n, err |
| 228 | } |
| 229 | // Otherwise, return ErrOutOfBounds as even short writes must return an |
| 230 | // error. |
| 231 | return n, ErrOutOfBounds |
| 232 | } |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 233 | return s.b.WriteAt(p, bOff) |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 234 | } |
| 235 | |
// ReadFromAt copies data from r into the section starting at byte offset off,
// relative to the start of the section, and returns the number of bytes
// reported by the underlying copy.
//
// If the underlying device does not implement ReaderFromAt, the copy goes
// through genericReadFromAt and therefore through s.WriteAt. Otherwise the
// copy is delegated to the underlying device, limited so that it cannot
// write past the end of the section. ErrOutOfBounds is returned if off is
// negative or past the end of the section, or if r still has data left once
// the section has been filled.
func (s *Section) ReadFromAt(r io.Reader, off int64) (n int64, err error) {
	rfa, rfaOK := s.b.(ReaderFromAt)
	if !rfaOK {
		return genericReadFromAt(s, r, off)
	}
	// Translate the section-relative offset to the underlying device and
	// compute how many bytes remain until the end of the section.
	bOff := off + (s.startBlock * s.b.BlockSize())
	bytesToEnd := (s.endBlock * s.b.BlockSize()) - bOff
	if off < 0 || bytesToEnd < 0 {
		return 0, ErrOutOfBounds
	}
	ur := r
	lr, lrOK := r.(*io.LimitedReader)
	if lrOK {
		if bytesToEnd >= lr.N {
			// r is already limited to at most the remaining space, so it
			// can be handed to the underlying device unchanged.
			return rfa.ReadFromAt(r, bOff)
		}
		// Unwrap the LimitedReader so that it can be re-limited to the
		// (smaller) remaining space below.
		ur = lr.R
	}
	n, err = rfa.ReadFromAt(io.LimitReader(ur, bytesToEnd), bOff)
	if lrOK {
		// The original LimitedReader was bypassed, so account for the
		// consumed bytes manually (assumes n is the number of bytes read
		// from ur).
		lr.N -= n
	}
	if err == nil && n == bytesToEnd {
		// Return an error if we have not reached EOF. This probes r by
		// reading one more byte, which is discarded.
		moreN, moreErr := io.CopyN(io.Discard, r, 1)
		if moreN != 0 {
			err = ErrOutOfBounds
		} else if moreErr != io.EOF {
			err = moreErr
		}
	}
	return
}
| 269 | |
// BlockCount returns the number of blocks in the section, i.e. endBlock minus
// startBlock.
func (s *Section) BlockCount() int64 {
	return s.endBlock - s.startBlock
}
| 273 | |
// BlockSize returns the block size of the underlying device in bytes.
func (s *Section) BlockSize() int64 {
	return s.b.BlockSize()
}
| 277 | |
// OptimalBlockSize returns the optimal block size of the underlying device in
// bytes.
func (s *Section) OptimalBlockSize() int64 {
	return s.b.OptimalBlockSize()
}
| 281 | |
| 282 | func (s *Section) Discard(startByte, endByte int64) error { |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 283 | if err := validAlignedRange(s, startByte, endByte); err != nil { |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 284 | return err |
| 285 | } |
| Jan Schär | 0ea961c | 2024-04-11 13:41:40 +0200 | [diff] [blame] | 286 | offset := s.startBlock * s.b.BlockSize() |
| 287 | return s.b.Discard(offset+startByte, offset+endByte) |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 288 | } |
| 289 | |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 290 | func (s *Section) Zero(startByte, endByte int64) error { |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 291 | if err := validAlignedRange(s, startByte, endByte); err != nil { |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 292 | return err |
| 293 | } |
| Jan Schär | 0ea961c | 2024-04-11 13:41:40 +0200 | [diff] [blame] | 294 | offset := s.startBlock * s.b.BlockSize() |
| 295 | return s.b.Zero(offset+startByte, offset+endByte) |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 296 | } |
| 297 | |
// Sync calls Sync on the underlying device. Note that this is not limited to
// the section's range.
func (s *Section) Sync() error {
	return s.b.Sync()
}
| 301 | |
| 302 | func validAlignedRange(b BlockDev, startByte, endByte int64) error { |
| 303 | if startByte < 0 { |
| 304 | return fmt.Errorf("invalid range: startByte (%d) negative", startByte) |
| 305 | } |
| 306 | if startByte > endByte { |
| 307 | return fmt.Errorf("invalid range: startByte (%d) bigger than endByte (%d)", startByte, endByte) |
| 308 | } |
| 309 | devLen := b.BlockCount() * b.BlockSize() |
| 310 | if endByte > devLen { |
| 311 | return fmt.Errorf("endByte (%d) out of range (%d)", endByte, devLen) |
| 312 | } |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 313 | if startByte%b.BlockSize() != 0 { |
| 314 | return fmt.Errorf("startByte (%d) needs to be aligned to block size (%d)", startByte, b.BlockSize()) |
| 315 | } |
| 316 | if endByte%b.BlockSize() != 0 { |
| 317 | return fmt.Errorf("endByte (%d) needs to be aligned to block size (%d)", endByte, b.BlockSize()) |
| 318 | } |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 319 | return nil |
| 320 | } |
| 321 | |
| 322 | // GenericZero implements software-based zeroing. This can be used to implement |
| 323 | // Zero when no acceleration is available or desired. |
| 324 | func GenericZero(b BlockDev, startByte, endByte int64) error { |
| 325 | if err := validAlignedRange(b, startByte, endByte); err != nil { |
| 326 | return err |
| 327 | } |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 328 | // Choose buffer size close to 16MiB or the range to be zeroed, whatever |
| 329 | // is smaller. |
| 330 | bufSizeTarget := int64(16 * 1024 * 1024) |
| 331 | if endByte-startByte < bufSizeTarget { |
| 332 | bufSizeTarget = endByte - startByte |
| 333 | } |
| 334 | bufSize := (bufSizeTarget / b.BlockSize()) * b.BlockSize() |
| 335 | buf := make([]byte, bufSize) |
| 336 | for i := startByte; i < endByte; i += bufSize { |
| 337 | if endByte-i < bufSize { |
| 338 | buf = buf[:endByte-i] |
| 339 | } |
| 340 | if _, err := b.WriteAt(buf, i); err != nil { |
| 341 | return fmt.Errorf("while writing zeroes: %w", err) |
| 342 | } |
| 343 | } |
| 344 | return nil |
| 345 | } |