| Tim Windelschmidt | 6d33a43 | 2025-02-04 14:34:25 +0100 | [diff] [blame] | 1 | // Copyright The Monogon Project Authors. |
| 2 | // SPDX-License-Identifier: Apache-2.0 |
| 3 | |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 4 | package blockdev |
| 5 | |
| 6 | import ( |
| 7 | "errors" |
| 8 | "fmt" |
| 9 | "io" |
| Lorenz Brun | 8eb0244 | 2025-02-25 16:57:52 +0100 | [diff] [blame] | 10 | "os" |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 11 | ) |
| 12 | |
var (
	// ErrNotBlockDevice is the sentinel error for something that is not a
	// block device.
	// NOTE(review): nothing in this part of the package returns it; presumably
	// the platform-specific open code does - confirm.
	ErrNotBlockDevice = errors.New("not a block device")
)
| 14 | |
// options is the union of the open options of every supported platform.
// A single shared definition keeps call sites platform-independent; with
// per-platform definitions each call site would need several per-platform
// files just to select the options that apply.
type options struct {
	// readOnly selects read-only instead of read-write access.
	readOnly bool
	// direct records that WithDirect was requested.
	direct bool
	// exclusive records that WithExclusive was requested.
	exclusive bool
}

// Option is a function that adjusts the option set. Callers pass values of
// this type to request non-default behavior.
type Option func(*options)

// collect runs every Option in opts against o, in slice order.
func (o *options) collect(opts []Option) {
	for i := range opts {
		opts[i](o)
	}
}

// genericFlags returns the access-mode flag matching the collected options:
// os.O_RDONLY when readOnly is set and os.O_RDWR otherwise.
func (o *options) genericFlags() int {
	if o.readOnly {
		return os.O_RDONLY
	}
	return os.O_RDWR
}
| 39 | |
| 40 | // WithReadonly opens the block device read-only. Any write calls will fail. |
| 41 | // Passed as an option to Open. |
| 42 | func WithReadonly(o *options) { |
| 43 | o.readOnly = true |
| 44 | } |
| 45 | |
| 46 | // WithDirect opens the block device bypassing any caching by the kernel. |
| 47 | // Note that additional alignment requirements might be imposed by the |
| 48 | // underlying device. |
| 49 | // Unsupported on non-Linux currently, will return an error. |
| 50 | func WithDirect(o *options) { |
| 51 | o.direct = true |
| 52 | } |
| 53 | |
| 54 | // WithExclusive tries to acquire a pseudo-exclusive lock (only with other |
| 55 | // exclusive FDs) over the block device. |
| 56 | // Unsupported on non-Linux currently, will return an error. |
| 57 | func WithExclusive(o *options) { |
| 58 | o.exclusive = true |
| 59 | } |
| 60 | |
// BlockDev is the common interface of block devices, i.e. storage made up of
// blocks that all have the same size. Offsets and intervals are given in
// bytes; they must be multiples of BlockSize and should, where feasible, also
// be multiples of OptimalBlockSize. Unless stated otherwise intervals are
// half-open: the start byte is part of the interval, the end byte is not.
type BlockDev interface {
	io.ReaderAt
	io.WriterAt

	// BlockCount reports how many blocks the device has. An image whose size
	// is undefined reports -1.
	BlockCount() int64

	// BlockSize reports the size of one block in bytes. It must be a power
	// of two; 512 and 4096 are the usual values, but others do occur.
	BlockSize() int64

	// OptimalBlockSize reports the preferred granularity in bytes, both for
	// alignment and for the size of I/O requests. I/O in smaller units might
	// incur read-write overhead. The value is the larger of the physical
	// block size and a device-reported value, if one is available.
	OptimalBlockSize() int64

	// Discard tells the underlying device that the data in a contiguous run
	// of blocks is no longer needed. This can improve the performance of the
	// device (it no longer has to preserve the unused data) as well as of
	// bulk erase operations. The call is advisory and not supported by every
	// implementation. The contents of discarded blocks are
	// implementation-defined.
	Discard(startByte, endByte int64) error

	// Zero fills a contiguous run of blocks with zeroes. Some
	// implementations do this significantly faster than writing zeroes
	// through WriteAt.
	Zero(startByte, endByte int64) error

	// Sync commits the current contents to stable storage.
	Sync() error
}
| 99 | |
| 100 | func NewRWS(b BlockDev) *ReadWriteSeeker { |
| 101 | return &ReadWriteSeeker{b: b} |
| 102 | } |
| 103 | |
| 104 | // ReadWriteSeeker provides an adapter implementing ReadWriteSeeker on top of |
| 105 | // a blockdev. |
| 106 | type ReadWriteSeeker struct { |
| 107 | b BlockDev |
| 108 | currPos int64 |
| 109 | } |
| 110 | |
| 111 | func (s *ReadWriteSeeker) Read(p []byte) (n int, err error) { |
| 112 | n, err = s.b.ReadAt(p, s.currPos) |
| 113 | s.currPos += int64(n) |
| 114 | return |
| 115 | } |
| 116 | |
| 117 | func (s *ReadWriteSeeker) Write(p []byte) (n int, err error) { |
| 118 | n, err = s.b.WriteAt(p, s.currPos) |
| 119 | s.currPos += int64(n) |
| 120 | return |
| 121 | } |
| 122 | |
| 123 | func (s *ReadWriteSeeker) Seek(offset int64, whence int) (int64, error) { |
| 124 | switch whence { |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 125 | default: |
| 126 | return 0, errors.New("Seek: invalid whence") |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 127 | case io.SeekStart: |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 128 | case io.SeekCurrent: |
| 129 | offset += s.currPos |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 130 | case io.SeekEnd: |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 131 | offset += s.b.BlockCount() * s.b.BlockSize() |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 132 | } |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 133 | if offset < 0 { |
| 134 | return 0, errors.New("Seek: invalid offset") |
| 135 | } |
| 136 | s.currPos = offset |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 137 | return s.currPos, nil |
| 138 | } |
| 139 | |
var (
	// ErrOutOfBounds is returned by Section.WriteAt when a write starts
	// outside of the section or does not fit completely before its end.
	ErrOutOfBounds = errors.New("write out of bounds")
)
| 141 | |
| 142 | // NewSection returns a new Section, implementing BlockDev over that subset |
| 143 | // of blocks. The interval is inclusive-exclusive. |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 144 | func NewSection(b BlockDev, startBlock, endBlock int64) (*Section, error) { |
| 145 | if startBlock < 0 { |
| 146 | return nil, fmt.Errorf("invalid range: startBlock (%d) negative", startBlock) |
| 147 | } |
| 148 | if startBlock > endBlock { |
| 149 | return nil, fmt.Errorf("invalid range: startBlock (%d) bigger than endBlock (%d)", startBlock, endBlock) |
| 150 | } |
| 151 | if endBlock > b.BlockCount() { |
| 152 | return nil, fmt.Errorf("endBlock (%d) out of range (%d)", endBlock, b.BlockCount()) |
| 153 | } |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 154 | return &Section{ |
| 155 | b: b, |
| 156 | startBlock: startBlock, |
| 157 | endBlock: endBlock, |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 158 | }, nil |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 159 | } |
| 160 | |
| 161 | // Section implements BlockDev on a slice of another BlockDev given a startBlock |
| 162 | // and endBlock. |
| 163 | type Section struct { |
| 164 | b BlockDev |
| 165 | startBlock, endBlock int64 |
| 166 | } |
| 167 | |
| 168 | func (s *Section) ReadAt(p []byte, off int64) (n int, err error) { |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 169 | if off < 0 { |
| 170 | return 0, errors.New("blockdev.Section.ReadAt: negative offset") |
| 171 | } |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 172 | bOff := off + (s.startBlock * s.b.BlockSize()) |
| 173 | bytesToEnd := (s.endBlock * s.b.BlockSize()) - bOff |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 174 | if bytesToEnd < 0 { |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 175 | return 0, io.EOF |
| 176 | } |
| 177 | if bytesToEnd < int64(len(p)) { |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 178 | n, err := s.b.ReadAt(p[:bytesToEnd], bOff) |
| 179 | if err == nil { |
| 180 | err = io.EOF |
| 181 | } |
| 182 | return n, err |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 183 | } |
| 184 | return s.b.ReadAt(p, bOff) |
| 185 | } |
| 186 | |
| 187 | func (s *Section) WriteAt(p []byte, off int64) (n int, err error) { |
| 188 | bOff := off + (s.startBlock * s.b.BlockSize()) |
| 189 | bytesToEnd := (s.endBlock * s.b.BlockSize()) - bOff |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 190 | if off < 0 || bytesToEnd < 0 { |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 191 | return 0, ErrOutOfBounds |
| 192 | } |
| 193 | if bytesToEnd < int64(len(p)) { |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 194 | n, err := s.b.WriteAt(p[:bytesToEnd], bOff) |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 195 | if err != nil { |
| 196 | // If an error happened, prioritize that error |
| 197 | return n, err |
| 198 | } |
| 199 | // Otherwise, return ErrOutOfBounds as even short writes must return an |
| 200 | // error. |
| 201 | return n, ErrOutOfBounds |
| 202 | } |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 203 | return s.b.WriteAt(p, bOff) |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 204 | } |
| 205 | |
| 206 | func (s *Section) BlockCount() int64 { |
| 207 | return s.endBlock - s.startBlock |
| 208 | } |
| 209 | |
| 210 | func (s *Section) BlockSize() int64 { |
| 211 | return s.b.BlockSize() |
| 212 | } |
| 213 | |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 214 | func (s *Section) OptimalBlockSize() int64 { |
| 215 | return s.b.OptimalBlockSize() |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 216 | } |
| 217 | |
| 218 | func (s *Section) Discard(startByte, endByte int64) error { |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 219 | if err := validAlignedRange(s, startByte, endByte); err != nil { |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 220 | return err |
| 221 | } |
| Jan Schär | 0ea961c | 2024-04-11 13:41:40 +0200 | [diff] [blame] | 222 | offset := s.startBlock * s.b.BlockSize() |
| 223 | return s.b.Discard(offset+startByte, offset+endByte) |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 224 | } |
| 225 | |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 226 | func (s *Section) Zero(startByte, endByte int64) error { |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 227 | if err := validAlignedRange(s, startByte, endByte); err != nil { |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 228 | return err |
| 229 | } |
| Jan Schär | 0ea961c | 2024-04-11 13:41:40 +0200 | [diff] [blame] | 230 | offset := s.startBlock * s.b.BlockSize() |
| 231 | return s.b.Zero(offset+startByte, offset+endByte) |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 232 | } |
| 233 | |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 234 | func (s *Section) Sync() error { |
| 235 | return s.b.Sync() |
| 236 | } |
| 237 | |
| 238 | func validAlignedRange(b BlockDev, startByte, endByte int64) error { |
| 239 | if startByte < 0 { |
| 240 | return fmt.Errorf("invalid range: startByte (%d) negative", startByte) |
| 241 | } |
| 242 | if startByte > endByte { |
| 243 | return fmt.Errorf("invalid range: startByte (%d) bigger than endByte (%d)", startByte, endByte) |
| 244 | } |
| 245 | devLen := b.BlockCount() * b.BlockSize() |
| 246 | if endByte > devLen { |
| 247 | return fmt.Errorf("endByte (%d) out of range (%d)", endByte, devLen) |
| 248 | } |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 249 | if startByte%b.BlockSize() != 0 { |
| 250 | return fmt.Errorf("startByte (%d) needs to be aligned to block size (%d)", startByte, b.BlockSize()) |
| 251 | } |
| 252 | if endByte%b.BlockSize() != 0 { |
| 253 | return fmt.Errorf("endByte (%d) needs to be aligned to block size (%d)", endByte, b.BlockSize()) |
| 254 | } |
| Jan Schär | a6da171 | 2024-08-21 15:12:11 +0200 | [diff] [blame] | 255 | return nil |
| 256 | } |
| 257 | |
| 258 | // GenericZero implements software-based zeroing. This can be used to implement |
| 259 | // Zero when no acceleration is available or desired. |
| 260 | func GenericZero(b BlockDev, startByte, endByte int64) error { |
| 261 | if err := validAlignedRange(b, startByte, endByte); err != nil { |
| 262 | return err |
| 263 | } |
| Lorenz Brun | 1e0e3a4 | 2023-06-28 16:40:18 +0200 | [diff] [blame] | 264 | // Choose buffer size close to 16MiB or the range to be zeroed, whatever |
| 265 | // is smaller. |
| 266 | bufSizeTarget := int64(16 * 1024 * 1024) |
| 267 | if endByte-startByte < bufSizeTarget { |
| 268 | bufSizeTarget = endByte - startByte |
| 269 | } |
| 270 | bufSize := (bufSizeTarget / b.BlockSize()) * b.BlockSize() |
| 271 | buf := make([]byte, bufSize) |
| 272 | for i := startByte; i < endByte; i += bufSize { |
| 273 | if endByte-i < bufSize { |
| 274 | buf = buf[:endByte-i] |
| 275 | } |
| 276 | if _, err := b.WriteAt(buf, i); err != nil { |
| 277 | return fmt.Errorf("while writing zeroes: %w", err) |
| 278 | } |
| 279 | } |
| 280 | return nil |
| 281 | } |