blob: 3d3cadb1708d11659231b99ddaf497b262ac68c7 [file] [log] [blame]
Tim Windelschmidt6d33a432025-02-04 14:34:25 +01001// Copyright The Monogon Project Authors.
2// SPDX-License-Identifier: Apache-2.0
3
Lorenz Brunee17d832022-10-18 12:02:45 +00004// Package gpt implements reading and writing GUID Partition Tables as specified
5// in the UEFI Specification. It only implements up to 128 partitions per table
6// (same as most other implementations) as more would require a dynamic table
7// size, significantly complicating the code for little gain.
8package gpt
9
10import (
11 "bytes"
12 "encoding/binary"
13 "errors"
14 "fmt"
15 "hash/crc32"
Jan Schäre479eee2024-08-21 16:01:39 +020016 "slices"
Lorenz Brunee17d832022-10-18 12:02:45 +000017 "strings"
18 "unicode/utf16"
19
20 "github.com/google/uuid"
Lorenz Brun60d6b902023-06-20 16:02:40 +020021
Tim Windelschmidt9f21f532024-05-07 15:14:20 +020022 "source.monogon.dev/osbase/blockdev"
23 "source.monogon.dev/osbase/msguid"
Lorenz Brunee17d832022-10-18 12:02:45 +000024)
25
26var gptSignature = [8]byte{'E', 'F', 'I', ' ', 'P', 'A', 'R', 'T'}
Jan Schäre479eee2024-08-21 16:01:39 +020027
28const gptRevision uint32 = 0x00010000 // First 2 bytes major, second 2 bytes minor
Lorenz Brunee17d832022-10-18 12:02:45 +000029
30// See UEFI Specification 2.9 Table 5-5
31type header struct {
32 Signature [8]byte
33 Revision uint32
34 HeaderSize uint32
35 HeaderCRC32 uint32
36 _ [4]byte
37
38 HeaderBlock uint64
39 AlternateHeaderBlock uint64
40 FirstUsableBlock uint64
41 LastUsableBlock uint64
42
43 ID [16]byte
44
45 PartitionEntriesStartBlock uint64
46 PartitionEntryCount uint32
47 PartitionEntrySize uint32
48 PartitionEntriesCRC32 uint32
49}
50
51// See UEFI Specification 2.9 Table 5-6
52type partition struct {
53 Type [16]byte
54 ID [16]byte
55 FirstBlock uint64
56 LastBlock uint64
57 Attributes uint64
58 Name [36]uint16
59}
60
61var (
62 PartitionTypeEFISystem = uuid.MustParse("C12A7328-F81F-11D2-BA4B-00A0C93EC93B")
63)
64
Lorenz Brunee17d832022-10-18 12:02:45 +000065// Attribute is a bitfield of attributes set on a partition. Bits 0 to 47 are
66// reserved for UEFI specification use and all current assignments are in the
67// following const block. Bits 48 to 64 are available for per-Type use by
68// the organization controlling the partition Type.
69type Attribute uint64
70
71const (
72 // AttrRequiredPartition indicates that this partition is required for the
73 // platform to function. Mostly used by vendors to mark things like recovery
74 // partitions.
75 AttrRequiredPartition = 1 << 0
76 // AttrNoBlockIOProto indicates that EFI firmware must not provide an EFI
77 // block device (EFI_BLOCK_IO_PROTOCOL) for this partition.
78 AttrNoBlockIOProto = 1 << 1
79 // AttrLegacyBIOSBootable indicates to special-purpose software outside of
80 // UEFI that this partition can be booted using a traditional PC BIOS.
81 // Don't use this unless you know that you need it specifically.
82 AttrLegacyBIOSBootable = 1 << 2
83)
84
85// PerTypeAttrs returns the top 24 bits which are reserved for custom per-Type
86// attributes. The top 8 bits of the returned uint32 are always 0.
87func (a Attribute) PerTypeAttrs() uint32 {
88 return uint32(a >> 48)
89}
90
91// SetPerTypeAttrs sets the top 24 bits which are reserved for custom per-Type
92// attributes. It does not touch the lower attributes which are specified by the
93// UEFI specification. The top 8 bits of v are silently discarded.
94func (a *Attribute) SetPerTypeAttrs(v uint32) {
95 *a &= 0x000000FF_FFFFFFFF
96 *a |= Attribute(v) << 48
97}
98
99type Partition struct {
100 // Name of the partition, will be truncated if it expands to more than 36
101 // UTF-16 code points. Not all systems can display non-BMP code points.
102 Name string
103 // Type is the type of Table partition, can either be one of the predefined
104 // constants by the UEFI specification or a custom type identifier.
105 // Note that the all-zero UUID denotes an empty partition slot, so this
106 // MUST be set to something, otherwise it is not treated as a partition.
107 Type uuid.UUID
108 // ID is a unique identifier for this specific partition. It should be
109 // changed when cloning the partition.
110 ID uuid.UUID
111 // The first logical block of the partition (inclusive)
112 FirstBlock uint64
113 // The last logical block of the partition (inclusive)
114 LastBlock uint64
115 // Bitset of attributes of this partition.
116 Attributes Attribute
Lorenz Brunad131882023-06-28 16:42:20 +0200117
118 *blockdev.Section
Lorenz Brunee17d832022-10-18 12:02:45 +0000119}
120
121// SizeBlocks returns the size of the partition in blocks
122func (p *Partition) SizeBlocks() uint64 {
123 return 1 + p.LastBlock - p.FirstBlock
124}
125
126// IsUnused checks if the partition is unused, i.e. it is nil or its type is
127// the null UUID.
128func (p *Partition) IsUnused() bool {
129 if p == nil {
130 return true
131 }
Lorenz Brunad131882023-06-28 16:42:20 +0200132 return p.Type == uuid.Nil
133}
134
135// New returns an empty table on the given block device.
136// It does not read any existing GPT on the disk (use Read for that), nor does
137// it write anything until Write is called.
138func New(b blockdev.BlockDev) (*Table, error) {
139 return &Table{
140 b: b,
141 }, nil
Lorenz Brunee17d832022-10-18 12:02:45 +0000142}
143
144type Table struct {
145 // ID is the unique identifier of this specific disk / GPT.
146 // If this is left uninitialized/all-zeroes a new random ID is automatically
147 // generated when writing.
148 ID uuid.UUID
149
150 // Data put at the start of the very first block. Gets loaded and executed
151 // by a legacy BIOS bootloader. This can be used to make GPT-partitioned
152 // disks bootable by legacy systems or display a nice error message.
153 // Maximum length is 440 bytes, if that is exceeded Write returns an error.
154 // Should be left empty if the device is not bootable and/or compatibility
155 // with BIOS booting is not required. Only useful on x86 systems.
156 BootCode []byte
157
Lorenz Brunee17d832022-10-18 12:02:45 +0000158 // Partitions contains the list of partitions in this table. This is
Lorenz Brun5a90d302023-10-09 17:38:13 +0200159 // artificially limited to 128 partitions. Holes in the partition list are
160 // represented as nil values. Call IsUnused before checking any other
161 // properties of the partition.
Lorenz Brunee17d832022-10-18 12:02:45 +0000162 Partitions []*Partition
Lorenz Brunad131882023-06-28 16:42:20 +0200163
164 b blockdev.BlockDev
Lorenz Brunee17d832022-10-18 12:02:45 +0000165}
Lorenz Brunad131882023-06-28 16:42:20 +0200166
Lorenz Brunee17d832022-10-18 12:02:45 +0000167type addOptions struct {
168 preferEnd bool
169 keepEmptyEntries bool
170 alignment int64
171}
172
173// AddOption is a bitset controlling various
174type AddOption func(*addOptions)
175
176// WithPreferEnd tries to put the partition as close to the end as possible
177// instead of as close to the start.
178func WithPreferEnd() AddOption {
179 return func(options *addOptions) {
180 options.preferEnd = true
181 }
182}
183
184// WithKeepEmptyEntries does not fill up empty entries which are followed by
185// filled ones. It always appends the partition after the last used entry.
186// Without this flag, the partition is placed in the first empty entry.
187func WithKeepEmptyEntries() AddOption {
188 return func(options *addOptions) {
189 options.keepEmptyEntries = true
190 }
191}
192
193// WithAlignment allows aligning the partition start block to a non-default
194// value. By default, these are aligned to 1MiB.
Lorenz Brun60d6b902023-06-20 16:02:40 +0200195// Only use this flag if you are certain you need it, it can cause quite severe
Lorenz Brunee17d832022-10-18 12:02:45 +0000196// performance degradation under certain conditions.
Jan Schäre479eee2024-08-21 16:01:39 +0200197func WithAlignment(alignment int64) AddOption {
Lorenz Brunee17d832022-10-18 12:02:45 +0000198 return func(options *addOptions) {
Jan Schäre479eee2024-08-21 16:01:39 +0200199 options.alignment = alignment
Lorenz Brunee17d832022-10-18 12:02:45 +0000200 }
201}
202
203// AddPartition takes a pointer to a partition and adds it, placing it into
204// the first (or last using WithPreferEnd) continuous free space which fits it.
Jan Schäre479eee2024-08-21 16:01:39 +0200205// If size is -1, the partition will fill the largest free space.
Lorenz Brunee17d832022-10-18 12:02:45 +0000206// It writes the placement information (FirstBlock, LastBlock) back to p.
207// By default, AddPartition aligns FirstBlock to 1MiB boundaries, but this can
208// be overridden using WithAlignment.
Tim Windelschmidt0c57d342024-04-11 01:38:47 +0200209func (gpt *Table) AddPartition(p *Partition, size int64, options ...AddOption) error {
210 blockSize := gpt.b.BlockSize()
Lorenz Brunee17d832022-10-18 12:02:45 +0000211 var opts addOptions
212 // Align to 1MiB or the block size, whichever is bigger
213 opts.alignment = 1 * 1024 * 1024
Lorenz Brunad131882023-06-28 16:42:20 +0200214 if blockSize > opts.alignment {
215 opts.alignment = blockSize
Lorenz Brunee17d832022-10-18 12:02:45 +0000216 }
217 for _, o := range options {
218 o(&opts)
219 }
Jan Schäre479eee2024-08-21 16:01:39 +0200220 if opts.alignment <= 0 {
221 return fmt.Errorf("alignment (%d bytes) must be positive", opts.alignment)
222 }
Lorenz Brunad131882023-06-28 16:42:20 +0200223 if opts.alignment%blockSize != 0 {
224 return fmt.Errorf("requested alignment (%d bytes) is not an integer multiple of the block size (%d), unable to align", opts.alignment, blockSize)
Lorenz Brunee17d832022-10-18 12:02:45 +0000225 }
Jan Schäre479eee2024-08-21 16:01:39 +0200226 alignBlocks := opts.alignment / blockSize
Lorenz Brunad131882023-06-28 16:42:20 +0200227 if p.ID == uuid.Nil {
228 p.ID = uuid.New()
229 }
230
Tim Windelschmidt0c57d342024-04-11 01:38:47 +0200231 fs, _, err := gpt.GetFreeSpaces()
Lorenz Brunee17d832022-10-18 12:02:45 +0000232 if err != nil {
Tim Windelschmidt5f1a7de2024-09-19 02:00:14 +0200233 return fmt.Errorf("unable to determine free space: %w", err)
Lorenz Brunee17d832022-10-18 12:02:45 +0000234 }
235 if opts.preferEnd {
236 // Reverse fs slice to start iteration at the end
Jan Schäre479eee2024-08-21 16:01:39 +0200237 slices.Reverse(fs)
Lorenz Brunee17d832022-10-18 12:02:45 +0000238 }
Jan Schäre479eee2024-08-21 16:01:39 +0200239 var largestFreeSpace int64
Lorenz Brunee17d832022-10-18 12:02:45 +0000240 for _, freeInt := range fs {
Jan Schäre479eee2024-08-21 16:01:39 +0200241 startAligned := (freeInt[0] + alignBlocks - 1) / alignBlocks * alignBlocks
242 freeBlocks := freeInt[1] - startAligned
243 if freeBlocks > largestFreeSpace {
244 largestFreeSpace = freeBlocks
Lorenz Brunee17d832022-10-18 12:02:45 +0000245 }
246 }
247
Jan Schäre479eee2024-08-21 16:01:39 +0200248 // Number of blocks the partition should occupy, rounded up.
249 blocks := (size + blockSize - 1) / blockSize
250 if size == -1 {
Jan Schär02d72172024-09-02 17:47:27 +0200251 // When size is -1, use the largest free space. Align the size to ensure
252 // that the partition does not overlap the hardware blocks containing the
253 // alternate GPT.
254 blocks = largestFreeSpace / alignBlocks * alignBlocks
255 if blocks == 0 {
Jan Schäre479eee2024-08-21 16:01:39 +0200256 return errors.New("no free space")
257 }
Jan Schäre479eee2024-08-21 16:01:39 +0200258 } else if size <= 0 {
259 return fmt.Errorf("partition size (%d bytes) must be positive or the special value -1", size)
260 }
261 for _, freeInt := range fs {
262 startAligned := (freeInt[0] + alignBlocks - 1) / alignBlocks * alignBlocks
263 freeBlocks := freeInt[1] - startAligned
264 if freeBlocks < blocks {
265 continue
266 }
267 if opts.preferEnd {
268 startAligned += (freeBlocks - blocks) / alignBlocks * alignBlocks
269 }
270 p.FirstBlock = uint64(startAligned)
271 p.LastBlock = uint64(startAligned + blocks - 1)
272
273 newPartPos := -1
274 if !opts.keepEmptyEntries {
275 for i, part := range gpt.Partitions {
276 if part.IsUnused() {
277 newPartPos = i
278 break
279 }
280 }
281 }
282 if newPartPos == -1 {
283 gpt.Partitions = append(gpt.Partitions, p)
284 } else {
285 gpt.Partitions[newPartPos] = p
286 }
287 p.Section, err = blockdev.NewSection(gpt.b, int64(p.FirstBlock), int64(p.LastBlock)+1)
288 if err != nil {
289 return fmt.Errorf("failed to create blockdev Section for partition: %w", err)
290 }
291 return nil
292 }
293
294 return fmt.Errorf("no space for partition of %d blocks, largest continuous free space after alignment is %d blocks", blocks, largestFreeSpace)
Lorenz Brunee17d832022-10-18 12:02:45 +0000295}
296
297// FirstUsableBlock returns the first usable (i.e. a partition can start there)
298// block.
Tim Windelschmidt0c57d342024-04-11 01:38:47 +0200299func (gpt *Table) FirstUsableBlock() int64 {
300 blockSize := gpt.b.BlockSize()
Lorenz Brunad131882023-06-28 16:42:20 +0200301 partitionEntryBlocks := (16384 + blockSize - 1) / blockSize
Lorenz Brunee17d832022-10-18 12:02:45 +0000302 return 2 + partitionEntryBlocks
303}
304
305// LastUsableBlock returns the last usable (i.e. a partition can end there)
306// block. This block is inclusive.
Tim Windelschmidt0c57d342024-04-11 01:38:47 +0200307func (gpt *Table) LastUsableBlock() int64 {
308 blockSize := gpt.b.BlockSize()
Lorenz Brunad131882023-06-28 16:42:20 +0200309 partitionEntryBlocks := (16384 + blockSize - 1) / blockSize
Tim Windelschmidt0c57d342024-04-11 01:38:47 +0200310 return gpt.b.BlockCount() - (2 + partitionEntryBlocks)
Lorenz Brunee17d832022-10-18 12:02:45 +0000311}
312
313// GetFreeSpaces returns a slice of tuples, each containing a half-closed
314// interval of logical blocks not occupied by the GPT itself or any partition.
315// The returned intervals are always in ascending order as well as
316// non-overlapping. It also returns if it detected any overlaps between
317// partitions or partitions and the GPT. It returns an error if and only if any
318// partition has its FirstBlock before the LastBlock or exceeds the amount of
319// blocks on the block device.
320//
321// Note that the most common use cases for this function are covered by
322// AddPartition, you're encouraged to use it instead.
Tim Windelschmidt0c57d342024-04-11 01:38:47 +0200323func (gpt *Table) GetFreeSpaces() ([][2]int64, bool, error) {
Lorenz Brunee17d832022-10-18 12:02:45 +0000324 // This implements an efficient algorithm for finding free intervals given
325 // a set of potentially overlapping occupying intervals. It uses O(n*log n)
326 // time for n being the amount of intervals, i.e. partitions. It uses O(n)
327 // additional memory. This makes it de facto infinitely scalable in the
328 // context of partition tables as the size of the block device is not part
329 // of its cyclomatic complexity and O(n*log n) is tiny for even very big
330 // partition tables.
331
Tim Windelschmidt0c57d342024-04-11 01:38:47 +0200332 blockCount := gpt.b.BlockCount()
Lorenz Brunad131882023-06-28 16:42:20 +0200333
Lorenz Brunee17d832022-10-18 12:02:45 +0000334 // startBlocks contains the start blocks (inclusive) of all occupied
335 // intervals.
336 var startBlocks []int64
337 // endBlocks contains the end blocks (exclusive!) of all occupied intervals.
338 // The interval at index i is given by [startBlock[i], endBlock[i]).
339 var endBlocks []int64
340
341 // Reserve the primary GPT interval including the protective MBR.
342 startBlocks = append(startBlocks, 0)
Tim Windelschmidt0c57d342024-04-11 01:38:47 +0200343 endBlocks = append(endBlocks, gpt.FirstUsableBlock())
Lorenz Brunee17d832022-10-18 12:02:45 +0000344
345 // Reserve the alternate GPT interval (needs +1 for exclusive interval)
Tim Windelschmidt0c57d342024-04-11 01:38:47 +0200346 startBlocks = append(startBlocks, gpt.LastUsableBlock()+1)
Lorenz Brunad131882023-06-28 16:42:20 +0200347 endBlocks = append(endBlocks, blockCount)
Lorenz Brunee17d832022-10-18 12:02:45 +0000348
Tim Windelschmidt0c57d342024-04-11 01:38:47 +0200349 for i, part := range gpt.Partitions {
Lorenz Brunee17d832022-10-18 12:02:45 +0000350 if part.IsUnused() {
351 continue
352 }
353 // Bail if partition does not contain a valid interval. These are open
354 // intervals, thus part.FirstBlock == part.LastBlock denotes a valid
355 // partition with a size of one block.
356 if part.FirstBlock > part.LastBlock {
357 return nil, false, fmt.Errorf("partition %d has a LastBlock smaller than its FirstBlock, its interval is [%d, %d]", i, part.FirstBlock, part.LastBlock)
358 }
Lorenz Brunad131882023-06-28 16:42:20 +0200359 if part.FirstBlock >= uint64(blockCount) || part.LastBlock >= uint64(blockCount) {
Lorenz Brunee17d832022-10-18 12:02:45 +0000360 return nil, false, fmt.Errorf("partition %d exceeds the block count of the block device", i)
361 }
362 startBlocks = append(startBlocks, int64(part.FirstBlock))
363 // Algorithm needs open-closed intervals, thus add +1 to the end.
364 endBlocks = append(endBlocks, int64(part.LastBlock)+1)
365 }
366 // Sort both sets of blocks independently in ascending order. Note that it
367 // is now no longer possible to extract the original intervals. Integers
368 // have no identity thus it doesn't matter if the sort is stable or not.
Jan Schäre479eee2024-08-21 16:01:39 +0200369 slices.Sort(startBlocks)
370 slices.Sort(endBlocks)
Lorenz Brunee17d832022-10-18 12:02:45 +0000371
372 var freeSpaces [][2]int64
373
374 // currentIntervals contains the number of intervals which contain the
375 // position currently being iterated over. If currentIntervals is ever
376 // bigger than 1, there is overlap within the given intervals.
377 currentIntervals := 0
378 var hasOverlap bool
379
380 // Iterate for as long as there are interval boundaries to be processed.
381 for len(startBlocks) != 0 || len(endBlocks) != 0 {
382 // Short-circuit boundary processing. If an interval ends at x and the
383 // next one starts at x (this is using half-open intervals), it would
384 // otherwise perform useless processing as well as create an empty free
385 // interval which would then need to be filtered back out.
386 if len(startBlocks) != 0 && len(endBlocks) != 0 && startBlocks[0] == endBlocks[0] {
387 startBlocks = startBlocks[1:]
388 endBlocks = endBlocks[1:]
389 continue
390 }
391 // Pick the lowest boundary from either startBlocks or endBlocks,
392 // preferring endBlocks if they are equal. Don't try to pick from empty
393 // slices.
394 if (len(startBlocks) != 0 && len(endBlocks) != 0 && startBlocks[0] < endBlocks[0]) || len(endBlocks) == 0 {
395 // If currentIntervals == 0 a free space region ends here.
396 // Since this algorithm creates the free space interval at the end
397 // of an occupied interval, for the first interval there is no free
398 // space entry. But in this case it's fine to just ignore it as the
399 // first interval always starts at 0 because of the GPT.
400 if currentIntervals == 0 && len(freeSpaces) != 0 {
401 freeSpaces[len(freeSpaces)-1][1] = startBlocks[0]
402 }
403 // This is the start of an interval, increase the number of active
404 // intervals.
405 currentIntervals++
406 hasOverlap = hasOverlap || currentIntervals > 1
407 // Drop processed startBlock from slice.
408 startBlocks = startBlocks[1:]
409 } else {
410 // This is the end of an interval, decrease the number of active
411 // intervals.
412 currentIntervals--
413 // If currentIntervals == 0 a free space region starts here.
414 // Same as with the startBlocks, ignore a potential free block after
415 // the final range as the GPT occupies the last blocks anyway.
416 if currentIntervals == 0 && len(startBlocks) != 0 {
417 freeSpaces = append(freeSpaces, [2]int64{endBlocks[0], 0})
418 }
419 endBlocks = endBlocks[1:]
420 }
421 }
422 return freeSpaces, hasOverlap, nil
423}
424
425// Overhead returns the number of blocks the GPT partitioning itself consumes,
426// i.e. aren't usable for user data.
427func Overhead(blockSize int64) int64 {
428 // 3 blocks + 2x 16384 bytes (partition entry space)
429 partitionEntryBlocks := (16384 + blockSize - 1) / blockSize
430 return 3 + (2 * partitionEntryBlocks)
431}
432
Lorenz Brunad131882023-06-28 16:42:20 +0200433// Write writes the two GPTs, first the alternate, then the primary to the
434// block device. If gpt.ID or any of the partition IDs are the all-zero UUID,
435// new random ones are generated and written back. If the output is supposed
436// to be reproducible, generate the UUIDs beforehand.
437func (gpt *Table) Write() error {
438 blockSize := gpt.b.BlockSize()
439 blockCount := gpt.b.BlockCount()
440 if blockSize < 512 {
Lorenz Brunee17d832022-10-18 12:02:45 +0000441 return errors.New("block size is smaller than 512 bytes, this is unsupported")
442 }
443 // Layout looks as follows:
444 // Block 0: Protective MBR
445 // Block 1: GPT Header
446 // Block 2-(16384 bytes): GPT partition entries
447 // Block (16384 bytes)-n: GPT partition entries alternate copy
448 // Block n: GPT Header alternate copy
Lorenz Brunad131882023-06-28 16:42:20 +0200449 partitionEntryCount := 128
450 if len(gpt.Partitions) > partitionEntryCount {
451 return errors.New("bigger-than default GPTs (>128 partitions) are unimplemented")
Lorenz Brunee17d832022-10-18 12:02:45 +0000452 }
453
Lorenz Brunad131882023-06-28 16:42:20 +0200454 partitionEntryBlocks := (16384 + blockSize - 1) / blockSize
455 if blockCount < 3+(2*partitionEntryBlocks) {
Lorenz Brunee17d832022-10-18 12:02:45 +0000456 return errors.New("not enough blocks to write GPT")
457 }
458
Lorenz Brunad131882023-06-28 16:42:20 +0200459 if gpt.ID == uuid.Nil {
Lorenz Brunee17d832022-10-18 12:02:45 +0000460 gpt.ID = uuid.New()
461 }
462
463 partSize := binary.Size(partition{})
Lorenz Brunee17d832022-10-18 12:02:45 +0000464 var partitionEntriesData bytes.Buffer
Lorenz Brunad131882023-06-28 16:42:20 +0200465 for i := 0; i < partitionEntryCount; i++ {
Lorenz Brunee17d832022-10-18 12:02:45 +0000466 if len(gpt.Partitions) <= i || gpt.Partitions[i] == nil {
Lorenz Brunad131882023-06-28 16:42:20 +0200467 // Write an empty entry
Lorenz Brunee17d832022-10-18 12:02:45 +0000468 partitionEntriesData.Write(make([]byte, partSize))
469 continue
470 }
471 p := gpt.Partitions[i]
Lorenz Brunad131882023-06-28 16:42:20 +0200472 if p.ID == uuid.Nil {
Lorenz Brunee17d832022-10-18 12:02:45 +0000473 p.ID = uuid.New()
474 }
475 rawP := partition{
Lorenz Brun60d6b902023-06-20 16:02:40 +0200476 Type: msguid.From(p.Type),
477 ID: msguid.From(p.ID),
Lorenz Brunee17d832022-10-18 12:02:45 +0000478 FirstBlock: p.FirstBlock,
479 LastBlock: p.LastBlock,
480 Attributes: uint64(p.Attributes),
481 }
482 nameUTF16 := utf16.Encode([]rune(p.Name))
483 // copy will automatically truncate if target is too short
484 copy(rawP.Name[:], nameUTF16)
485 binary.Write(&partitionEntriesData, binary.LittleEndian, rawP)
486 }
487
488 hdr := header{
489 Signature: gptSignature,
490 Revision: gptRevision,
491 HeaderSize: uint32(binary.Size(&header{})),
Lorenz Brun60d6b902023-06-20 16:02:40 +0200492 ID: msguid.From(gpt.ID),
Lorenz Brunee17d832022-10-18 12:02:45 +0000493
Lorenz Brunad131882023-06-28 16:42:20 +0200494 PartitionEntryCount: uint32(partitionEntryCount),
Lorenz Brunee17d832022-10-18 12:02:45 +0000495 PartitionEntrySize: uint32(partSize),
496
497 FirstUsableBlock: uint64(2 + partitionEntryBlocks),
Lorenz Brunad131882023-06-28 16:42:20 +0200498 LastUsableBlock: uint64(blockCount - (2 + partitionEntryBlocks)),
Lorenz Brunee17d832022-10-18 12:02:45 +0000499 }
500 hdr.PartitionEntriesCRC32 = crc32.ChecksumIEEE(partitionEntriesData.Bytes())
501
502 hdrChecksum := crc32.NewIEEE()
503
504 // Write alternate header first, as otherwise resizes are unsafe. If the
505 // alternate is currently not at the end of the block device, it cannot
506 // be found. Thus if the write operation is aborted abnormally, the
507 // primary GPT is corrupted and the alternate cannot be found because it
508 // is not at its canonical location. Rewriting the alternate first avoids
509 // this problem.
510
511 // Alternate header
Lorenz Brunad131882023-06-28 16:42:20 +0200512 hdr.HeaderBlock = uint64(blockCount - 1)
Lorenz Brunee17d832022-10-18 12:02:45 +0000513 hdr.AlternateHeaderBlock = 1
Lorenz Brunad131882023-06-28 16:42:20 +0200514 hdr.PartitionEntriesStartBlock = uint64(blockCount - (1 + partitionEntryBlocks))
Lorenz Brunee17d832022-10-18 12:02:45 +0000515
516 hdrChecksum.Reset()
517 hdr.HeaderCRC32 = 0
518 binary.Write(hdrChecksum, binary.LittleEndian, &hdr)
519 hdr.HeaderCRC32 = hdrChecksum.Sum32()
520
Lorenz Brunad131882023-06-28 16:42:20 +0200521 for partitionEntriesData.Len()%int(blockSize) != 0 {
522 partitionEntriesData.WriteByte(0x00)
523 }
524 if _, err := gpt.b.WriteAt(partitionEntriesData.Bytes(), int64(hdr.PartitionEntriesStartBlock)*blockSize); err != nil {
Lorenz Brunee17d832022-10-18 12:02:45 +0000525 return fmt.Errorf("failed to write alternate partition entries: %w", err)
526 }
Lorenz Brunee17d832022-10-18 12:02:45 +0000527
Lorenz Brunad131882023-06-28 16:42:20 +0200528 var hdrRaw bytes.Buffer
529 if err := binary.Write(&hdrRaw, binary.LittleEndian, &hdr); err != nil {
530 return fmt.Errorf("failed to encode alternate header: %w", err)
Lorenz Brunee17d832022-10-18 12:02:45 +0000531 }
Lorenz Brunad131882023-06-28 16:42:20 +0200532 for hdrRaw.Len()%int(blockSize) != 0 {
533 hdrRaw.WriteByte(0x00)
534 }
535 if _, err := gpt.b.WriteAt(hdrRaw.Bytes(), (blockCount-1)*blockSize); err != nil {
Tim Windelschmidt5f1a7de2024-09-19 02:00:14 +0200536 return fmt.Errorf("failed to write alternate header: %w", err)
Lorenz Brunee17d832022-10-18 12:02:45 +0000537 }
538
Jan Schärc39b1dc2024-08-26 17:21:14 +0200539 // Sync device after writing each GPT, to ensure there is at least one valid
540 // GPT at all times.
541 if err := gpt.b.Sync(); err != nil {
542 return fmt.Errorf("failed to sync device after writing alternate GPT: %w", err)
543 }
544
Lorenz Brunee17d832022-10-18 12:02:45 +0000545 // Primary header
Lorenz Brunee17d832022-10-18 12:02:45 +0000546 hdr.HeaderBlock = 1
Lorenz Brunad131882023-06-28 16:42:20 +0200547 hdr.AlternateHeaderBlock = uint64(blockCount - 1)
Lorenz Brunee17d832022-10-18 12:02:45 +0000548 hdr.PartitionEntriesStartBlock = 2
549
550 hdrChecksum.Reset()
551 hdr.HeaderCRC32 = 0
552 binary.Write(hdrChecksum, binary.LittleEndian, &hdr)
553 hdr.HeaderCRC32 = hdrChecksum.Sum32()
554
Lorenz Brunad131882023-06-28 16:42:20 +0200555 hdrRaw.Reset()
556
557 if err := makeProtectiveMBR(&hdrRaw, blockCount, gpt.BootCode); err != nil {
558 return fmt.Errorf("failed creating protective MBR: %w", err)
559 }
560 for hdrRaw.Len()%int(blockSize) != 0 {
561 hdrRaw.WriteByte(0x00)
562 }
563 if err := binary.Write(&hdrRaw, binary.LittleEndian, &hdr); err != nil {
564 panic(err)
565 }
566 for hdrRaw.Len()%int(blockSize) != 0 {
567 hdrRaw.WriteByte(0x00)
568 }
569 hdrRaw.Write(partitionEntriesData.Bytes())
570 for hdrRaw.Len()%int(blockSize) != 0 {
571 hdrRaw.WriteByte(0x00)
Lorenz Brunee17d832022-10-18 12:02:45 +0000572 }
573
Lorenz Brunad131882023-06-28 16:42:20 +0200574 if _, err := gpt.b.WriteAt(hdrRaw.Bytes(), 0); err != nil {
575 return fmt.Errorf("failed to write primary GPT: %w", err)
Lorenz Brunee17d832022-10-18 12:02:45 +0000576 }
Jan Schärc39b1dc2024-08-26 17:21:14 +0200577 if err := gpt.b.Sync(); err != nil {
578 return fmt.Errorf("failed to sync device after writing primary GPT: %w", err)
579 }
Lorenz Brunee17d832022-10-18 12:02:45 +0000580 return nil
581}
582
Lorenz Brunad131882023-06-28 16:42:20 +0200583// Read reads a Table from a block device.
584func Read(r blockdev.BlockDev) (*Table, error) {
585 if Overhead(r.BlockSize()) > r.BlockCount() {
Lorenz Brunee17d832022-10-18 12:02:45 +0000586 return nil, errors.New("disk cannot contain a GPT as the block count is too small to store one")
587 }
Lorenz Brunad131882023-06-28 16:42:20 +0200588 zeroBlock := make([]byte, r.BlockSize())
589 if _, err := r.ReadAt(zeroBlock, 0); err != nil {
590 return nil, fmt.Errorf("failed to read first block: %w", err)
Lorenz Brunee17d832022-10-18 12:02:45 +0000591 }
592
593 var m mbr
594 if err := binary.Read(bytes.NewReader(zeroBlock[:512]), binary.LittleEndian, &m); err != nil {
595 panic(err) // Read is from memory and with enough data
596 }
597 // The UEFI standard says that the only acceptable MBR for a GPT-partitioned
598 // device is a pure protective MBR with one partition of type 0xEE covering
599 // the entire disk. But reality is sadly not so simple. People have come up
600 // with hacks like Hybrid MBR which is basically a way to expose partitions
601 // as both GPT partitions and MBR partitions. There are also GPTs without
602 // any MBR at all.
603 // Following the standard strictly when reading means that this library
604 // would fail to read valid GPT disks where such schemes are employed.
605 // On the other hand just looking at the GPT signature is also dangerous
606 // as not all tools clear the second block where the GPT resides when
607 // writing an MBR, which results in reading a wrong/obsolete GPT.
608 // As a pragmatic solution this library treats any disk as GPT-formatted if
609 // the first block does not contain an MBR signature or at least one MBR
610 // partition has type 0xEE (GPT). It does however not care in which slot
611 // this partition is or if it begins at the start of the disk.
612 //
613 // Note that the block signatures for MBR and FAT are shared. This is a
614 // historical artifact from DOS. It is not reliably possible to
615 // differentiate the two as either has boot code where the other has meta-
616 // data and both lack any checksums. Because the MBR partition table is at
617 // the very end of the FAT bootcode section the following code always
618 // assumes that it is dealing with an MBR. This is both more likely and
619 // the 0xEE marker is rarer and thus more specific than FATs 0x00, 0x80 and
620 // 0x02.
621 var bootCode []byte
622 hasDOSBootSig := m.Signature == mbrSignature
623 if hasDOSBootSig {
624 var isGPT bool
625 for _, p := range m.PartitionRecords {
626 if p.Type == 0xEE {
627 isGPT = true
628 }
629 }
630 // Note that there is a small but non-zero chance that isGPT is true
631 // for a raw FAT filesystem if the bootcode contains a "valid" MBR.
632 // The next error message mentions that possibility.
633 if !isGPT {
634 return nil, errors.New("block device contains an MBR table without a GPT marker or a raw FAT filesystem")
635 }
636 // Trim right zeroes away as they are padded back when writing. This
637 // makes BootCode empty when it is all-zeros, making it easier to work
638 // with while still round-tripping correctly.
639 bootCode = bytes.TrimRight(m.BootCode[:], "\x00")
640 }
641 // Read the primary GPT. If it is damaged and/or broken, read the alternate.
Lorenz Brunad131882023-06-28 16:42:20 +0200642 primaryGPT, err := readSingleGPT(r, 1)
Lorenz Brunee17d832022-10-18 12:02:45 +0000643 if err != nil {
Lorenz Brunad131882023-06-28 16:42:20 +0200644 alternateGPT, err2 := readSingleGPT(r, r.BlockCount()-1)
Lorenz Brunee17d832022-10-18 12:02:45 +0000645 if err2 != nil {
Tim Windelschmidt5f1a7de2024-09-19 02:00:14 +0200646 return nil, fmt.Errorf("failed to read both GPTs: primary GPT (%w), secondary GPT (%w)", err, err2)
Lorenz Brunee17d832022-10-18 12:02:45 +0000647 }
648 alternateGPT.BootCode = bootCode
649 return alternateGPT, nil
650 }
651 primaryGPT.BootCode = bootCode
652 return primaryGPT, nil
653}
654
Lorenz Brunad131882023-06-28 16:42:20 +0200655func readSingleGPT(r blockdev.BlockDev, headerBlockPos int64) (*Table, error) {
656 hdrBlock := make([]byte, r.BlockSize())
657 if _, err := r.ReadAt(hdrBlock, r.BlockSize()*headerBlockPos); err != nil {
Lorenz Brunee17d832022-10-18 12:02:45 +0000658 return nil, fmt.Errorf("failed to read GPT header block: %w", err)
659 }
660 hdrBlockReader := bytes.NewReader(hdrBlock)
661 var hdr header
662 if err := binary.Read(hdrBlockReader, binary.LittleEndian, &hdr); err != nil {
663 panic(err) // Read from memory with enough bytes, should not fail
664 }
665 if hdr.Signature != gptSignature {
666 return nil, errors.New("no GPT signature found")
667 }
668 if hdr.HeaderSize < uint32(binary.Size(hdr)) {
669 return nil, fmt.Errorf("GPT header size is too small, likely corrupted")
670 }
Lorenz Brunad131882023-06-28 16:42:20 +0200671 if int64(hdr.HeaderSize) > r.BlockSize() {
Lorenz Brunee17d832022-10-18 12:02:45 +0000672 return nil, fmt.Errorf("GPT header size is bigger than block size, likely corrupted")
673 }
674 // Use reserved bytes to hash, but do not expose them to the user.
675 // If someone has a need to process them, they should extend this library
676 // with whatever an updated UEFI specification contains.
677 // It has been considered to store these in the user-exposed GPT struct to
678 // be able to round-trip them cleanly, but there is significant complexity
679 // and risk involved in doing so.
680 reservedBytes := hdrBlock[binary.Size(hdr):hdr.HeaderSize]
681 hdrExpectedCRC := hdr.HeaderCRC32
682 hdr.HeaderCRC32 = 0
683 hdrCRC := crc32.NewIEEE()
684 binary.Write(hdrCRC, binary.LittleEndian, &hdr)
685 hdrCRC.Write(reservedBytes)
686 if hdrCRC.Sum32() != hdrExpectedCRC {
687 return nil, fmt.Errorf("GPT header checksum mismatch, probably corrupted")
688 }
689 if hdr.HeaderBlock != uint64(headerBlockPos) {
690 return nil, errors.New("GPT header indicates wrong block")
691 }
692 if hdr.PartitionEntrySize < uint32(binary.Size(partition{})) {
693 return nil, errors.New("partition entry size too small")
694 }
Lorenz Brunad131882023-06-28 16:42:20 +0200695 if hdr.PartitionEntriesStartBlock > uint64(r.BlockCount()) {
Lorenz Brunee17d832022-10-18 12:02:45 +0000696 return nil, errors.New("partition entry start block is out of range")
697 }
698 // Sanity-check total size of the partition entry area. Otherwise, this is a
699 // trivial DoS as it could cause allocation of gigabytes of memory.
700 // 4MiB is equivalent to around 45k partitions at the current size.
701 // I know of no operating system which would handle even a fraction of this.
702 if uint64(hdr.PartitionEntryCount)*uint64(hdr.PartitionEntrySize) > 4*1024*1024 {
703 return nil, errors.New("partition entry area bigger than 4MiB, refusing to read")
704 }
705 partitionEntryData := make([]byte, hdr.PartitionEntrySize*hdr.PartitionEntryCount)
Lorenz Brunad131882023-06-28 16:42:20 +0200706 if _, err := r.ReadAt(partitionEntryData, r.BlockSize()*int64(hdr.PartitionEntriesStartBlock)); err != nil {
Lorenz Brunee17d832022-10-18 12:02:45 +0000707 return nil, fmt.Errorf("failed to read partition entries: %w", err)
708 }
709 if crc32.ChecksumIEEE(partitionEntryData) != hdr.PartitionEntriesCRC32 {
710 return nil, errors.New("GPT partition entry table checksum mismatch")
711 }
712 var g Table
Lorenz Brun60d6b902023-06-20 16:02:40 +0200713 g.ID = msguid.To(hdr.ID)
Lorenz Brunee17d832022-10-18 12:02:45 +0000714 for i := uint32(0); i < hdr.PartitionEntryCount; i++ {
715 entryReader := bytes.NewReader(partitionEntryData[i*hdr.PartitionEntrySize : (i+1)*hdr.PartitionEntrySize])
716 var part partition
717 if err := binary.Read(entryReader, binary.LittleEndian, &part); err != nil {
718 panic(err) // Should not happen
719 }
720 // If the partition type is the all-zero UUID, this slot counts as
721 // unused.
Lorenz Brunad131882023-06-28 16:42:20 +0200722 if part.Type == uuid.Nil {
Lorenz Brunee17d832022-10-18 12:02:45 +0000723 g.Partitions = append(g.Partitions, nil)
724 continue
725 }
726 g.Partitions = append(g.Partitions, &Partition{
Lorenz Brun60d6b902023-06-20 16:02:40 +0200727 ID: msguid.To(part.ID),
728 Type: msguid.To(part.Type),
Lorenz Brunee17d832022-10-18 12:02:45 +0000729 Name: strings.TrimRight(string(utf16.Decode(part.Name[:])), "\x00"),
730 FirstBlock: part.FirstBlock,
731 LastBlock: part.LastBlock,
732 Attributes: Attribute(part.Attributes),
733 })
734 }
735 // Remove long list of nils at the end as it's inconvenient to work with
736 // (append doesn't work, debug prints are very long) and it round-trips
737 // correctly even without it as it gets zero-padded when writing anyway.
738 var maxValidPartition int
739 for i, p := range g.Partitions {
740 if !p.IsUnused() {
741 maxValidPartition = i
742 }
743 }
744 g.Partitions = g.Partitions[:maxValidPartition+1]
Lorenz Brunad131882023-06-28 16:42:20 +0200745 g.b = r
Lorenz Brunee17d832022-10-18 12:02:45 +0000746 return &g, nil
747}