m/p/fat32: add fat32 package

The fat32 package is a write-only implementation of the FAT32
filesystem. It works quite unlike a normal file system by first
determining the entire disk layout and then sequentially writing
out everything. This allows it to have a fully streaming output without
needing to seek at all.
Because all IO is sequential the implementation is extremely fast and
can potentially even leverage things like the copy_file_range syscall.
This means however that all files and readers need to be prepared ahead
of time, it is not possible to make decisions during the writing
process.
It is also possible to generate "right-sized" filesystems by not
specifying an explicit block count. In that case the resulting image
will contain exactly as many clusters as needed.

Change-Id: I49bf2ce09b26a7d628a39a0dd0745bca61c1c4da
Reviewed-on: https://review.monogon.dev/c/monogon/+/841
Tested-by: Jenkins CI
Reviewed-by: Sergiusz Bazanski <serge@monogon.tech>
diff --git a/metropolis/pkg/fat32/BUILD.bazel b/metropolis/pkg/fat32/BUILD.bazel
new file mode 100644
index 0000000..d62196a
--- /dev/null
+++ b/metropolis/pkg/fat32/BUILD.bazel
@@ -0,0 +1,37 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+load("//metropolis/test/ktest:ktest.bzl", "ktest")
+
+go_library(
+    name = "fat32",
+    srcs = [
+        "dos83.go",
+        "fat32.go",
+        "structs.go",
+        "utils.go",
+    ],
+    importpath = "source.monogon.dev/metropolis/pkg/fat32",
+    visibility = ["//visibility:public"],
+)
+
+go_test(
+    name = "fat32_test",
+    srcs = [
+        "fsck_test.go",
+        "linux_test.go",
+        "structs_test.go",
+    ],
+    data = ["@com_github_dosfstools_dosfstools//:fsck"],
+    embed = [":fat32"],
+    deps = [
+        "//metropolis/cli/pkg/datafile",
+        "@com_github_stretchr_testify//assert",
+        "@com_github_stretchr_testify//require",
+        "@org_golang_x_mod//semver",
+        "@org_golang_x_sys//unix",
+    ],
+)
+
+ktest(
+    cmdline = "ramdisk_size=266240",
+    tester = ":fat32_test",
+)
diff --git a/metropolis/pkg/fat32/dos83.go b/metropolis/pkg/fat32/dos83.go
new file mode 100644
index 0000000..4956468
--- /dev/null
+++ b/metropolis/pkg/fat32/dos83.go
@@ -0,0 +1,93 @@
+package fat32
+
+import (
+	"errors"
+	"fmt"
+	"math"
+	"regexp"
+	"strings"
+)
+
+// By default, DOS names would be encoded as what Microsoft calls the OEM
+// code page. This is however dependant on the code page settings of the
+// OS reading the file name as it's not mentioned in FAT32 metadata.
+// To get maximum compatibility and make it easy to read in hex editors
+// this only encodes ASCII characters and not any specific code page.
+// This can still result in garbled data when using a non-latin code page,
+// but this is unavoidable.
+// This is legal as there is no specific requirements for generating these
+// DOS names and any semi-modern system should use the unicode filenames
+// anyways.
+
+var invalidDOSNameChar = regexp.MustCompile("^[^A-Z0-9!#$%&'()@^_\x60{}~-]$")
+
+// validDOSName matches names which are valid and unique DOS 8.3 file names as
+// well as valid ASCII
+var validDOSName = regexp.MustCompile(`^^([A-Z0-9!#$%&'()@^_\x60{}~-]{0,8})(\.[A-Z0-9!#$%&'()-@^_\x60{}~-]{1,3})?$`)
+
+func makeUniqueDOSNames(inodes []*Inode) error {
+	taken := make(map[[11]byte]bool)
+	var lossyNameInodes []*Inode
+	// Make two passes to ensure that names can always be passed through even
+	// if they would conflict with a generated name.
+	for _, i := range inodes {
+		for j := range i.dosName {
+			i.dosName[j] = ' '
+		}
+		nameUpper := strings.ToUpper(i.Name)
+		dosParts := validDOSName.FindStringSubmatch(nameUpper)
+		if dosParts != nil {
+			// Name is pass-through
+			copy(i.dosName[:8], []byte(dosParts[1]))
+			if len(dosParts[2]) > 0 {
+				// Skip the dot, it is implicit
+				copy(i.dosName[8:], []byte(dosParts[2][1:]))
+			}
+			if taken[i.dosName] {
+				// Mapping is unique, complain about the actual file name, not
+				// the 8.3 one
+				return fmt.Errorf("name %q occurs more than once in the same directory", i.Name)
+			}
+			taken[i.dosName] = true
+			continue
+		}
+		lossyNameInodes = append(lossyNameInodes, i)
+	}
+	// Willfully ignore the recommended short name generation algorithm as it
+	// requires tons of bookkeeping and doesn't result in stable names so
+	// cannot be relied on anyway.
+	// A FAT32 directory is limited to 2^16 entries (in practice less than half
+	// of that because of long file name entries), so 4 hex characters
+	// guarantee uniqueness, regardless of the rest of name.
+	var nameIdx int
+	for _, i := range lossyNameInodes {
+		nameUpper := strings.ToUpper(i.Name)
+		dotParts := strings.Split(nameUpper, ".")
+		for j := range dotParts {
+			// Remove all invalid chars
+			dotParts[j] = invalidDOSNameChar.ReplaceAllString(dotParts[j], "")
+		}
+		var fileName string
+		lastDotPart := dotParts[len(dotParts)-1]
+		if len(dotParts) > 1 && len(dotParts[0]) > 0 && len(lastDotPart) > 0 {
+			// We have a valid 8.3 extension
+			copy(i.dosName[8:], lastDotPart)
+			fileName = strings.Join(dotParts[:len(dotParts)-1], "")
+		} else {
+			fileName = strings.Join(dotParts[:], "")
+		}
+		copy(i.dosName[:4], fileName)
+
+		for {
+			copy(i.dosName[4:], fmt.Sprintf("%04X", nameIdx))
+			nameIdx++
+			if nameIdx >= math.MaxUint16 {
+				return errors.New("invariant violated: unable to find unique name with 16 bit counter in 16 bit space")
+			}
+			if !taken[i.dosName] {
+				break
+			}
+		}
+	}
+	return nil
+}
diff --git a/metropolis/pkg/fat32/fat32.go b/metropolis/pkg/fat32/fat32.go
new file mode 100644
index 0000000..d734eb7
--- /dev/null
+++ b/metropolis/pkg/fat32/fat32.go
@@ -0,0 +1,533 @@
+// Package fat32 implements a writer for the FAT32 filesystem.
+package fat32
+
+import (
+	"crypto/rand"
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"io"
+	"io/fs"
+	"math"
+	"math/bits"
+	"strings"
+	"time"
+	"unicode/utf16"
+)
+
+// This package contains multiple references to the FAT32 specification, called
+// Microsoft Extensible Firmware Initiative FAT32 File System Specification
+// version 1.03 (just called the spec from now on). You can get it at
+// https://download.microsoft.com/download/0/8/4/\
+// 084c452b-b772-4fe5-89bb-a0cbf082286a/fatgen103.doc
+
+type Options struct {
+	// Size of a logical block on the block device. Needs to be a power of two
+	// equal or bigger than 512. If left at zero, defaults to 512.
+	BlockSize uint16
+
+	// Number of blocks the filesystem should span. If zero, it will be exactly
+	// as large as it needs to be.
+	BlockCount uint32
+
+	// Human-readable filesystem label. Maximum 10 bytes (gets cut off), should
+	// be uppercase alphanumeric.
+	Label string
+
+	// Filesystem identifier. If unset (i.e. left at zero) a random value will
+	// be assigned by WriteFS.
+	ID uint32
+}
+
+// SizedReader is an io.Reader with a known size
+type SizedReader interface {
+	io.Reader
+	Size() int64
+}
+
+// Attribute is a bitset of flags set on an inode.
+// See also the spec page 24
+type Attribute uint8
+
+const (
+	// AttrReadOnly marks a file as read-only
+	AttrReadOnly Attribute = 0x01
+	// AttrHidden indicates that directory listings should not show this file.
+	AttrHidden Attribute = 0x02
+	// AttrSystem indicates that this is an operating system file.
+	AttrSystem Attribute = 0x04
+	// AttrDirectory indicates that this is a directory and not a file.
+	AttrDirectory Attribute = 0x10
+	// AttrArchive canonically indicates that a file has been created/modified
+	// since the last backup. Its use in practice is inconsistent.
+	AttrArchive Attribute = 0x20
+)
+
+// Inode is file or directory on the FAT32 filesystem. Note that the concept
+// of an inode doesn't really exist on FAT32, its directories are just special
+// files.
+type Inode struct {
+	// Name of the file or directory (not including its path)
+	Name string
+	// Time the file or directory was last modified
+	ModTime time.Time
+	// Time the file or directory was created
+	CreateTime time.Time
+	// Attributes
+	Attrs Attribute
+	// Children of this directory (only valid when Attrs has AttrDirectory set)
+	Children []*Inode
+	// Content of this file
+	// Only valid when Attrs doesn't have AttrDirectory set.
+	Content SizedReader
+
+	// Filled out on placement and write-out
+	startCluster int
+	parent       *Inode
+	dosName      [11]byte
+}
+
+// Number of LFN entries + normal entry (all 32 bytes)
+func (i Inode) metaSize() (int64, error) {
+	fileNameUTF16 := utf16.Encode([]rune(i.Name))
+	// VFAT file names are null-terminated
+	fileNameUTF16 = append(fileNameUTF16, 0x00)
+	if len(fileNameUTF16) > 255 {
+		return 0, errors.New("file name too long, maximum is 255 UTF-16 code points")
+	}
+
+	// ⌈len(fileNameUTF16)/codepointsPerEntry⌉
+	numEntries := (len(fileNameUTF16) + codepointsPerEntry - 1) / codepointsPerEntry
+	return (int64(numEntries) + 1) * 32, nil
+}
+
+func lfnChecksum(dosName [11]byte) uint8 {
+	var sum uint8
+	for _, b := range dosName {
+		sum = ((sum & 1) << 7) + (sum >> 1) + b
+	}
+	return sum
+}
+
+// writeMeta writes information about this inode into the contents of the parent
+// inode.
+func (i Inode) writeMeta(w io.Writer) error {
+	fileNameUTF16 := utf16.Encode([]rune(i.Name))
+	// VFAT file names are null-terminated
+	fileNameUTF16 = append(fileNameUTF16, 0x00)
+	if len(fileNameUTF16) > 255 {
+		return errors.New("file name too long, maximum is 255 UTF-16 code points")
+	}
+
+	// ⌈len(fileNameUTF16)/codepointsPerEntry⌉
+	numEntries := (len(fileNameUTF16) + codepointsPerEntry - 1) / codepointsPerEntry
+	// Fill up to space in given number of entries with fill code point 0xffff
+	fillCodePoints := (numEntries * codepointsPerEntry) - len(fileNameUTF16)
+	for j := 0; j < fillCodePoints; j++ {
+		fileNameUTF16 = append(fileNameUTF16, 0xffff)
+	}
+
+	// Write entries in reverse order
+	for j := numEntries; j > 0; j-- {
+		// Index of the code point being processed
+		cpIdx := (j - 1) * codepointsPerEntry
+		var entry lfnEntry
+		entry.Checksum = lfnChecksum(i.dosName)
+		// Downcast is safe as i <= numEntries <= ⌈255/codepointsPerEntry⌉
+		entry.SequenceNumber = uint8(j)
+		if j == numEntries {
+			entry.SequenceNumber |= lastSequenceNumberFlag
+		}
+		entry.Attributes = 0x0F
+		copy(entry.NamePart1[:], fileNameUTF16[cpIdx:])
+		cpIdx += len(entry.NamePart1)
+		copy(entry.NamePart2[:], fileNameUTF16[cpIdx:])
+		cpIdx += len(entry.NamePart2)
+		copy(entry.NamePart3[:], fileNameUTF16[cpIdx:])
+		cpIdx += len(entry.NamePart3)
+
+		if err := binary.Write(w, binary.LittleEndian, entry); err != nil {
+			return err
+		}
+	}
+	selfSize, err := i.dataSize()
+	if err != nil {
+		return err
+	}
+	if selfSize >= 4*1024*1024*1024 {
+		return errors.New("single file size exceeds 4GiB which is prohibited in FAT32")
+	}
+	if i.Attrs&AttrDirectory != 0 {
+		selfSize = 0 // Directories don't have an explicit size
+	}
+	date, t, _ := timeToMsDosTime(i.ModTime)
+	if err := binary.Write(w, binary.LittleEndian, &dirEntry{
+		DOSName:           i.dosName,
+		Attributes:        uint8(i.Attrs),
+		FirstClusterHigh:  uint16(i.startCluster >> 16),
+		LastWrittenToTime: t,
+		LastWrittenToDate: date,
+		FirstClusterLow:   uint16(i.startCluster & 0xffff),
+		FileSize:          uint32(selfSize),
+	}); err != nil {
+		return err
+	}
+	return nil
+}
+
+// writeData writes the contents of this inode (including possible metadata
+// of its children, but not its children's data)
+func (i Inode) writeData(w io.Writer, volumeLabel [11]byte) error {
+	if i.Attrs&AttrDirectory != 0 {
+		if i.parent == nil {
+			if err := binary.Write(w, binary.LittleEndian, &dirEntry{
+				DOSName:    volumeLabel,
+				Attributes: 0x08, // Volume ID, internal use only
+			}); err != nil {
+				return err
+			}
+		} else {
+			date, t, _ := timeToMsDosTime(i.ModTime)
+			cdate, ctime, ctens := timeToMsDosTime(i.CreateTime)
+			if err := binary.Write(w, binary.LittleEndian, &dirEntry{
+				DOSName:           [11]byte{'.', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '},
+				CreationDate:      cdate,
+				CreationTime:      ctime,
+				CreationTenMilli:  ctens,
+				LastWrittenToTime: t,
+				LastWrittenToDate: date,
+				Attributes:        uint8(i.Attrs),
+				FirstClusterHigh:  uint16(i.startCluster >> 16),
+				FirstClusterLow:   uint16(i.startCluster & 0xffff),
+			}); err != nil {
+				return err
+			}
+			startCluster := i.parent.startCluster
+			if i.parent.parent == nil {
+				// Special case: When the dotdot directory points to the root
+				// directory, the start cluster is defined to be zero even if
+				// it isn't.
+				startCluster = 0
+			}
+			// Time is intentionally taken from this directory, not the parent
+			if err := binary.Write(w, binary.LittleEndian, &dirEntry{
+				DOSName:           [11]byte{'.', '.', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '},
+				LastWrittenToTime: t,
+				LastWrittenToDate: date,
+				Attributes:        uint8(AttrDirectory),
+				FirstClusterHigh:  uint16(startCluster >> 16),
+				FirstClusterLow:   uint16(startCluster & 0xffff),
+			}); err != nil {
+				return err
+			}
+		}
+		err := makeUniqueDOSNames(i.Children)
+		if err != nil {
+			return err
+		}
+		for _, c := range i.Children {
+			if err := c.writeMeta(w); err != nil {
+				return err
+			}
+		}
+	} else {
+		if _, err := io.CopyN(w, i.Content, i.Content.Size()); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (i Inode) dataSize() (int64, error) {
+	if i.Attrs&AttrDirectory != 0 {
+		var size int64
+		if i.parent != nil {
+			// Dot and dotdot directories
+			size += 2 * 32
+		} else {
+			// Volume ID
+			size += 1 * 32
+		}
+		for _, c := range i.Children {
+			cs, err := c.metaSize()
+			if err != nil {
+				return 0, err
+			}
+			size += cs
+		}
+		if size > 2*1024*1024 {
+			return 0, errors.New("directory contains > 2MiB of metadata which is prohibited in FAT32")
+		}
+		return size, nil
+	} else {
+		return i.Content.Size(), nil
+	}
+}
+
+func (i *Inode) PlaceFile(path string, reader SizedReader) error {
+	pathParts := strings.Split(path, "/")
+	inodeRef := i
+	for j, part := range pathParts {
+		var childExists bool
+		for _, child := range inodeRef.Children {
+			if strings.ToLower(child.Name) == strings.ToLower(part) {
+				inodeRef = child
+				childExists = true
+				break
+			}
+		}
+		if j == len(pathParts)-1 { // Is last path part (i.e. file name)
+			if childExists {
+				return &fs.PathError{Path: path, Err: fs.ErrExist, Op: "create"}
+			}
+			newInode := &Inode{
+				Name:    part,
+				Content: reader,
+			}
+			inodeRef.Children = append(inodeRef.Children, newInode)
+			return nil
+		} else if !childExists {
+			newInode := &Inode{
+				Name:  part,
+				Attrs: AttrDirectory,
+			}
+			inodeRef.Children = append(inodeRef.Children, newInode)
+			inodeRef = newInode
+		}
+	}
+	panic("unreachable")
+}
+
+type planningState struct {
+	// List of inodes in filesystem layout order
+	orderedInodes []*Inode
+	// File Allocation Table
+	fat []uint32
+	// Size of a single cluster in the FAT in bytes
+	clusterSize int64
+}
+
+// Allocates clusters capable of holding at least b bytes and returns the
+// starting cluster index
+func (p *planningState) allocBytes(b int64) int {
+	// Zero-byte data entries are located at the cluster zero by definition
+	// No actual allocation is performed
+	if b == 0 {
+		return 0
+	}
+	// Calculate the number of clusters to be allocated
+	n := (b + p.clusterSize - 1) / p.clusterSize
+	allocStartCluster := len(p.fat)
+	for i := int64(0); i < n-1; i++ {
+		p.fat = append(p.fat, uint32(len(p.fat)+1))
+	}
+	p.fat = append(p.fat, fatEOF)
+	return allocStartCluster
+}
+
+func (i *Inode) placeRecursively(p *planningState) error {
+	selfDataSize, err := i.dataSize()
+	if err != nil {
+		return fmt.Errorf("%s: %w", i.Name, err)
+	}
+	i.startCluster = p.allocBytes(selfDataSize)
+	p.orderedInodes = append(p.orderedInodes, i)
+	for _, c := range i.Children {
+		c.parent = i
+		err = c.placeRecursively(p)
+		if err != nil {
+			return fmt.Errorf("%s/%w", i.Name, err)
+		}
+	}
+	return nil
+}
+
+// WriteFS writes a filesystem described by a root inode and its children to a
+// given io.Writer.
+func WriteFS(w io.Writer, rootInode Inode, opts Options) error {
+	if opts.BlockSize == 0 {
+		opts.BlockSize = 512
+	}
+	if bits.OnesCount16(opts.BlockSize) != 1 {
+		return fmt.Errorf("option BlockSize is not a power of two")
+	}
+	if opts.BlockSize < 512 {
+		return fmt.Errorf("option BlockSize must be at least 512 bytes")
+	}
+	if opts.ID == 0 {
+		var buf [4]byte
+		if _, err := rand.Read(buf[:]); err != nil {
+			return fmt.Errorf("failed to assign random FAT ID: %v", err)
+		}
+		opts.ID = binary.BigEndian.Uint32(buf[:])
+	}
+	if rootInode.Attrs&AttrDirectory == 0 {
+		return errors.New("root inode must be a directory (i.e. have AttrDirectory set)")
+	}
+	wb := newBlockWriter(w)
+	bs := bootSector{
+		// Assembled x86_32 machine code corresponding to
+		// jmp $
+		// nop
+		// i.e. an infinite loop doing nothing. Nothing created in the last 35
+		// years should boot this anyway.
+		// TODO(q3k): write a stub
+		JmpInstruction: [3]byte{0xEB, 0xFE, 0x90},
+		// Identification
+		OEMName: [8]byte{'M', 'O', 'N', 'O', 'G', 'O', 'N'},
+		ID:      opts.ID,
+		// Block geometry
+		BlockSize:   opts.BlockSize,
+		TotalBlocks: opts.BlockCount,
+		// BootSector block + FSInfo Block, backup copy at blocks 6 and 7
+		ReservedBlocks: 8,
+		// FSInfo block is always in block 1, right after this block
+		FSInfoBlock: 1,
+		// Start block of the backup of the boot block and FSInfo block
+		// De facto this must be 6 as it is only used when the primary
+		// boot block is damaged at which point this field can no longer be
+		// read.
+		BackupStartBlock: 6,
+		// A lot of implementations only work with 2, so use that
+		NumFATs:          2,
+		BlocksPerCluster: 1,
+		// Flags and signatures
+		MediaCode:     0xf8,
+		BootSignature: 0x29,
+		Label:         [11]byte{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '},
+		Type:          [8]byte{'F', 'A', 'T', '3', '2', ' ', ' ', ' '},
+		Signature:     [2]byte{0x55, 0xaa},
+	}
+
+	copy(bs.Label[:], opts.Label)
+
+	fs := fsinfo{
+		// Signatures
+		LeadSignature:     [4]byte{0x52, 0x52, 0x61, 0x41},
+		StructSignature:   [4]byte{0x72, 0x72, 0x41, 0x61},
+		TrailingSignature: [2]byte{0x55, 0xAA},
+
+		// This is the unset value which is always legal
+		NextFreeCluster: 0xFFFFFFFF,
+	}
+
+	p := planningState{
+		clusterSize: int64(bs.BlocksPerCluster) * int64(bs.BlockSize),
+	}
+	if opts.BlockCount != 0 {
+		// Preallocate FAT if we know how big it needs to be
+		p.fat = make([]uint32, 0, opts.BlockCount/uint32(bs.BlocksPerCluster))
+	} else {
+		// Preallocate minimum size FAT
+		// See the spec page 15 for the origin of this calculation.
+		p.fat = make([]uint32, 0, 65525+2)
+	}
+	// First two clusters are special
+	p.fat = append(p.fat, 0x0fffff00|uint32(bs.MediaCode), 0x0fffffff)
+	err := rootInode.placeRecursively(&p)
+	if err != nil {
+		return err
+	}
+
+	allocClusters := len(p.fat)
+	if allocClusters >= fatMask&math.MaxUint32 {
+		return fmt.Errorf("filesystem contains more than 2^28 FAT entries, this is unsupported. Note that this package currently always creates minimal clusters.")
+	}
+
+	// Fill out FAT to minimum size for FAT32
+	for len(p.fat) < 65525+2 {
+		p.fat = append(p.fat, fatFree)
+	}
+
+	bs.RootClusterNumber = uint32(rootInode.startCluster)
+
+	bs.BlocksPerFAT = uint32(binary.Size(p.fat)+int(opts.BlockSize)-1) / uint32(opts.BlockSize)
+	occupiedBlocks := uint32(bs.ReservedBlocks) + (uint32(len(p.fat)-2) * uint32(bs.BlocksPerCluster)) + bs.BlocksPerFAT*uint32(bs.NumFATs)
+	if bs.TotalBlocks == 0 {
+		bs.TotalBlocks = occupiedBlocks
+	} else if bs.TotalBlocks < occupiedBlocks {
+		return fmt.Errorf("content (minimum %d blocks) would exceed number of blocks specified (%d blocks)", occupiedBlocks, bs.TotalBlocks)
+	} else { // Fixed-size file system with enough space
+		blocksToDistribute := bs.TotalBlocks - uint32(bs.ReservedBlocks)
+		// Number of data blocks which can be described by one metadata/FAT
+		// block. Always an integer because 4 (bytes per uint32) is a divisor of
+		// all powers of two equal or bigger than 8 and FAT32 requires a minimum
+		// of 512.
+		dataBlocksPerFATBlock := (uint32(bs.BlocksPerCluster) * uint32(bs.BlockSize)) / (uint32(binary.Size(p.fat[0])))
+		// Split blocksToDistribute between metadata and data so that exactly as
+		// much metadata (FAT) exists for describing the amount of data blocks
+		// while respecting alignment.
+		divisor := dataBlocksPerFATBlock + uint32(bs.NumFATs)
+		// 2*blocksPerCluster compensates for the first two "magic" FAT entries
+		// which do not have corresponding data.
+		bs.BlocksPerFAT = (bs.TotalBlocks + 2*uint32(bs.BlocksPerCluster) + (divisor - 1)) / divisor
+		dataBlocks := blocksToDistribute - (uint32(bs.NumFATs) * bs.BlocksPerFAT)
+		// Align to full clusters
+		dataBlocks -= dataBlocks % uint32(bs.BlocksPerCluster)
+		// Magic +2 as the first two entries do not describe data
+		for len(p.fat) < (int(dataBlocks)/int(bs.BlocksPerCluster))+2 {
+			p.fat = append(p.fat, fatFree)
+		}
+	}
+	fs.FreeCount = uint32(len(p.fat) - allocClusters)
+	if fs.FreeCount > 1 {
+		fs.NextFreeCluster = uint32(allocClusters) + 1
+	}
+
+	// Write superblock
+	if err := binary.Write(wb, binary.LittleEndian, bs); err != nil {
+		return err
+	}
+	if err := wb.FinishBlock(int64(opts.BlockSize), true); err != nil {
+		return err
+	}
+	if err := binary.Write(wb, binary.LittleEndian, fs); err != nil {
+		return err
+	}
+	if err := wb.FinishBlock(int64(opts.BlockSize), true); err != nil {
+		return err
+	}
+
+	block := make([]byte, opts.BlockSize)
+	for i := 0; i < 4; i++ {
+		if _, err := wb.Write(block); err != nil {
+			return err
+		}
+	}
+	// Backup of superblock at block 6
+	if err := binary.Write(wb, binary.LittleEndian, bs); err != nil {
+		return err
+	}
+	if err := wb.FinishBlock(int64(opts.BlockSize), true); err != nil {
+		return err
+	}
+	if err := binary.Write(wb, binary.LittleEndian, fs); err != nil {
+		return err
+	}
+	if err := wb.FinishBlock(int64(opts.BlockSize), true); err != nil {
+		return err
+	}
+
+	for i := uint8(0); i < bs.NumFATs; i++ {
+		if err := binary.Write(wb, binary.LittleEndian, p.fat); err != nil {
+			return err
+		}
+		if err := wb.FinishBlock(int64(opts.BlockSize), true); err != nil {
+			return err
+		}
+	}
+
+	for _, i := range p.orderedInodes {
+		if err := i.writeData(wb, bs.Label); err != nil {
+			return fmt.Errorf("failed to write inode %q: %v", i.Name, err)
+		}
+		if err := wb.FinishBlock(int64(opts.BlockSize)*int64(bs.BlocksPerCluster), false); err != nil {
+			return err
+		}
+	}
+	// Creatively use block writer to write out all empty data at the end
+	if err := wb.FinishBlock(int64(opts.BlockSize)*int64(bs.TotalBlocks), false); err != nil {
+		return err
+	}
+	return nil
+}
diff --git a/metropolis/pkg/fat32/fsck_test.go b/metropolis/pkg/fat32/fsck_test.go
new file mode 100644
index 0000000..687504a
--- /dev/null
+++ b/metropolis/pkg/fat32/fsck_test.go
@@ -0,0 +1,103 @@
+package fat32
+
+import (
+	"fmt"
+	"os"
+	"os/exec"
+	"strings"
+	"testing"
+	"time"
+
+	"source.monogon.dev/metropolis/cli/pkg/datafile"
+)
+
+func testWithFsck(t *testing.T, rootInode Inode, opts Options) {
+	t.Helper()
+	fsckPath, err := datafile.ResolveRunfile("external/com_github_dosfstools_dosfstools/fsck")
+	if err != nil {
+		t.Fatalf("unable to get path to fsck: %v", err)
+	}
+	testFile, err := os.CreateTemp("", "fat32-fsck-test")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.Remove(testFile.Name())
+	if err := WriteFS(testFile, rootInode, opts); err != nil {
+		t.Fatalf("failed to write test FS: %v", err)
+	}
+	// Run fsck non-interactively (-n), disallow spaces in short file names (-S)
+	// as well as perform deep verification (-V)
+	// If the file system is OK (i.e. fsck does not want to fix it) it returns
+	// 0, otherwise 1.
+	fsckCmd := exec.Command(fsckPath, "-n", "-S", "-V", testFile.Name())
+	result, err := fsckCmd.CombinedOutput()
+	if err != nil {
+		t.Errorf("fsck failed: %v", string(result))
+	}
+}
+
+func TestBasicFsck(t *testing.T) {
+	if os.Getenv("IN_KTEST") == "true" {
+		t.Skip("In ktest")
+	}
+	var largeString strings.Builder
+	for i := 0; i < 16384; i++ {
+		fmt.Fprintf(&largeString, "part%d", i)
+	}
+	// Test both common block sizes (512 and 4096 bytes) as well as the largest
+	// supported one (32K)
+	for _, blockSize := range []uint16{512, 4096, 32768} {
+		for _, fixed := range []string{"", "Fixed"} {
+			t.Run(fmt.Sprintf("BlockSize%d%v", blockSize, fixed), func(t *testing.T) {
+				rootInode := Inode{
+					Attrs:      AttrDirectory,
+					ModTime:    time.Date(2022, 03, 04, 5, 6, 7, 8, time.UTC),
+					CreateTime: time.Date(2022, 03, 04, 5, 6, 7, 8, time.UTC),
+				}
+				files := []struct {
+					name    string
+					path    string
+					content string
+				}{
+					{"FileInRoot", "test1.txt", "test1 content"},
+					{"LongFileInRoot", "verylongtest1.txt", "test1 content long"},
+					{"LongPath", "test1/test2/test3/test4/longdirname.ext/hello", "long path test content"},
+					{"LargeFile", "test1/largefile.txt", largeString.String()},
+				}
+				for _, c := range files {
+					err := rootInode.PlaceFile(c.path, strings.NewReader(c.content))
+					if err != nil {
+						t.Errorf("failed to place file: %v", err)
+					}
+				}
+				opts := Options{ID: 1234, Label: "TEST", BlockSize: blockSize}
+				if fixed == "Fixed" {
+					// Use a block count that is slightly higher than the minimum
+					opts.BlockCount = 67000
+				}
+				testWithFsck(t, rootInode, opts)
+			})
+		}
+	}
+}
+
+func TestLotsOfFilesFsck(t *testing.T) {
+	if os.Getenv("IN_KTEST") == "true" {
+		t.Skip("In ktest")
+	}
+	rootInode := Inode{
+		Attrs:   AttrDirectory,
+		ModTime: time.Date(2022, 03, 04, 5, 6, 7, 8, time.UTC),
+	}
+	for i := 0; i < (32*1024)-2; i++ {
+		rootInode.Children = append(rootInode.Children, &Inode{
+			Name:    fmt.Sprintf("test%d", i),
+			Content: strings.NewReader("random test content"),
+			// Add some random attributes
+			Attrs: AttrHidden | AttrSystem,
+			// And a random ModTime
+			ModTime: time.Date(2022, 03, 04, 5, 6, 7, 8, time.UTC),
+		})
+	}
+	testWithFsck(t, rootInode, Options{ID: 1234, Label: "TEST"})
+}
diff --git a/metropolis/pkg/fat32/linux_test.go b/metropolis/pkg/fat32/linux_test.go
new file mode 100644
index 0000000..ca62b76
--- /dev/null
+++ b/metropolis/pkg/fat32/linux_test.go
@@ -0,0 +1,281 @@
+package fat32
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"math/rand"
+	"os"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"golang.org/x/mod/semver"
+	"golang.org/x/sys/unix"
+)
+
+func TestKernelInterop(t *testing.T) {
+	if os.Getenv("IN_KTEST") != "true" {
+		t.Skip("Not in ktest")
+	}
+
+	// ONCHANGE(//third_party/linux): Drop this once we move to a Kernel version
+	// newer than 5.19 which will have FAT btime support.
+	kernelVersion, err := os.ReadFile("/proc/sys/kernel/osrelease")
+	if err != nil {
+		t.Fatalf("unable to determine kernel version: %v", err)
+	}
+	haveBtime := semver.Compare("v"+string(kernelVersion), "v5.19.0") >= 0
+
+	type testCase struct {
+		name     string
+		setup    func(root *Inode) error
+		validate func(t *testing.T) error
+	}
+
+	// Random timestamp in UTC, divisible by 10ms
+	testTimestamp1 := time.Date(2022, 03, 04, 5, 6, 7, 10, time.UTC)
+	// Random timestamp in UTC, divisible by 2s
+	testTimestamp2 := time.Date(2022, 03, 04, 5, 6, 8, 0, time.UTC)
+	// Random timestamp in UTC, divisible by 10ms
+	testTimestamp3 := time.Date(2052, 03, 02, 5, 6, 7, 10, time.UTC)
+	// Random timestamp in UTC, divisible by 2s
+	testTimestamp4 := time.Date(2052, 10, 04, 5, 3, 4, 0, time.UTC)
+
+	testContent1 := "testcontent1"
+
+	tests := []testCase{
+		{
+			name: "SimpleFolder",
+			setup: func(root *Inode) error {
+				root.Children = []*Inode{{
+					Name:       "testdir",
+					Attrs:      AttrDirectory,
+					CreateTime: testTimestamp1,
+					ModTime:    testTimestamp2,
+				}}
+				return nil
+			},
+			validate: func(t *testing.T) error {
+				var stat unix.Statx_t
+				if err := unix.Statx(0, "/dut/testdir", 0, unix.STATX_TYPE|unix.STATX_MTIME|unix.STATX_BTIME, &stat); err != nil {
+					availableFiles, err := os.ReadDir("/dut")
+					var availableFileNames []string
+					for _, f := range availableFiles {
+						availableFileNames = append(availableFileNames, f.Name())
+					}
+					if err != nil {
+						t.Fatalf("Failed to list filesystem root directory: %v", err)
+					}
+					t.Fatalf("Failed to stat output: %v (available: %v)", err, strings.Join(availableFileNames, ", "))
+				}
+				if stat.Mode&unix.S_IFDIR == 0 {
+					t.Errorf("testdir is expected to be a directory, but has mode %v", stat.Mode)
+				}
+				btime := time.Unix(stat.Btime.Sec, int64(stat.Btime.Nsec))
+				if !btime.Equal(testTimestamp1) && haveBtime {
+					t.Errorf("testdir btime expected %v, got %v", testTimestamp1, btime)
+				}
+				mtime := time.Unix(stat.Mtime.Sec, int64(stat.Mtime.Nsec))
+				if !mtime.Equal(testTimestamp2) {
+					t.Errorf("testdir mtime expected %v, got %v", testTimestamp2, mtime)
+				}
+				return nil
+			},
+		},
+		{
+			name: "SimpleFile",
+			setup: func(root *Inode) error {
+				root.Children = []*Inode{{
+					Name:       "testfile",
+					CreateTime: testTimestamp3,
+					ModTime:    testTimestamp4,
+					Content:    strings.NewReader(testContent1),
+				}}
+				return nil
+			},
+			validate: func(t *testing.T) error {
+				var stat unix.Statx_t
+				if err := unix.Statx(0, "/dut/testfile", 0, unix.STATX_TYPE|unix.STATX_MTIME|unix.STATX_BTIME, &stat); err != nil {
+					t.Fatalf("failed to stat output: %v", err)
+				}
+				if stat.Mode&unix.S_IFREG == 0 {
+					t.Errorf("testfile is expected to be a file, but has mode %v", stat.Mode)
+				}
+				btime := time.Unix(stat.Btime.Sec, int64(stat.Btime.Nsec))
+				if !btime.Equal(testTimestamp3) && haveBtime {
+					t.Errorf("testfile ctime expected %v, got %v", testTimestamp3, btime)
+				}
+				mtime := time.Unix(stat.Mtime.Sec, int64(stat.Mtime.Nsec))
+				if !mtime.Equal(testTimestamp4) {
+					t.Errorf("testfile mtime expected %v, got %v", testTimestamp3, mtime)
+				}
+				contents, err := os.ReadFile("/dut/testfile")
+				if err != nil {
+					t.Fatalf("failed to read back test file: %v", err)
+				}
+				if string(contents) != testContent1 {
+					t.Errorf("testfile contains %x, got %x", contents, []byte(testContent1))
+				}
+				return nil
+			},
+		},
+		{
+			name: "FolderHierarchy",
+			setup: func(i *Inode) error {
+				i.Children = []*Inode{{
+					Name:       "l1",
+					Attrs:      AttrDirectory,
+					CreateTime: testTimestamp1,
+					ModTime:    testTimestamp2,
+					Children: []*Inode{{
+						Name:       "l2",
+						Attrs:      AttrDirectory,
+						CreateTime: testTimestamp1,
+						ModTime:    testTimestamp2,
+					}},
+				}}
+				return nil
+			},
+			validate: func(t *testing.T) error {
+				dirInfo, err := os.ReadDir("/dut/l1")
+				if err != nil {
+					t.Fatalf("Failed to read top-level directory: %v", err)
+				}
+				require.Len(t, dirInfo, 1, "more subdirs than expected")
+				require.Equal(t, "l2", dirInfo[0].Name(), "unexpected subdir")
+				require.True(t, dirInfo[0].IsDir(), "l1 not a directory")
+				subdirInfo, err := os.ReadDir("/dut/l1/l2")
+				assert.NoError(t, err, "cannot read empty subdir")
+				require.Len(t, subdirInfo, 0, "unexpected subdirs in empty directory")
+				return nil
+			},
+		},
+		{
+			name: "LargeFile",
+			setup: func(i *Inode) error {
+				content := make([]byte, 6500)
+				io.ReadFull(rand.New(rand.NewSource(1)), content)
+				i.Children = []*Inode{{
+					Name:    "test.bin",
+					Content: bytes.NewReader(content),
+				}}
+				return nil
+			},
+			validate: func(t *testing.T) error {
+				var stat unix.Stat_t
+				err := unix.Stat("/dut/test.bin", &stat)
+				assert.NoError(t, err, "failed to stat file")
+				require.EqualValues(t, 6500, stat.Size, "wrong size")
+				file, err := os.Open("/dut/test.bin")
+				assert.NoError(t, err, "failed to open test file")
+				defer file.Close()
+				r := io.LimitReader(rand.New(rand.NewSource(1)), 6500) // Random but deterministic data
+				expected, _ := io.ReadAll(r)
+				actual, err := io.ReadAll(file)
+				assert.NoError(t, err, "failed to read test file")
+				assert.Equal(t, expected, actual, "content not identical")
+				return nil
+			},
+		},
+		{
+			name: "Unicode",
+			setup: func(i *Inode) error {
+				i.Children = []*Inode{{
+					Name:    "✨😂", // Really exercise that UTF-16 conversion
+					Content: strings.NewReader("😂"),
+				}}
+				return nil
+			},
+			validate: func(t *testing.T) error {
+				file, err := os.Open("/dut/✨😂")
+				if err != nil {
+					availableFiles, err := os.ReadDir("/dut")
+					var availableFileNames []string
+					for _, f := range availableFiles {
+						availableFileNames = append(availableFileNames, f.Name())
+					}
+					if err != nil {
+						t.Fatalf("Failed to list filesystem root directory: %v", err)
+					}
+					t.Fatalf("Failed to open unicode file: %v (available files: %v)", err, strings.Join(availableFileNames, ", "))
+				}
+				defer file.Close()
+				contents, err := io.ReadAll(file)
+				if err != nil {
+					t.Errorf("Wrong content: expected %x, got %x", []byte("😂"), contents)
+				}
+				return nil
+			},
+		},
+		{
+			name: "MultipleMetaClusters",
+			setup: func(root *Inode) error {
+				// Only test up to 2048 files as Linux gets VERY slow if going
+				// up to the maximum of approximately 32K
+				for i := 0; i < 2048; i++ {
+					root.Children = append(root.Children, &Inode{
+						Name:    fmt.Sprintf("verylongtestfilename%d", i),
+						Content: strings.NewReader("random test content"),
+					})
+				}
+				return nil
+			},
+			validate: func(t *testing.T) error {
+				files, err := os.ReadDir("/dut")
+				if err != nil {
+					t.Errorf("failed to list directory: %v", err)
+				}
+				if len(files) != 2048 {
+					t.Errorf("wrong number of files: expected %d, got %d", 2048, len(files))
+				}
+				return nil
+			},
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			file, err := os.OpenFile("/dev/ram0", os.O_WRONLY|os.O_TRUNC, 0644)
+			if err != nil {
+				t.Fatalf("failed to create test image: %v", err)
+			}
+			size, err := unix.IoctlGetInt(int(file.Fd()), unix.BLKGETSIZE64)
+			if err != nil {
+				t.Fatalf("failed to get ramdisk size: %v", err)
+			}
+			blockSize, err := unix.IoctlGetInt(int(file.Fd()), unix.BLKBSZGET)
+			if err != nil {
+				t.Fatalf("failed to get ramdisk block size: %v", err)
+			}
+			defer file.Close()
+			rootInode := Inode{
+				Attrs: AttrDirectory,
+			}
+			if err := test.setup(&rootInode); err != nil {
+				t.Fatalf("setup failed: %v", err)
+			}
+			if err := WriteFS(file, rootInode, Options{
+				ID:         1234,
+				Label:      "KTEST",
+				BlockSize:  uint16(blockSize),
+				BlockCount: uint32(size / blockSize),
+			}); err != nil {
+				t.Fatalf("failed to write fileystem: %v", err)
+			}
+			_ = file.Close()
+			if err := os.MkdirAll("/dut", 0755); err != nil {
+				t.Error(err)
+			}
+			// TODO(lorenz): Set CONFIG_FAT_DEFAULT_UTF8 for Monogon Kernel
+			if err := unix.Mount("/dev/ram0", "/dut", "vfat", unix.MS_NOEXEC|unix.MS_NODEV, "utf8=1"); err != nil {
+				t.Fatal(err)
+			}
+			defer unix.Unmount("/dut", 0)
+			test.validate(t)
+		})
+
+	}
+}
diff --git a/metropolis/pkg/fat32/structs.go b/metropolis/pkg/fat32/structs.go
new file mode 100644
index 0000000..396361d
--- /dev/null
+++ b/metropolis/pkg/fat32/structs.go
@@ -0,0 +1,133 @@
+package fat32
+
+const (
+	// FAT32 entries are only 28 bits
+	fatMask = 0x0fffffff
+	// Free entries are 0
+	fatFree = 0x0
+	// Entry at the end of a cluster chain
+	fatEOF = 0x0ffffff8
+)
+
+// FAT32 Boot Sector and BIOS Parameter Block. This structure is 512 bytes long,
+// even if the logical block size is longer. The rest should be filled up with
+// zeroes.
+type bootSector struct {
+	// Jump instruction to boot code.
+	JmpInstruction [3]byte
+	// Creator name. "MSWIN4.1" recommended for compatibility.
+	OEMName [8]byte
+	// Count of bytes per block (i.e. logical block size)
+	// Must be one of 512, 1024, 2048 or 4096
+	BlockSize uint16
+	// Number of blocks per allocation unit (cluster).
+	// Must be a power of 2 that is greater than 0.
+	BlocksPerCluster uint8
+	// Number of reserved blocks in the reserved region of the volume starting
+	// at the first block of the volume. This field must not be 0.
+	ReservedBlocks uint16
+	// The count of FAT data structures on the volume. This field should always
+	// contain the value of 2 for any FAT volume of any type.
+	NumFATs uint8
+	_       [4]byte
+	// Legacy value for media determination, must be 0xf8.
+	MediaCode uint8
+	_         [2]byte
+	// Number of sectors per track for 0x13 interrupts.
+	SectorsPerTrack uint16
+	// Number of heads for 0x13 interrupts.
+	NumHeads uint16
+	// Count of hidden blocks preceding the partition that contains this FAT
+	// volume.
+	HiddenBlocks uint32
+	// Total count of blocks on the volume.
+	TotalBlocks uint32
+	// Count of blocks per FAT.
+	BlocksPerFAT uint32
+	// Flags for FAT32
+	Flags uint16
+	_     [2]byte
+	// Cluster number of the first cluster of the root directory. Usually 2.
+	RootClusterNumber uint32
+	// Block number of the FSINFO structure in the reserved area.
+	FSInfoBlock uint16
+	// Block number of the copy of the boot record in the reserved area.
+	BackupStartBlock uint16
+	_                [12]byte
+	// Drive number for 0x13 interrupts.
+	DriveNumber   uint8
+	_             [1]byte
+	BootSignature uint8
+	// ID of this filesystem
+	ID uint32
+	// Human-readable label of this filesystem, padded with spaces (0x20)
+	Label [11]byte
+	// Always set to ASCII "FAT32    "
+	Type [8]byte
+	_    [420]byte
+	// Always 0x55, 0xAA
+	Signature [2]byte
+}
+
+// Special block (usually at block 1) containing additional metadata,
+// specifically the number of free clusters and the next free cluster.
+// Always 512 bytes, rest of the block should be padded with zeroes.
+type fsinfo struct {
+	// Validates that this is an FSINFO block. Always 0x52, 0x52, 0x61, 0x41
+	LeadSignature [4]byte
+	_             [480]byte
+	// Another signature. Always 0x72, 0x72, 0x41, 0x61
+	StructSignature [4]byte
+	// Last known number of free clusters on the volume.
+	FreeCount uint32
+	// Next free cluster hint. All 1's is interpreted as undefined.
+	NextFreeCluster uint32
+	_               [14]byte
+	// One more signature. Always 0x55, 0xAA.
+	TrailingSignature [2]byte
+}
+
+// Directory entry
+type dirEntry struct {
+	// DOS 8.3 file name.
+	DOSName [11]byte
+	// Attribtes of the file or directory, 0x0f reserved to mark entry as a
+	// LFN entry (see lfnEntry below)
+	Attributes        uint8
+	_                 byte
+	CreationTenMilli  uint8 // Actually 10ms units, 0-199 range
+	CreationTime      uint16
+	CreationDate      uint16
+	_                 [2]byte
+	FirstClusterHigh  uint16
+	LastWrittenToTime uint16
+	LastWrittenToDate uint16
+	FirstClusterLow   uint16
+	FileSize          uint32
+}
+
+const (
+	// lastSequenceNumberFlag is logically-ORed with the sequence number of the
+	// last Long File Name entry to mark it as such.
+	lastSequenceNumberFlag = 0x40
+	// codepointsPerEntry is the number of UTF-16 codepoints that fit into a
+	// single Long File Name entry.
+	codepointsPerEntry = 5 + 6 + 2
+)
+
+// VFAT long file name prepended entry
+type lfnEntry struct {
+	SequenceNumber uint8
+	// First 5 UTF-16 code units
+	NamePart1 [5]uint16
+	// Attributes (must be 0x0f)
+	Attributes uint8
+	_          byte
+	// Checksum of the 8.3 name.
+	Checksum uint8
+	// Next 6 UTF-16 code units
+	NamePart2 [6]uint16
+	_         [2]byte
+	// Next 2 UTF-16 code units
+	NamePart3 [2]uint16
+}
diff --git a/metropolis/pkg/fat32/structs_test.go b/metropolis/pkg/fat32/structs_test.go
new file mode 100644
index 0000000..77a7df0
--- /dev/null
+++ b/metropolis/pkg/fat32/structs_test.go
@@ -0,0 +1,27 @@
+package fat32
+
+import (
+	"encoding/binary"
+	"reflect"
+	"testing"
+)
+
+func TestStructureSizes(t *testing.T) {
+	cases := []struct {
+		StructInstance interface{}
+		ExpectedSize   int
+	}{
+		{bootSector{}, 512},
+		{fsinfo{}, 512},
+		{dirEntry{}, 32},
+		{lfnEntry{}, 32},
+	}
+	for _, c := range cases {
+		t.Run(reflect.TypeOf(c.StructInstance).String(), func(t *testing.T) {
+			actualSize := binary.Size(c.StructInstance)
+			if actualSize != c.ExpectedSize {
+				t.Errorf("Expected %d bytes, got %d", c.ExpectedSize, actualSize)
+			}
+		})
+	}
+}
diff --git a/metropolis/pkg/fat32/utils.go b/metropolis/pkg/fat32/utils.go
new file mode 100644
index 0000000..833665c
--- /dev/null
+++ b/metropolis/pkg/fat32/utils.go
@@ -0,0 +1,74 @@
+package fat32
+
+import (
+	"fmt"
+	"io"
+	"time"
+)
+
+// Wraps a writer and provides support for writing padding up to a specified
+// alignment.
+// TODO(lorenz): Implement WriterTo when w implements it to allow for copy
+// offload
+type blockWriter struct {
+	w io.Writer
+	n int64
+}
+
+func newBlockWriter(w io.Writer) *blockWriter {
+	return &blockWriter{w: w}
+}
+
+func (b *blockWriter) Write(p []byte) (n int, err error) {
+	n, err = b.w.Write(p)
+	b.n += int64(n)
+	return
+}
+
+func (b *blockWriter) FinishBlock(alignment int64, mustZero bool) (err error) {
+	requiredBytes := (alignment - (b.n % alignment)) % alignment
+	if requiredBytes == 0 {
+		return nil
+	}
+	// Do not actually write out zeroes if not necessary
+	if s, ok := b.w.(io.Seeker); ok && !mustZero {
+		if _, err := s.Seek(requiredBytes-1, io.SeekCurrent); err != nil {
+			return fmt.Errorf("failed to seek to create hole for empty block: %w", err)
+		}
+		if _, err := b.w.Write([]byte{0x00}); err != nil {
+			return fmt.Errorf("failed to write last byte to create hole: %w", err)
+		}
+		b.n += requiredBytes
+		return
+	}
+	emptyBuf := make([]byte, 1*1024*1024)
+	for requiredBytes > 0 {
+		curBlockBytes := requiredBytes
+		if curBlockBytes > int64(len(emptyBuf)) {
+			curBlockBytes = int64(len(emptyBuf))
+		}
+		_, err = b.Write(emptyBuf[:curBlockBytes])
+		if err != nil {
+			return
+		}
+		requiredBytes -= curBlockBytes
+	}
+	return
+}
+
+// timeToMsDosTime converts a time.Time to an MS-DOS date and time.
+// The resolution is 2s with fTime and 10ms if fTenMils is also used.
+// See: http://msdn.microsoft.com/en-us/library/ms724274(v=VS.85).aspx
+func timeToMsDosTime(t time.Time) (fDate uint16, fTime uint16, fTenMils uint8) {
+	t = t.In(time.UTC)
+	if t.Year() < 1980 {
+		t = time.Date(1980, 1, 1, 0, 0, 0, 0, time.UTC)
+	}
+	if t.Year() > 2107 {
+		t = time.Date(2107, 12, 31, 23, 59, 59, 0, time.UTC)
+	}
+	fDate = uint16(t.Day() + int(t.Month())<<5 + (t.Year()-1980)<<9)
+	fTime = uint16(t.Second()/2 + t.Minute()<<5 + t.Hour()<<11)
+	fTenMils = uint8(t.Nanosecond()/1e7 + (t.Second()%2)*100)
+	return
+}