blob: fae87ec26a6c04a58bd531abe790d59753447f8c [file] [log] [blame]
Tim Windelschmidt6d33a432025-02-04 14:34:25 +01001// Copyright The Monogon Project Authors.
Lorenz Brun378a4452021-01-26 13:47:41 +01002// SPDX-License-Identifier: Apache-2.0
Lorenz Brun378a4452021-01-26 13:47:41 +01003
4package erofs
5
6import (
7 "encoding/binary"
8 "errors"
9 "fmt"
10 "io"
11 "path"
12
13 "golang.org/x/sys/unix"
14)
15
// Writer writes a new EROFS filesystem.
type Writer struct {
	// w is the underlying output the filesystem image is written to.
	w io.WriteSeeker
	// fixDirectoryEntry contains for each referenced path where it is
	// referenced from. Since self-references are required anyways (for the "."
	// and ".." entries) we let the user write files in any order and just
	// point the directory entries to the right target nid and file type on
	// Close().
	fixDirectoryEntry map[string][]direntFixupLocation
	// pathInodeMeta maps a path to the metadata of its inode. Close() looks
	// up both the referencing directory and the referenced target in it.
	pathInodeMeta map[string]*uncompressedInodeMeta
	// legacyInodeIndex stores the next legacy (32-bit) inode to be allocated.
	// 64 bit inodes are automatically calculated by EROFS on mount.
	legacyInodeIndex uint32
	// blockAllocatorIndex is the number of the next block that
	// allocateBlocks hands out.
	blockAllocatorIndex uint32
	// metadataBlocksFree tracks the remaining free space of every block
	// that allocateMetadata has started.
	metadataBlocksFree metadataBlocksMeta
}
32
Serge Bazanski216fe7b2021-05-21 18:36:16 +020033// NewWriter creates a new EROFS filesystem writer. The given WriteSeeker needs
34// to be at the start.
Lorenz Brun378a4452021-01-26 13:47:41 +010035func NewWriter(w io.WriteSeeker) (*Writer, error) {
36 erofsWriter := &Writer{
37 w: w,
38 fixDirectoryEntry: make(map[string][]direntFixupLocation),
39 pathInodeMeta: make(map[string]*uncompressedInodeMeta),
40 }
41 _, err := erofsWriter.allocateMetadata(1024+binary.Size(&superblock{}), 0)
42 if err != nil {
43 return nil, fmt.Errorf("cannot allocate first metadata block: %w", err)
44 }
45 if _, err := erofsWriter.w.Write(make([]byte, 1024)); err != nil { // Padding
46 return nil, fmt.Errorf("failed to write initial padding: %w", err)
47 }
48 if err := binary.Write(erofsWriter.w, binary.LittleEndian, &superblock{
Serge Bazanski216fe7b2021-05-21 18:36:16 +020049 Magic: Magic,
50 BlockSizeBits: blockSizeBits,
51 // 1024 (padding) + 128 (superblock) / 32, not eligible for fixup as
52 // different int size
53 RootNodeNumber: 36,
Lorenz Brun378a4452021-01-26 13:47:41 +010054 }); err != nil {
55 return nil, fmt.Errorf("failed to write superblock: %w", err)
56 }
57 return erofsWriter, nil
58}
59
Serge Bazanski216fe7b2021-05-21 18:36:16 +020060// allocateMetadata allocates metadata space of size bytes with a given
61// alignment and seeks to the first byte of the newly-allocated metadata space.
62// It also returns the position of that first byte.
Lorenz Brun378a4452021-01-26 13:47:41 +010063func (w *Writer) allocateMetadata(size int, alignment uint16) (int64, error) {
64 if size > BlockSize {
65 panic("cannot allocate a metadata object bigger than BlockSize bytes")
66 }
67 sizeU16 := uint16(size)
Lorenz Brun8ae96a42024-04-17 12:46:32 +000068 pos, ok := w.metadataBlocksFree.findBlock(sizeU16, alignment)
Lorenz Brun378a4452021-01-26 13:47:41 +010069 if !ok {
70 blockNumber, err := w.allocateBlocks(1)
71 if err != nil {
72 return 0, fmt.Errorf("failed to allocate additional metadata space: %w", err)
73 }
74 w.metadataBlocksFree = append(w.metadataBlocksFree, metadataBlockMeta{blockNumber: blockNumber, freeBytes: BlockSize - sizeU16})
75 if _, err := w.w.Write(make([]byte, BlockSize)); err != nil {
76 return 0, fmt.Errorf("failed to write metadata: %w", err)
77 }
78 pos = int64(blockNumber) * BlockSize // Always aligned to BlockSize, bigger alignments are unsupported anyways
79 }
80 if _, err := w.w.Seek(pos, io.SeekStart); err != nil {
81 return 0, fmt.Errorf("cannot seek to existing metadata nid, likely misaligned meta write")
82 }
83 return pos, nil
84}
85
Serge Bazanski216fe7b2021-05-21 18:36:16 +020086// allocateBlocks allocates n new BlockSize-sized block and seeks to the
87// beginning of the first newly-allocated block. It also returns the first
88// newly-allocated block number. The caller is expected to write these blocks
89// completely before calling allocateBlocks again.
Lorenz Brun378a4452021-01-26 13:47:41 +010090func (w *Writer) allocateBlocks(n uint32) (uint32, error) {
91 if _, err := w.w.Seek(int64(w.blockAllocatorIndex)*BlockSize, io.SeekStart); err != nil {
92 return 0, fmt.Errorf("cannot seek to end of last block, check write alignment: %w", err)
93 }
94 firstBlock := w.blockAllocatorIndex
95 w.blockAllocatorIndex += n
96 return firstBlock, nil
97}
98
99func (w *Writer) create(pathname string, inode Inode) *uncompressedInodeWriter {
100 i := &uncompressedInodeWriter{
101 writer: w,
102 inode: *inode.inode(),
103 legacyInodeNumber: w.legacyInodeIndex,
104 pathname: path.Clean(pathname),
105 }
106 w.legacyInodeIndex++
107 return i
108}
109
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200110// CreateFile adds a new file to the EROFS. It returns a WriteCloser to which
111// the file contents should be written and which then needs to be closed. The
112// last writer obtained by calling CreateFile() needs to be closed first before
113// opening a new one. The given pathname needs to be referenced by a directory
114// created using Create(), otherwise it will not be accessible.
Lorenz Brun378a4452021-01-26 13:47:41 +0100115func (w *Writer) CreateFile(pathname string, meta *FileMeta) io.WriteCloser {
116 return w.create(pathname, meta)
117}
118
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200119// Create adds a new non-file inode to the EROFS. This includes directories,
120// device nodes, symlinks and FIFOs. The first call to Create() needs to be
121// with pathname "." and a directory inode. The given pathname needs to be
122// referenced by a directory, otherwise it will not be accessible (with the
123// exception of the directory ".").
Lorenz Brun378a4452021-01-26 13:47:41 +0100124func (w *Writer) Create(pathname string, inode Inode) error {
125 iw := w.create(pathname, inode)
126 switch i := inode.(type) {
127 case *Directory:
128 if err := i.writeTo(iw); err != nil {
129 return fmt.Errorf("failed to write directory contents: %w", err)
130 }
131 case *SymbolicLink:
132 if err := i.writeTo(iw); err != nil {
133 return fmt.Errorf("failed to write symbolic link contents: %w", err)
134 }
135 }
136 return iw.Close()
137}
138
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200139// Close finishes writing an EROFS filesystem. Errors by this function need to
140// be handled as they indicate if the written filesystem is consistent (i.e.
141// there are no directory entries pointing to nonexistent inodes).
Lorenz Brun378a4452021-01-26 13:47:41 +0100142func (w *Writer) Close() error {
143 for targetPath, entries := range w.fixDirectoryEntry {
144 for _, entry := range entries {
145 targetMeta, ok := w.pathInodeMeta[targetPath]
146 if !ok {
147 return fmt.Errorf("failed to link filesystem tree: dangling reference to %v", targetPath)
148 }
149 if err := direntFixup(w.pathInodeMeta[entry.path], int64(entry.entryIndex), targetMeta); err != nil {
150 return err
151 }
152 }
153 }
154 return nil
155}
156
// uncompressedInodeMeta tracks enough metadata about a written inode to be
// able to point dirents to it and to provide a WriteSeeker into the inode
// itself.
type uncompressedInodeMeta struct {
	// nid and ftype are the values direntFixup writes into dirents that
	// point at this inode.
	nid   uint64
	ftype uint8

	// Physical placement metadata
	blockStart   int64
	blockLength  int64
	inlineStart  int64
	inlineLength int64

	// writer is the Writer whose underlying output Write() seeks in and
	// writes to.
	writer *Writer
	// currentOffset is the position maintained by Seek() and Write().
	currentOffset int64
}
173
174func (a *uncompressedInodeMeta) Seek(offset int64, whence int) (int64, error) {
175 switch whence {
176 case io.SeekCurrent:
177 break
178 case io.SeekStart:
179 a.currentOffset = 0
180 case io.SeekEnd:
181 a.currentOffset = a.blockLength + a.inlineLength
182 }
183 a.currentOffset += offset
184 return a.currentOffset, nil
185}
186
187func (a *uncompressedInodeMeta) Write(p []byte) (int, error) {
188 if a.currentOffset < a.blockLength {
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200189 // TODO(lorenz): Handle the special case where a directory inode is
190 // spread across multiple blocks (depending on other factors this
191 // occurs around ~200 direct children).
Lorenz Brun378a4452021-01-26 13:47:41 +0100192 return 0, errors.New("relocating dirents in multi-block directory inodes is unimplemented")
193 }
194 if _, err := a.writer.w.Seek(a.inlineStart+a.currentOffset, io.SeekStart); err != nil {
195 return 0, err
196 }
197 a.currentOffset += int64(len(p))
198 return a.writer.w.Write(p)
199}
200
// direntFixupLocation identifies a single directory entry whose nid and file
// type still need to be filled in on Close(): path is the key of the
// referencing directory in pathInodeMeta and entryIndex is the index of the
// entry handed to direntFixup.
type direntFixupLocation struct {
	path       string
	entryIndex uint16
}
205
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200206// direntFixup overrides nid and file type from the path the dirent is pointing
207// to. The given iw is expected to be at the start of the dirent inode to be
208// fixed up.
Lorenz Brun378a4452021-01-26 13:47:41 +0100209func direntFixup(iw io.WriteSeeker, entryIndex int64, meta *uncompressedInodeMeta) error {
210 if _, err := iw.Seek(entryIndex*12, io.SeekStart); err != nil {
211 return fmt.Errorf("failed to seek to dirent: %w", err)
212 }
213 if err := binary.Write(iw, binary.LittleEndian, meta.nid); err != nil {
214 return fmt.Errorf("failed to write nid: %w", err)
215 }
216 if _, err := iw.Seek(2, io.SeekCurrent); err != nil { // Skip NameStartOffset
217 return fmt.Errorf("failed to seek to dirent: %w", err)
218 }
219 if err := binary.Write(iw, binary.LittleEndian, meta.ftype); err != nil {
220 return fmt.Errorf("failed to write ftype: %w", err)
221 }
222 return nil
223}
224
// metadataBlockMeta describes one block used for metadata: its block number
// and the number of bytes at the end of the block that are still unallocated.
type metadataBlockMeta struct {
	blockNumber uint32
	freeBytes   uint16
}
229
// metadataBlocksMeta contains metadata about all metadata blocks, most
// importantly the amount of free bytes in each block. It is a slice rather
// than a map so that blocks are always searched in the same order, which
// keeps the output reproducible (map iteration order is not stable).
type metadataBlocksMeta []metadataBlockMeta
234
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200235// findBlock returns the absolute position where `size` bytes with the
236// specified alignment can still fit. If there is not enough space in any
237// metadata block it returns false as the second return value.
Lorenz Brun378a4452021-01-26 13:47:41 +0100238func (m metadataBlocksMeta) findBlock(size uint16, alignment uint16) (int64, bool) {
239 for i, blockMeta := range m {
Lorenz Brun8ae96a42024-04-17 12:46:32 +0000240 freeBytesAligned := blockMeta.freeBytes
241 if alignment > 0 {
242 freeBytesAligned = blockMeta.freeBytes - (blockMeta.freeBytes % alignment)
243 }
Lorenz Brun378a4452021-01-26 13:47:41 +0100244 if freeBytesAligned > size {
245 m[i] = metadataBlockMeta{
246 blockNumber: blockMeta.blockNumber,
247 freeBytes: freeBytesAligned - size,
248 }
249 pos := int64(blockMeta.blockNumber+1)*BlockSize - int64(freeBytesAligned)
250 return pos, true
251 }
252 }
253 return 0, false
254}
255
// unixModeToFTMap maps the file type bits of a Unix mode to the corresponding
// EROFS file type. It is consulted by unixModeToFT.
var unixModeToFTMap = map[uint16]uint8{
	unix.S_IFREG:  fileTypeRegularFile,
	unix.S_IFDIR:  fileTypeDirectory,
	unix.S_IFCHR:  fileTypeCharacterDevice,
	unix.S_IFBLK:  fileTypeBlockDevice,
	unix.S_IFIFO:  fileTypeFIFO,
	unix.S_IFSOCK: fileTypeSocket,
	unix.S_IFLNK:  fileTypeSymbolicLink,
}
265
266// unixModeToFT maps a Unix file type to an EROFS file type.
267func unixModeToFT(mode uint16) uint8 {
268 return unixModeToFTMap[mode&unix.S_IFMT]
269}