// Copyright 2020 The Monogon Project Authors.
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
16
17package erofs
18
19import (
20 "encoding/binary"
21 "errors"
22 "fmt"
23 "io"
24 "path"
25
26 "golang.org/x/sys/unix"
27)
28
// Writer writes a new EROFS filesystem to an underlying io.WriteSeeker. It is created with NewWriter, filled using
// Create() and CreateFile(), and finished with Close().
type Writer struct {
	// w is the destination all filesystem data and metadata is written to.
	w io.WriteSeeker
	// fixDirectoryEntry contains for each referenced path where it is referenced from. Since self-references
	// are required anyways (for the "." and ".." entries) we let the user write files in any order and just
	// point the directory entries to the right target nid and file type on Close().
	fixDirectoryEntry map[string][]direntFixupLocation
	// pathInodeMeta is keyed by path. Close() looks up both the target of a directory entry fixup and the inode
	// containing the directory entry in this map.
	pathInodeMeta map[string]*uncompressedInodeMeta
	// legacyInodeIndex stores the next legacy (32-bit) inode to be allocated. 64 bit inodes are automatically
	// calculated by EROFS on mount.
	legacyInodeIndex uint32
	// blockAllocatorIndex is the number of the next block handed out by allocateBlocks.
	blockAllocatorIndex uint32
	// metadataBlocksFree tracks the free bytes left in each block that allocateMetadata has allocated.
	metadataBlocksFree metadataBlocksMeta
}
43
44// NewWriter creates a new EROFS filesystem writer. The given WriteSeeker needs to be at the start.
45func NewWriter(w io.WriteSeeker) (*Writer, error) {
46 erofsWriter := &Writer{
47 w: w,
48 fixDirectoryEntry: make(map[string][]direntFixupLocation),
49 pathInodeMeta: make(map[string]*uncompressedInodeMeta),
50 }
51 _, err := erofsWriter.allocateMetadata(1024+binary.Size(&superblock{}), 0)
52 if err != nil {
53 return nil, fmt.Errorf("cannot allocate first metadata block: %w", err)
54 }
55 if _, err := erofsWriter.w.Write(make([]byte, 1024)); err != nil { // Padding
56 return nil, fmt.Errorf("failed to write initial padding: %w", err)
57 }
58 if err := binary.Write(erofsWriter.w, binary.LittleEndian, &superblock{
59 Magic: Magic,
60 BlockSizeBits: blockSizeBits,
61 RootNodeNumber: 36, // 1024 (padding) + 128 (superblock) / 32, not eligible for fixup as different int size
62 }); err != nil {
63 return nil, fmt.Errorf("failed to write superblock: %w", err)
64 }
65 return erofsWriter, nil
66}
67
68// allocateMetadata allocates metadata space of size bytes with a given alignment and seeks to the first byte of the
69// newly-allocated metadata space. It also returns the position of that first byte.
70func (w *Writer) allocateMetadata(size int, alignment uint16) (int64, error) {
71 if size > BlockSize {
72 panic("cannot allocate a metadata object bigger than BlockSize bytes")
73 }
74 sizeU16 := uint16(size)
75 pos, ok := w.metadataBlocksFree.findBlock(sizeU16, 32)
76 if !ok {
77 blockNumber, err := w.allocateBlocks(1)
78 if err != nil {
79 return 0, fmt.Errorf("failed to allocate additional metadata space: %w", err)
80 }
81 w.metadataBlocksFree = append(w.metadataBlocksFree, metadataBlockMeta{blockNumber: blockNumber, freeBytes: BlockSize - sizeU16})
82 if _, err := w.w.Write(make([]byte, BlockSize)); err != nil {
83 return 0, fmt.Errorf("failed to write metadata: %w", err)
84 }
85 pos = int64(blockNumber) * BlockSize // Always aligned to BlockSize, bigger alignments are unsupported anyways
86 }
87 if _, err := w.w.Seek(pos, io.SeekStart); err != nil {
88 return 0, fmt.Errorf("cannot seek to existing metadata nid, likely misaligned meta write")
89 }
90 return pos, nil
91}
92
93// allocateBlocks allocates n new BlockSize-sized block and seeks to the beginning of the first newly-allocated block.
94// It also returns the first newly-allocated block number. The caller is expected to write these blocks completely
95// before calling allocateBlocks again.
96func (w *Writer) allocateBlocks(n uint32) (uint32, error) {
97 if _, err := w.w.Seek(int64(w.blockAllocatorIndex)*BlockSize, io.SeekStart); err != nil {
98 return 0, fmt.Errorf("cannot seek to end of last block, check write alignment: %w", err)
99 }
100 firstBlock := w.blockAllocatorIndex
101 w.blockAllocatorIndex += n
102 return firstBlock, nil
103}
104
105func (w *Writer) create(pathname string, inode Inode) *uncompressedInodeWriter {
106 i := &uncompressedInodeWriter{
107 writer: w,
108 inode: *inode.inode(),
109 legacyInodeNumber: w.legacyInodeIndex,
110 pathname: path.Clean(pathname),
111 }
112 w.legacyInodeIndex++
113 return i
114}
115
116// CreateFile adds a new file to the EROFS. It returns a WriteCloser to which the file contents should be written and
117// which then needs to be closed. The last writer obtained by calling CreateFile() needs to be closed first before
118// opening a new one. The given pathname needs to be referenced by a directory created using Create(), otherwise it will
119// not be accessible.
120func (w *Writer) CreateFile(pathname string, meta *FileMeta) io.WriteCloser {
121 return w.create(pathname, meta)
122}
123
124// Create adds a new non-file inode to the EROFS. This includes directories, device nodes, symlinks and FIFOs.
125// The first call to Create() needs to be with pathname "." and a directory inode.
126// The given pathname needs to be referenced by a directory, otherwise it will not be accessible (with the exception of
127// the directory ".").
128func (w *Writer) Create(pathname string, inode Inode) error {
129 iw := w.create(pathname, inode)
130 switch i := inode.(type) {
131 case *Directory:
132 if err := i.writeTo(iw); err != nil {
133 return fmt.Errorf("failed to write directory contents: %w", err)
134 }
135 case *SymbolicLink:
136 if err := i.writeTo(iw); err != nil {
137 return fmt.Errorf("failed to write symbolic link contents: %w", err)
138 }
139 }
140 return iw.Close()
141}
142
143// Close finishes writing an EROFS filesystem. Errors by this function need to be handled as they indicate if the
144// written filesystem is consistent (i.e. there are no directory entries pointing to nonexistent inodes).
145func (w *Writer) Close() error {
146 for targetPath, entries := range w.fixDirectoryEntry {
147 for _, entry := range entries {
148 targetMeta, ok := w.pathInodeMeta[targetPath]
149 if !ok {
150 return fmt.Errorf("failed to link filesystem tree: dangling reference to %v", targetPath)
151 }
152 if err := direntFixup(w.pathInodeMeta[entry.path], int64(entry.entryIndex), targetMeta); err != nil {
153 return err
154 }
155 }
156 }
157 return nil
158}
159
// uncompressedInodeMeta tracks enough metadata about a written inode to be able to point dirents to it and to provide
// a WriteSeeker into the inode itself.
type uncompressedInodeMeta struct {
	// nid and ftype are the values direntFixup writes into directory entries pointing at this inode.
	nid   uint64
	ftype uint8

	// Physical placement metadata
	// NOTE(review): these fields are populated outside of this section. Presumably the *Start fields are absolute
	// positions in the image and the *Length fields are byte counts; confirm against the inode writer.
	blockStart   int64
	blockLength  int64
	inlineStart  int64
	inlineLength int64

	// writer is the Writer whose underlying WriteSeeker Write() operates on.
	writer *Writer
	// currentOffset is the position set by Seek() and advanced by Write().
	currentOffset int64
}
175
176func (a *uncompressedInodeMeta) Seek(offset int64, whence int) (int64, error) {
177 switch whence {
178 case io.SeekCurrent:
179 break
180 case io.SeekStart:
181 a.currentOffset = 0
182 case io.SeekEnd:
183 a.currentOffset = a.blockLength + a.inlineLength
184 }
185 a.currentOffset += offset
186 return a.currentOffset, nil
187}
188
189func (a *uncompressedInodeMeta) Write(p []byte) (int, error) {
190 if a.currentOffset < a.blockLength {
191 // TODO(lorenz): Handle the special case where a directory inode is spread across multiple
192 // blocks (depending on other factors this occurs around ~200 direct children).
193 return 0, errors.New("relocating dirents in multi-block directory inodes is unimplemented")
194 }
195 if _, err := a.writer.w.Seek(a.inlineStart+a.currentOffset, io.SeekStart); err != nil {
196 return 0, err
197 }
198 a.currentOffset += int64(len(p))
199 return a.writer.w.Write(p)
200}
201
// direntFixupLocation identifies a single directory entry which needs to be fixed up on Close().
type direntFixupLocation struct {
	// path is the key into Writer.pathInodeMeta of the inode containing the directory entry.
	path string
	// entryIndex is the index of the directory entry; Close() passes it to direntFixup, which multiplies it by the
	// dirent size to find the entry.
	entryIndex uint16
}
206
207// direntFixup overrides nid and file type from the path the dirent is pointing to. The given iw is expected to be at
208// the start of the dirent inode to be fixed up.
209func direntFixup(iw io.WriteSeeker, entryIndex int64, meta *uncompressedInodeMeta) error {
210 if _, err := iw.Seek(entryIndex*12, io.SeekStart); err != nil {
211 return fmt.Errorf("failed to seek to dirent: %w", err)
212 }
213 if err := binary.Write(iw, binary.LittleEndian, meta.nid); err != nil {
214 return fmt.Errorf("failed to write nid: %w", err)
215 }
216 if _, err := iw.Seek(2, io.SeekCurrent); err != nil { // Skip NameStartOffset
217 return fmt.Errorf("failed to seek to dirent: %w", err)
218 }
219 if err := binary.Write(iw, binary.LittleEndian, meta.ftype); err != nil {
220 return fmt.Errorf("failed to write ftype: %w", err)
221 }
222 return nil
223}
224
225type metadataBlockMeta struct {
226 blockNumber uint32
227 freeBytes uint16
228}
229
230// metadataBlocksMeta contains metadata about all metadata blocks, most importantly the amount of free
231// bytes in each block. This is not a map for reproducibility (map ordering).
232type metadataBlocksMeta []metadataBlockMeta
233
234// findBlock returns the absolute position where `size` bytes with the specified alignment can still fit.
235// If there is not enough space in any metadata block it returns false as the second return value.
236func (m metadataBlocksMeta) findBlock(size uint16, alignment uint16) (int64, bool) {
237 for i, blockMeta := range m {
238 freeBytesAligned := blockMeta.freeBytes - (blockMeta.freeBytes % alignment)
239 if freeBytesAligned > size {
240 m[i] = metadataBlockMeta{
241 blockNumber: blockMeta.blockNumber,
242 freeBytes: freeBytesAligned - size,
243 }
244 pos := int64(blockMeta.blockNumber+1)*BlockSize - int64(freeBytesAligned)
245 return pos, true
246 }
247 }
248 return 0, false
249}
250
// unixModeToFTMap maps the file type bits of a Unix mode (the S_IF* constants) to the corresponding EROFS file type.
// It is looked up by unixModeToFT.
var unixModeToFTMap = map[uint16]uint8{
	unix.S_IFREG:  fileTypeRegularFile,
	unix.S_IFDIR:  fileTypeDirectory,
	unix.S_IFCHR:  fileTypeCharacterDevice,
	unix.S_IFBLK:  fileTypeBlockDevice,
	unix.S_IFIFO:  fileTypeFIFO,
	unix.S_IFSOCK: fileTypeSocket,
	unix.S_IFLNK:  fileTypeSymbolicLink,
}
260
261// unixModeToFT maps a Unix file type to an EROFS file type.
262func unixModeToFT(mode uint16) uint8 {
263 return unixModeToFTMap[mode&unix.S_IFMT]
264}